# main.rb
require 'logger'
require 'net/http'
require 'pathname'
require 'zlib'
require 'nokogiri'
require 'mime/types'
require 'thread'
require './task_processor'
require './crawler'
require './util'
######################
# Global
# Log formatter that tags every record with the current process id and the
# object id of the thread that emitted it.
class CrawlerLogFormatter < Logger::Formatter
  # Renders one record as "[<time>(<pid>:<thread object_id>)<severity>] <message>\n".
  # The severity is right-aligned in a five-character column; progname is
  # accepted to satisfy the Logger formatter interface but is not printed.
  def call(severity, time, progname, msg)
    timestamp = format_datetime(time)
    thread_id = Thread.current.object_id
    text = msg2str(msg)
    format("[%s(%d:%s)%5s] %s\n", timestamp, Process.pid, thread_id, severity, text)
  end
end
# Process-wide logger: writes to STDOUT at DEBUG level using the crawler's
# own line format.
$logger = Logger.new(STDOUT).tap do |log|
  # Use Logger::INFO here instead to suppress debug output.
  log.level = Logger::DEBUG
  log.formatter = CrawlerLogFormatter.new
end
######
# Main
# Script entry point: runs one Crawler per command-line keyword.
#
# Reads the search keywords from ARGV. When none are given it prints a usage
# line and exits with status 1. Otherwise it starts a TaskProcessor built with
# `number_of_processors * 4` (presumably the worker-thread count; confirm in
# task_processor.rb), creates a Crawler per keyword targeting 'downloads', and
# calls `download_google_thumbnails` for pages 0..19 on each crawler, page by
# page. Finally it joins the task processor.
def main
  if ARGV.empty?
    puts 'usage) %s {search-keyword}' % $PROGRAM_NAME
    exit 1
  end

  ### parameters
  search_keywords = ARGV
  search_pages = 20
  processor_count = number_of_processors
  thread_count = processor_count * 4
  ##############

  $logger.info 'program started...'
  $logger.debug "# number of processors : #{processor_count}"

  task_processor = TaskProcessor.new(thread_count)
  task_processor.start

  $logger.info 'ready...'
  engines = search_keywords.map do |search_keyword|
    $logger.info("search keyword : #{search_keyword}")
    Crawler.new(task_processor, 'downloads', search_keyword)
  end

  # TODO: make this loop asynchronous.
  # Pages are the outer loop so every keyword gets page N before any gets N+1.
  search_pages.times do |page|
    engines.each { |engine| engine.download_google_thumbnails(page) }
  end

  # NOTE(review): the pause before join looks like it gives queued tasks time
  # to be picked up -- confirm against TaskProcessor#join.
  sleep 1
  task_processor.join
end
# Invoke the entry point unconditionally as soon as this file is loaded.
main