Skip to content

Commit

Permalink
Merge pull request #187 from ninoseki/fix-issues
Browse files Browse the repository at this point in the history
fix: fix issues
  • Loading branch information
ninoseki authored Jan 29, 2024
2 parents 827868e + 3d63ff3 commit 4070665
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 34 deletions.
19 changes: 14 additions & 5 deletions lib/miteru/crawler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Crawler < Service
def call(website)
Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
Miteru.logger.info("Website:#{website.truncated_url} has #{website.kits.length} kit(s).")
return unless website.has_kits?
return unless website.kits?

notify website

Expand All @@ -17,11 +17,10 @@ def call(website)
website.kits.each do |kit|
downloader = Downloader.new(kit)
result = downloader.result

if result.success?
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}")
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}.")
else
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}")
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
end
end
end.recover { nil }.value!
Expand All @@ -33,8 +32,18 @@ def auto_download?
Miteru.config.auto_download
end

#
# @param [Miteru::Website] website
#
def notify(website)
Parallel.each(notifiers) { |notifier| notifier.call(website) }
notifiers.each do |notifier|
result = notifier.result(website)
if result.success?
Miteru.logger.info("Notifier:#{notifier.name} succeeded.")
else
Miteru.logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
end
end
end

#
Expand Down
5 changes: 3 additions & 2 deletions lib/miteru/kit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class Kit < Service
# @param [String] source
#
def initialize(url, source:)
super()

@url = url
@source = source

Expand All @@ -37,7 +39,6 @@ def initialize(url, source:)
#
# Run every validation check against this kit.
#
# The checks are joined with &&, so evaluation stops at the first check
# that returns a falsy value and the later checks are not called.
#
# @return [Boolean] presumably true/false; the actual value is whatever the
#   last evaluated check returns (TODO confirm the helpers return booleans)
#
def valid?
# make a HEAD request for the validation
before_validation

valid_ext? && reachable? && valid_mime_type? && valid_content_length?
end

Expand Down Expand Up @@ -110,7 +111,7 @@ def valid_ext?
end

def http
HTTP::Factory.build
HTTP::Factory.build(raise_exception: false)
end

def before_validation
Expand Down
4 changes: 4 additions & 0 deletions lib/miteru/notifiers/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ def callable?
raise NotImplementedError
end

#
# Short name of this notifier: the class name with its namespace removed.
# The value is computed once and cached in @name.
#
# @return [String]
#
def name
  return @name if @name

  @name = self.class.to_s.split("::").last
end

class << self
def inherited(child)
super
Expand Down
9 changes: 3 additions & 6 deletions lib/miteru/notifiers/urlscan.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@ class UrlScan < Base
def call(website)
return unless callable?

kits = website.kits.select(&:downloaded?)
return unless kits.any?

kits.each { |kit| submit(kit.url) }
website.kits.each { |kit| submit(kit.url) }
end

def callable?
Expand All @@ -33,7 +30,7 @@ def headers
end

def timeout
Miteru.config.timeout
Miteru.config.api_timeout
end

def tags
Expand All @@ -45,7 +42,7 @@ def visibility
end

def submit(url)
http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
http.post("https://urlscan.io/api/v1/scan/", json: {tags:, visibility:, url:})
end
end
end
Expand Down
13 changes: 9 additions & 4 deletions lib/miteru/orchestrator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,21 @@ def call
Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?

if Miteru.sidekiq?

websites.each do |website|
Jobs::CrawleJob.perform_async(website.url, website.source)
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued") if verbose?
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
end
else
Miteru.logger.info("Use #{threads} thread(s).") if verbose?
Parallel.each(websites, in_threads: threads) do |website|
Miteru.logger.info("Website:#{website.truncated_url} crawling started") if verbose?
crawl(website)
Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose?

result = Crawler.result(website)
if result.success?
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
else
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
end
end
end
end
Expand Down
9 changes: 8 additions & 1 deletion lib/miteru/sidekiq/jobs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@ class CrawleJob
#
def perform(url, source)
website = Miteru::Website.new(url, source:)
with_db_connection { Crawler.call(website) }
with_db_connection do
result = Crawler.result(website)
if result.success?
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
else
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
end
end
end
end
end
Expand Down
16 changes: 5 additions & 11 deletions lib/miteru/website.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,8 @@ def index?
title.to_s.start_with? "Index of"
end

def has_kits?
@has_kits ||= lambda do
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
!kits.empty?
end.recover do
false
end.value!
end.call
def kits?
kits.any?
end

def links
Expand Down Expand Up @@ -81,8 +74,9 @@ def href_links
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
doc.css("a").filter_map { |a| a.get("href") }.map do |href|
href = href.start_with?("/") ? href : "/#{href}"
url + href
normalized_href = href.start_with?("/") ? href : "/#{href}"
normalized_url = url.end_with?("/") ? url.delete_suffix("/") : url
normalized_url + normalized_href
end
end.recover { [] }.value!
end
Expand Down
1 change: 1 addition & 0 deletions miteru.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Gem::Specification.new do |spec|
spec.add_development_dependency 'webmock', '~> 3.19'

spec.add_dependency 'activerecord', '7.1.3'
spec.add_dependency 'addressable', '2.8.6'
spec.add_dependency 'anyway_config', '2.6.2'
spec.add_dependency 'colorize', '1.1.0'
spec.add_dependency 'dotenv', '2.8.1'
Expand Down
8 changes: 3 additions & 5 deletions spec/website_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@
end
end

context "when giving a url which contains a phishing kit" do
describe "#has_kits?" do
it do
expect(website.has_kits?).to be(true)
end
describe "#kits?" do
it do
expect(website.kits?).to be(true)
end
end
end

0 comments on commit 4070665

Please sign in to comment.