From 17498aa0435af4ae2d8ed1d1bb87847929557979 Mon Sep 17 00:00:00 2001 From: Manabu Niseki Date: Thu, 8 Feb 2024 19:42:37 +0900 Subject: [PATCH] v2.2.0 --- lib/miteru.rb | 13 ++++- lib/miteru/cache.rb | 53 +++++++++++++++++++ lib/miteru/config.rb | 12 +++++ lib/miteru/crawler.rb | 36 +++++++++---- lib/miteru/orchestrator.rb | 22 ++++++-- lib/miteru/version.rb | 2 +- miteru.gemspec | 101 +++++++++++++++++++------------------ spec/orchestrator_spec.rb | 2 + 8 files changed, 174 insertions(+), 67 deletions(-) create mode 100644 lib/miteru/cache.rb diff --git a/lib/miteru.rb b/lib/miteru.rb index fd41a70..0b1d56d 100644 --- a/lib/miteru.rb +++ b/lib/miteru.rb @@ -33,6 +33,9 @@ require "miteru/concerns/error_unwrappable" # Core classes +require "miteru/service" + +require "miteru/cache" require "miteru/config" require "miteru/http" @@ -93,6 +96,14 @@ def development? env == "development" end + def cache? + !Miteru.config.cache_redis_url.nil? + end + + def cache + @cache ||= Cache.new(Miteru.config.cache_redis_url) + end + # # @return [Boolean] # @@ -117,8 +128,6 @@ def initialize_sentry end # Services -require "miteru/service" - require "miteru/crawler" require "miteru/downloader" require "miteru/kit" diff --git a/lib/miteru/cache.rb b/lib/miteru/cache.rb new file mode 100644 index 0000000..00d916b --- /dev/null +++ b/lib/miteru/cache.rb @@ -0,0 +1,53 @@ +require "redis" + +module Miteru + class Cache < Service + # @return [String] + attr_reader :url + + # + # @param [String] url + # + def initialize(url) + super() + @url = url + end + + # + # @param [String] key + # @param [String] value + # @param [Integer, nil] ex + # + def set(key, value, ex:) + value = redis.set("#{prefix}:#{key}", value, ex:) + Miteru.logger.info("Cache:#{key} is set.") if verbose? + value + end + + # + # @param [String] key + # + def cached?(key) + value = redis.exists?("#{prefix}:#{key}") + Miteru.logger.info("Cache:#{key} found.") if value && verbose? + value + end + + private + + def verbose? 
+ Miteru.config.verbose + end + + def prefix + Miteru.config.cache_prefix + end + + # + # @return [Redis] + # + def redis + @redis ||= Redis.new(url:) + end + end +end diff --git a/lib/miteru/config.rb b/lib/miteru/config.rb index 8277c2b..ab4fec2 100644 --- a/lib/miteru/config.rb +++ b/lib/miteru/config.rb @@ -22,6 +22,9 @@ class Config < Anyway::Config sentry_dsn: nil, sentry_trace_sample_rate: 0.25, sidekiq_redis_url: nil, + cache_redis_url: nil, + cache_ex: nil, + cache_prefix: "miteru:cache", slack_channel: "#general", slack_webhook_url: nil, threads: Parallel.processor_count, @@ -39,6 +42,15 @@ class Config < Anyway::Config # @return [Float] # @!attribute [r] sidekiq_redis_url + # @return [String, nil] + + # @!attribute [r] cache_redis_url + # @return [String, nil] + + # @!attribute [r] cache_ex + # @return [Integer, nil] + + # @!attribute [r] cache_prefix # @return [String] # @!attribute [r] http_timeout diff --git a/lib/miteru/crawler.rb b/lib/miteru/crawler.rb index e97a972..f6dc6eb 100644 --- a/lib/miteru/crawler.rb +++ b/lib/miteru/crawler.rb @@ -11,28 +11,44 @@ def call(website) Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do info = "Website:#{website.info}." info = info.colorize(:red) if website.kits? - Miteru.logger.info(info) - return unless website.kits? - - notify website - - return unless auto_download? website.kits.each do |kit| downloader = Downloader.new(kit) result = downloader.result - if result.success? - Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}.") - else + + unless result.success? Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.") + next end + + destination = result.value! + Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.") + # Remove downloaded file if auto_download is not allowed + FileUtils.rm(destination, force: true) unless auto_download? 
+ # Notify the website + notify website end + + # Cache the website + cache.set(website.url, website.source, ex: cache_ex) if cache? end.recover { nil }.value! end private + def cache? + Miteru.cache? + end + + def cache + Miteru.cache + end + + def cache_ex + Miteru.config.cache_ex + end + def auto_download? Miteru.config.auto_download end @@ -41,7 +57,7 @@ def auto_download? # @param [Miteru::Website] website # def notify(website) - notifiers.each do |notifier| + notifiers.each do |notifier| result = notifier.result(website) if result.success? Miteru.logger.info("Notifier:#{notifier.name} succeeded.") diff --git a/lib/miteru/orchestrator.rb b/lib/miteru/orchestrator.rb index 9006ad0..c217d7e 100644 --- a/lib/miteru/orchestrator.rb +++ b/lib/miteru/orchestrator.rb @@ -3,16 +3,16 @@ module Miteru class Orchestrator < Service def call - Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose? + Miteru.logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose? if Miteru.sidekiq? - websites.each do |website| + non_cached_websites.each do |website| Jobs::CrawleJob.perform_async(website.url, website.source) Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose? end else Miteru.logger.info("Use #{threads} thread(s).") if verbose? - Parallel.each(websites, in_threads: threads) do |website| + Parallel.each(non_cached_websites, in_threads: threads) do |website| Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose? result = Crawler.result(website) @@ -26,7 +26,7 @@ def call end # - # @return [Array] + # @return [Array] # def websites @websites ||= [].tap do |out| @@ -43,6 +43,12 @@ def websites end.flatten.uniq(&:url) end + def non_cached_websites + return websites unless cache? + + websites.reject { |website| cache.cached?(website.url) } + end + # # @return [Array] # @@ -52,6 +58,14 @@ def feeds private + def cache? + Miteru.cache? 
+ end + + def cache + Miteru.cache + end + def threads Miteru.config.threads end diff --git a/lib/miteru/version.rb b/lib/miteru/version.rb index 07e03c4..9e17351 100644 --- a/lib/miteru/version.rb +++ b/lib/miteru/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Miteru - VERSION = "2.1.1" + VERSION = "2.2.0" end diff --git a/miteru.gemspec b/miteru.gemspec index b2d17e8..5daecd7 100644 --- a/miteru.gemspec +++ b/miteru.gemspec @@ -1,67 +1,68 @@ # frozen_string_literal: true -lib = File.expand_path('lib', __dir__) +lib = File.expand_path("lib", __dir__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) -require 'miteru/version' +require "miteru/version" Gem::Specification.new do |spec| - spec.name = 'miteru' + spec.name = "miteru" spec.version = Miteru::VERSION - spec.authors = ['Manabu Niseki'] - spec.email = ['manabu.niseki@gmail.com'] - spec.metadata['rubygems_mfa_required'] = 'true' + spec.authors = ["Manabu Niseki"] + spec.email = ["manabu.niseki@gmail.com"] + spec.metadata["rubygems_mfa_required"] = "true" - spec.summary = 'A phishing kit collector for scavengers' - spec.description = 'A phishing kit collector for scavengers' - spec.homepage = 'https://github.com/ninoseki/miteru' - spec.license = 'MIT' + spec.summary = "A phishing kit collector for scavengers" + spec.description = "A phishing kit collector for scavengers" + spec.homepage = "https://github.com/ninoseki/miteru" + spec.license = "MIT" # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
spec.files = Dir.chdir(File.expand_path(__dir__)) do `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } end - spec.bindir = 'exe' + spec.bindir = "exe" spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } - spec.require_paths = ['lib'] + spec.require_paths = ["lib"] - spec.add_development_dependency 'bundler', '~> 2.5' - spec.add_development_dependency 'capybara', '~> 3.40' - spec.add_development_dependency 'coveralls_reborn', '~> 0.28' - spec.add_development_dependency 'fuubar', '~> 2.5' - spec.add_development_dependency 'mysql2', '~> 0.5' - spec.add_development_dependency 'pg', '~> 1.5' - spec.add_development_dependency 'rake', '~> 13.1' - spec.add_development_dependency 'rspec', '~> 3.13' - spec.add_development_dependency 'simplecov-lcov', '~> 0.8' - spec.add_development_dependency 'standard', '~> 1.33' - spec.add_development_dependency 'test-prof', '~> 1.3' - spec.add_development_dependency 'vcr', '~> 6.2' - spec.add_development_dependency 'webmock', '~> 3.19' + spec.add_development_dependency "bundler", "~> 2.5" + spec.add_development_dependency "capybara", "~> 3.40" + spec.add_development_dependency "coveralls_reborn", "~> 0.28" + spec.add_development_dependency "fuubar", "~> 2.5" + spec.add_development_dependency "mysql2", "~> 0.5" + spec.add_development_dependency "pg", "~> 1.5" + spec.add_development_dependency "rake", "~> 13.1" + spec.add_development_dependency "rspec", "~> 3.13" + spec.add_development_dependency "simplecov-lcov", "~> 0.8" + spec.add_development_dependency "standard", "~> 1.33" + spec.add_development_dependency "test-prof", "~> 1.3" + spec.add_development_dependency "vcr", "~> 6.2" + spec.add_development_dependency "webmock", "~> 3.19" - spec.add_dependency 'activerecord', '7.1.3' - spec.add_dependency 'addressable', '2.8.6' - spec.add_dependency 'anyway_config', '2.6.2' - spec.add_dependency 'colorize', '1.1.0' - spec.add_dependency 'dotenv', '2.8.1' - spec.add_dependency 'down', '5.4.1' - 
spec.add_dependency 'dry-files', '1.1.0' - spec.add_dependency 'dry-monads', '1.6.0' - spec.add_dependency 'http', '5.2.0' - spec.add_dependency 'memo_wise', '1.8.0' - spec.add_dependency 'oga', '3.4' - spec.add_dependency 'parallel', '1.24.0' - spec.add_dependency 'puma', '6.4.2' - spec.add_dependency 'rack', '3.0.9' - spec.add_dependency 'rack-session', '2.0.0' - spec.add_dependency 'rackup', '2.1.0' - spec.add_dependency 'semantic_logger', '4.15.0' - spec.add_dependency 'sentry-ruby', '5.16.1' - spec.add_dependency 'sentry-sidekiq', '5.16.1' - spec.add_dependency 'sidekiq', '7.2.1' - spec.add_dependency 'slack-notifier', '2.4.0' - spec.add_dependency 'sqlite3', '1.7.2' - spec.add_dependency 'thor', '1.3.0' - spec.add_dependency 'thor-hollaback', '0.2.1' - spec.add_dependency 'uuidtools', '2.2.0' + spec.add_dependency "activerecord", "7.1.3" + spec.add_dependency "addressable", "2.8.6" + spec.add_dependency "anyway_config", "2.6.3" + spec.add_dependency "colorize", "1.1.0" + spec.add_dependency "dotenv", "2.8.1" + spec.add_dependency "down", "5.4.1" + spec.add_dependency "dry-files", "1.1.0" + spec.add_dependency "dry-monads", "1.6.0" + spec.add_dependency "http", "5.2.0" + spec.add_dependency "memo_wise", "1.8.0" + spec.add_dependency "oga", "3.4" + spec.add_dependency "parallel", "1.24.0" + spec.add_dependency "puma", "6.4.2" + spec.add_dependency "rack", "3.0.9" + spec.add_dependency "rack-session", "2.0.0" + spec.add_dependency "rackup", "2.1.0" + spec.add_dependency "redis", "5.0.8" + spec.add_dependency "semantic_logger", "4.15.0" + spec.add_dependency "sentry-ruby", "5.16.1" + spec.add_dependency "sentry-sidekiq", "5.16.1" + spec.add_dependency "sidekiq", "7.2.1" + spec.add_dependency "slack-notifier", "2.4.0" + spec.add_dependency "sqlite3", "1.7.2" + spec.add_dependency "thor", "1.3.0" + spec.add_dependency "thor-hollaback", "0.2.1" + spec.add_dependency "uuidtools", "2.2.0" end diff --git a/spec/orchestrator_spec.rb b/spec/orchestrator_spec.rb index 
3469dad..320339c 100644 --- a/spec/orchestrator_spec.rb +++ b/spec/orchestrator_spec.rb @@ -11,6 +11,8 @@ def urls end RSpec.describe Miteru::Orchestrator do + include_context "with mocked logger" + subject(:orchestrator) { described_class.new } before do