From ee206c6b3663ef6a88773623989dcb2ecb1622d1 Mon Sep 17 00:00:00 2001
From: Nilay Gupta <102874321+g4ze@users.noreply.github.com>
Date: Thu, 8 Aug 2024 19:55:39 +0530
Subject: [PATCH] Polyswarm analyzer closes#1255 (#2439)

* polyswarm

* polyswarm

* polyswarm

* polyswarm

* polyswarm

* mign

* logs

* logs

* mign

* obs

* obs

* obs

* tests

* modular
---
 .../file_analyzers/polyswarm.py               | 111 ++++++++++
 .../0113_analyzer_config_polyswarm.py         | 196 ++++++++++++++++++
 .../0114_analyzer_config_polyswarmobs.py      | 129 ++++++++++++
 .../observable_analyzers/polyswarm_obs.py     |  64 ++++++
 docs/source/Usage.md                          |   2 +
 requirements/project-requirements.txt         |   1 +
 tests/api_app/test_api.py                     |   2 +-
 7 files changed, 504 insertions(+), 1 deletion(-)
 create mode 100644 api_app/analyzers_manager/file_analyzers/polyswarm.py
 create mode 100644 api_app/analyzers_manager/migrations/0113_analyzer_config_polyswarm.py
 create mode 100644 api_app/analyzers_manager/migrations/0114_analyzer_config_polyswarmobs.py
 create mode 100644 api_app/analyzers_manager/observable_analyzers/polyswarm_obs.py

diff --git a/api_app/analyzers_manager/file_analyzers/polyswarm.py b/api_app/analyzers_manager/file_analyzers/polyswarm.py
new file mode 100644
index 0000000000..8a931984d4
--- /dev/null
+++ b/api_app/analyzers_manager/file_analyzers/polyswarm.py
@@ -0,0 +1,111 @@
+# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
+# See the file 'LICENSE' for copying permission.
+import logging
+
+from polyswarm_api.api import PolyswarmAPI
+
+from api_app.analyzers_manager.classes import FileAnalyzer
+from api_app.analyzers_manager.exceptions import AnalyzerRunException
+from tests.mock_utils import if_mock_connections, patch
+
+logger = logging.getLogger(__name__)
+
+import abc
+
+from api_app.analyzers_manager.classes import BaseAnalyzerMixin
+
+
+class PolyswarmBase(BaseAnalyzerMixin, metaclass=abc.ABCMeta):
+    # this class also acts as a super class
+    # for PolyswarmObs in observable analyzers
+    url = "https://api.polyswarm.network/v3"
+    _api_key: str = None
+    timeout: int = 60 * 15  # default as in the package settings
+    polyswarm_community: str = "default"
+
+    def update(self):
+        pass
+
+    @staticmethod
+    def construct_result(result):
+        res = {"assertions": []}
+        positives = 0
+        total = 0
+        for assertion in result.assertions:
+            if assertion.verdict:
+                positives += 1
+            total += 1
+            res["assertions"].append(
+                {
+                    "engine": assertion.author_name,
+                    "asserts": "Malicious" if assertion.verdict else "Benign",
+                }
+            )
+        res["positives"] = positives
+        res["total"] = total
+        res["PolyScore"] = result.polyscore
+        res["sha256"] = result.sha256
+        res["md5"] = result.md5
+        res["sha1"] = result.sha1
+        res["extended_type"] = result.extended_type
+        res["first_seen"] = result.first_seen.isoformat()
+        res["last_seen"] = result.last_seen.isoformat()
+        res["permalink"] = result.permalink
+        return res
+
+
+class Polyswarm(FileAnalyzer, PolyswarmBase):
+    def run(self):
+        api = PolyswarmAPI(key=self._api_key, community=self.polyswarm_community)
+        instance = api.submit(self.filepath)
+        result = api.wait_for(instance, timeout=self.timeout)
+        if result.failed:
+            raise AnalyzerRunException(
+                f"Failed to get results from Polyswarm for {self.md5}"
+            )
+        result = self.construct_result(result)
+
+        return result
+
+    def update(self):
+        pass
+
+    @classmethod
+    def _monkeypatch(cls):
+        patches = [
+            if_mock_connections(
+                patch.object(
+                    Polyswarm,
+                    "run",
+                    # flake8: noqa
+                    return_value={
+                        "assertions": [
+                            {"engine": "Kaspersky", "asserts": "Benign"},
+                            {"engine": "Qihoo 360", "asserts": "Benign"},
+                            {"engine": "XVirus", "asserts": "Benign"},
+                            {"engine": "SecureAge", "asserts": "Benign"},
+                            {"engine": "DrWeb", "asserts": "Benign"},
+                            {"engine": "Proton", "asserts": "Benign"},
+                            {"engine": "Electron", "asserts": "Benign"},
+                            {"engine": "Filseclab", "asserts": "Benign"},
+                            {"engine": "ClamAV", "asserts": "Benign"},
+                            {"engine": "SecondWrite", "asserts": "Benign"},
+                            {"engine": "Ikarus", "asserts": "Benign"},
+                            {"engine": "NanoAV", "asserts": "Benign"},
+                            {"engine": "Alibaba", "asserts": "Benign"},
+                        ],
+                        "positives": 0,
+                        "total": 13,
+                        "PolyScore": 0.33460048640798623,
+                        "sha256": "50f4d8be8d47d26ecb04f1a24f17a39f3ea194d8cdc3b833aef2df88e1ce828b",
+                        "md5": "76deca20806c16df50ffeda163fd50e9",
+                        "sha1": "99ff1cd17aea94feb355e7bdb01e9f788a4971bb",
+                        "extended_type": "GIF image data, version 89a, 821 x 500",
+                        "first_seen": "2024-07-27T20:20:12.121980",
+                        "last_seen": "2024-07-27T20:20:12.121980",
+                        "permalink": "https://polyswarm.network/scan/results/file/50f4d8be8d47d26ecb04f1a24f17a39f3ea194d8cdc3b833aef2df88e1ce828b/76218824984622961",
+                    },
+                ),
+            )
+        ]
+        return super()._monkeypatch(patches=patches)
diff --git a/api_app/analyzers_manager/migrations/0113_analyzer_config_polyswarm.py b/api_app/analyzers_manager/migrations/0113_analyzer_config_polyswarm.py
new file mode 100644
index 0000000000..68c1e92e9f
--- /dev/null
+++ b/api_app/analyzers_manager/migrations/0113_analyzer_config_polyswarm.py
@@ -0,0 +1,196 @@
+from django.db import migrations
+from django.db.models.fields.related_descriptors import (
+    ForwardManyToOneDescriptor,
+    ForwardOneToOneDescriptor,
+    ManyToManyDescriptor,
+)
+
+plugin = {
+    "python_module": {
+        "health_check_schedule": None,
+        "update_schedule": None,
+        "module": "polyswarm.Polyswarm",
+        "base_path": "api_app.analyzers_manager.file_analyzers",
+    },
+    "name": "Polyswarm",
+    "description": "Scan a file using the [Polyswarm](https://docs.polyswarm.io/) API.",
+    "disabled": False,
+    "soft_time_limit": 900,
+    "routing_key": "default",
+    "health_check_status": True,
+    "type": "file",
+    "docker_based": False,
+    "maximum_tlp": "AMBER",
+    "observable_supported": [],
+    "supported_filetypes": [],
+    "run_hash": False,
+    "run_hash_type": "",
+    "not_supported_filetypes": [],
+    "model": "analyzers_manager.AnalyzerConfig",
+}
+
+params = [
+    {
+        "python_module": {
+            "module": "polyswarm.Polyswarm",
+            "base_path": "api_app.analyzers_manager.file_analyzers",
+        },
+        "name": "api_key",
+        "type": "str",
+        "description": "api key for polyswarm",
+        "is_secret": True,
+        "required": False,
+    },
+    {
+        "python_module": {
+            "module": "polyswarm.Polyswarm",
+            "base_path": "api_app.analyzers_manager.file_analyzers",
+        },
+        "name": "timeout",
+        "type": "int",
+        "description": "timeout for Polyswarm api, default is 900s",
+        "is_secret": False,
+        "required": False,
+    },
+    {
+        "python_module": {
+            "module": "polyswarm.Polyswarm",
+            "base_path": "api_app.analyzers_manager.file_analyzers",
+        },
+        "name": "polyswarm_community",
+        "type": "str",
+        "description": 'polyswarm_community for polyswarm analyzer, default is "default"',
+        "is_secret": False,
+        "required": False,
+    },
+]
+
+values = [
+    {
+        "parameter": {
+            "python_module": {
+                "module": "polyswarm.Polyswarm",
+                "base_path": "api_app.analyzers_manager.file_analyzers",
+            },
+            "name": "timeout",
+            "type": "int",
+            "description": "timeout for Polyswarm api, default is 900s",
+            "is_secret": False,
+            "required": False,
+        },
+        "analyzer_config": "Polyswarm",
+        "connector_config": None,
+        "visualizer_config": None,
+        "ingestor_config": None,
+        "pivot_config": None,
+        "for_organization": False,
+        "value": 900,
+        "updated_at": "2024-07-28T18:00:00.981259Z",
+        "owner": None,
+    },
+    {
+        "parameter": {
+            "python_module": {
+                "module": "polyswarm.Polyswarm",
+                "base_path": "api_app.analyzers_manager.file_analyzers",
+            },
+            "name": "polyswarm_community",
+            "type": "str",
+            "description": 'polyswarm_community for polyswarm analyzer, default is "default"',
+            "is_secret": False,
+            "required": False,
+        },
+        "analyzer_config": "Polyswarm",
+        "connector_config": None,
+        "visualizer_config": None,
+        "ingestor_config": None,
+        "pivot_config": None,
+        "for_organization": False,
+        "value": "default",
+        "updated_at": "2024-07-28T18:00:01.001510Z",
+        "owner": None,
+    },
+]
+
+
+def _get_real_obj(Model, field, value):
+    def _get_obj(Model, other_model, value):
+        if isinstance(value, dict):
+            real_vals = {}
+            for key, real_val in value.items():
+                real_vals[key] = _get_real_obj(other_model, key, real_val)
+            value = other_model.objects.get_or_create(**real_vals)[0]
+        # it is just the primary key serialized
+        else:
+            if isinstance(value, int):
+                if Model.__name__ == "PluginConfig":
+                    value = other_model.objects.get(name=plugin["name"])
+                else:
+                    value = other_model.objects.get(pk=value)
+            else:
+                value = other_model.objects.get(name=value)
+        return value
+
+    if (
+        type(getattr(Model, field))
+        in [ForwardManyToOneDescriptor, ForwardOneToOneDescriptor]
+        and value
+    ):
+        other_model = getattr(Model, field).get_queryset().model
+        value = _get_obj(Model, other_model, value)
+    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
+        other_model = getattr(Model, field).rel.model
+        value = [_get_obj(Model, other_model, val) for val in value]
+    return value
+
+
+def _create_object(Model, data):
+    mtm, no_mtm = {}, {}
+    for field, value in data.items():
+        value = _get_real_obj(Model, field, value)
+        if type(getattr(Model, field)) is ManyToManyDescriptor:
+            mtm[field] = value
+        else:
+            no_mtm[field] = value
+    try:
+        o = Model.objects.get(**no_mtm)
+    except Model.DoesNotExist:
+        o = Model(**no_mtm)
+        o.full_clean()
+        o.save()
+        for field, value in mtm.items():
+            attribute = getattr(o, field)
+            if value is not None:
+                attribute.set(value)
+        return False
+    return True
+
+
+def migrate(apps, schema_editor):
+    Parameter = apps.get_model("api_app", "Parameter")
+    PluginConfig = apps.get_model("api_app", "PluginConfig")
+    python_path = plugin.pop("model")
+    Model = apps.get_model(*python_path.split("."))
+    if not Model.objects.filter(name=plugin["name"]).exists():
+        exists = _create_object(Model, plugin)
+        if not exists:
+            for param in params:
+                _create_object(Parameter, param)
+            for value in values:
+                _create_object(PluginConfig, value)
+
+
+def reverse_migrate(apps, schema_editor):
+    python_path = plugin.pop("model")
+    Model = apps.get_model(*python_path.split("."))
+    Model.objects.get(name=plugin["name"]).delete()
+
+
+class Migration(migrations.Migration):
+    atomic = False
+    dependencies = [
+        ("api_app", "0062_alter_parameter_python_module"),
+        ("analyzers_manager", "0112_analyzer_config_criminalip_scan"),
+    ]
+
+    operations = [migrations.RunPython(migrate, reverse_migrate)]
diff --git a/api_app/analyzers_manager/migrations/0114_analyzer_config_polyswarmobs.py b/api_app/analyzers_manager/migrations/0114_analyzer_config_polyswarmobs.py
new file mode 100644
index 0000000000..f00fe2fa27
--- /dev/null
+++ b/api_app/analyzers_manager/migrations/0114_analyzer_config_polyswarmobs.py
@@ -0,0 +1,129 @@
+from django.db import migrations
+from django.db.models.fields.related_descriptors import (
+    ForwardManyToOneDescriptor,
+    ForwardOneToOneDescriptor,
+    ManyToManyDescriptor,
+)
+
+plugin = {
+    "python_module": {
+        "health_check_schedule": None,
+        "update_schedule": None,
+        "module": "polyswarm_obs.PolyswarmObs",
+        "base_path": "api_app.analyzers_manager.observable_analyzers",
+    },
+    "name": "PolyswarmObs",
+    "description": "Scan an observable using [Polyswarm](https://docs.polyswarm.io/) API. Paid plan is required for IP and Domain scans. Hash scan is free.",
+    "disabled": False,
+    "soft_time_limit": 20,
+    "routing_key": "default",
+    "health_check_status": True,
+    "type": "observable",
+    "docker_based": False,
+    "maximum_tlp": "AMBER",
+    "observable_supported": ["ip", "domain", "hash"],
+    "supported_filetypes": [],
+    "run_hash": False,
+    "run_hash_type": "",
+    "not_supported_filetypes": [],
+    "model": "analyzers_manager.AnalyzerConfig",
+}
+
+params = [
+    {
+        "python_module": {
+            "module": "polyswarm_obs.PolyswarmObs",
+            "base_path": "api_app.analyzers_manager.observable_analyzers",
+        },
+        "name": "api_key",
+        "type": "str",
+        "description": "api key for PolyswarmObs",
+        "is_secret": True,
+        "required": False,
+    }
+]
+
+values = []
+
+
+def _get_real_obj(Model, field, value):
+    def _get_obj(Model, other_model, value):
+        if isinstance(value, dict):
+            real_vals = {}
+            for key, real_val in value.items():
+                real_vals[key] = _get_real_obj(other_model, key, real_val)
+            value = other_model.objects.get_or_create(**real_vals)[0]
+        # it is just the primary key serialized
+        else:
+            if isinstance(value, int):
+                if Model.__name__ == "PluginConfig":
+                    value = other_model.objects.get(name=plugin["name"])
+                else:
+                    value = other_model.objects.get(pk=value)
+            else:
+                value = other_model.objects.get(name=value)
+        return value
+
+    if (
+        type(getattr(Model, field))
+        in [ForwardManyToOneDescriptor, ForwardOneToOneDescriptor]
+        and value
+    ):
+        other_model = getattr(Model, field).get_queryset().model
+        value = _get_obj(Model, other_model, value)
+    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
+        other_model = getattr(Model, field).rel.model
+        value = [_get_obj(Model, other_model, val) for val in value]
+    return value
+
+
+def _create_object(Model, data):
+    mtm, no_mtm = {}, {}
+    for field, value in data.items():
+        value = _get_real_obj(Model, field, value)
+        if type(getattr(Model, field)) is ManyToManyDescriptor:
+            mtm[field] = value
+        else:
+            no_mtm[field] = value
+    try:
+        o = Model.objects.get(**no_mtm)
+    except Model.DoesNotExist:
+        o = Model(**no_mtm)
+        o.full_clean()
+        o.save()
+        for field, value in mtm.items():
+            attribute = getattr(o, field)
+            if value is not None:
+                attribute.set(value)
+        return False
+    return True
+
+
+def migrate(apps, schema_editor):
+    Parameter = apps.get_model("api_app", "Parameter")
+    PluginConfig = apps.get_model("api_app", "PluginConfig")
+    python_path = plugin.pop("model")
+    Model = apps.get_model(*python_path.split("."))
+    if not Model.objects.filter(name=plugin["name"]).exists():
+        exists = _create_object(Model, plugin)
+        if not exists:
+            for param in params:
+                _create_object(Parameter, param)
+            for value in values:
+                _create_object(PluginConfig, value)
+
+
+def reverse_migrate(apps, schema_editor):
+    python_path = plugin.pop("model")
+    Model = apps.get_model(*python_path.split("."))
+    Model.objects.get(name=plugin["name"]).delete()
+
+
+class Migration(migrations.Migration):
+    atomic = False
+    dependencies = [
+        ("api_app", "0062_alter_parameter_python_module"),
+        ("analyzers_manager", "0113_analyzer_config_polyswarm"),
+    ]
+
+    operations = [migrations.RunPython(migrate, reverse_migrate)]
diff --git a/api_app/analyzers_manager/observable_analyzers/polyswarm_obs.py b/api_app/analyzers_manager/observable_analyzers/polyswarm_obs.py
new file mode 100644
index 0000000000..e6ebadeaef
--- /dev/null
+++ b/api_app/analyzers_manager/observable_analyzers/polyswarm_obs.py
@@ -0,0 +1,64 @@
+import logging
+
+from polyswarm_api.api import PolyswarmAPI
+
+from api_app.analyzers_manager.classes import ObservableAnalyzer
+from api_app.analyzers_manager.exceptions import AnalyzerRunException
+from tests.mock_utils import if_mock_connections, patch
+
+logger = logging.getLogger(__name__)
+
+from ..file_analyzers.polyswarm import PolyswarmBase
+
+
+class PolyswarmObs(ObservableAnalyzer, PolyswarmBase):
+    def run(self):
+        api = PolyswarmAPI(key=self._api_key, community=self.polyswarm_community)
+        if self.observable_classification == self.ObservableTypes.HASH.value:
+            results = api.search(self.observable_name)
+            result = self.get_results(results)
+            return result
+        elif self.observable_classification == self.ObservableTypes.DOMAIN.value:
+            # https://docs.polyswarm.io/consumers/polyswarm-customer-api-v3#ioc-searching
+            return api.check_known_hosts(domains=[self.observable_name])[0].json()
+
+        elif self.observable_classification == self.ObservableTypes.IP.value:
+            return api.check_known_hosts(ips=[self.observable_name])[0].json()
+
+    def get_results(self, results):
+        for result in results:  # should run only once
+            if result.failed:
+                raise AnalyzerRunException(
+                    f"Failed to get results from Polyswarm for {self.observable_name}"
+                )
+            if not result.assertions:
+                raise AnalyzerRunException(
+                    f"Failed to get assertions from Polyswarm for {self.observable_name}"
+                )
+            return self.construct_result(result)
+
+    @classmethod
+    def _monkeypatch(cls):
+        patches = [
+            if_mock_connections(
+                patch.object(
+                    PolyswarmObs,
+                    "run",
+                    # flake8: noqa
+                    return_value={
+                        "positives": 1,
+                        "total": 1,
+                        "PolyScore": 0.5,
+                        "sha256": "sha256",
+                        "md5": "md5",
+                        "sha1": "sha1",
+                        "extended_type": "extended_type",
+                        "first_seen": "2024-05-22T12:25:45.001333Z",
+                        "last_seen": "2024-05-22T12:25:45.001333Z",
+                        "permalink": "https://polyswarm.network/permalink",
+                        "assertions": [{"engine": "engine", "asserts": "Malicious"}],
+                    },
+                ),
+            )
+        ]
+        return super()._monkeypatch(patches=patches)
diff --git a/docs/source/Usage.md b/docs/source/Usage.md
index c77658c825..1266cb3fc8 100644
--- a/docs/source/Usage.md
+++ b/docs/source/Usage.md
@@ -155,6 +155,7 @@ The following is the list of the available analyzers you can run out-of-the-box.
 - `Blint`: [Blint](https://github.com/owasp-dep-scan/blint) is a Binary Linter that checks the security properties and capabilities of your executables. Supported binary formats: - Android (apk, aab) - ELF (GNU, musl) - PE (exe, dll) - Mach-O (x64, arm64)
 - `MalprobScan` : [Malprob](https://malprob.io/) is a leading malware detection and identification service, powered by cutting-edge AI technology.
 - `IocExtract`: [IocExtract](https://github.com/InQuest/iocextract) package is a library and command line interface (CLI) for extracting URLs, IP addresses, MD5/SHA hashes, email addresses, and YARA rules from text corpora. It allows for you to extract encoded and "defanged" IOCs and optionally decode or refang them.
+- `Polyswarm`: Scan a file using the [Polyswarm](https://docs.polyswarm.io/) API.
 
 ##### Observable analyzers (ip, domain, url, hash)
 
@@ -274,6 +275,7 @@ threat prevention, reducing and automating the manual work of security analysts.
 * `Spamhaus_DROP`:[Spamhaus_DROP](https://www.spamhaus.org/blocklists/do-not-route-or-peer/) protects from activity directly originating from rogue networks, such as spam campaigns, encryption via ransomware, DNS-hijacking and exploit attempts, authentication attacks to discover working access credentials, harvesting, DDoS attacks.
 * `CriminalIp`: [Criminal IP](https://www.criminalip.io/) is an OSINT search engine specialized in attack surface assessment and threat hunting. It offers extensive cyber threat intelligence, including device reputation, geolocation, IP reputation for C2 or scanners, domain safety, malicious link detection, and APT attack vectors via search and API.
 * `CriminalIp_Scan`:CriminalIp_Scan is an implementation of scan APIs provided by [CriminalIp](https://www.criminalip.io/) specifically for domains. Criminal IP is an OSINT search engine specialized in attack surface assessment and threat hunting. It offers extensive cyber threat intelligence, including device reputation, geolocation, IP reputation for C2 or scanners, domain safety, malicious link detection, and APT attack vectors via search and API.
+* `PolyswarmObs`: Scan an observable using [Polyswarm](https://docs.polyswarm.io/) API. Paid plan is required for IP and Domain scans. Hash scan is free.
 
 ##### Generic analyzers (email, phone number, etc.; anything really)
 
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 37726bf856..24e9991f6c 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -81,6 +81,7 @@ permhash==0.1.4
 ail_typo_squatting==2.7.4
 iocextract==1.16.1
 ioc-finder==7.0.0
+polyswarm-api==3.8.0
 
 # this is required because XLMMacroDeobfuscator does not pin the following packages
 pyxlsb2==0.0.8
diff --git a/tests/api_app/test_api.py b/tests/api_app/test_api.py
index d6d31f6560..f824ab8d32 100644
--- a/tests/api_app/test_api.py
+++ b/tests/api_app/test_api.py
@@ -129,7 +129,7 @@ def test_analyze_file__pcap(self):
         self.assertEqual(md5, job.md5)
 
         self.assertCountEqual(
-            ["Suricata", "YARAify_File_Scan", "Hfinger", "DetectItEasy"],
+            ["Suricata", "YARAify_File_Scan", "Hfinger", "DetectItEasy", "Polyswarm"],
             list(job.analyzers_to_execute.all().values_list("name", flat=True)),
         )
 