Malware bazaar ingestor (intelowlproject#2259)
* added malware bazaar ingestor

fixed json serialization for types: bytes and File

* typo

* added support to delayed celery jobs startup for ingestors

* moved url to config parameter

this way you can set a health checker if you want

* fixed wrong access to observable name

* changed timedelta from class to object

* added _monkeypatch()

* omitted full_name field and generated ingestors plugin config

* added threatfox url migration

* fixed linter

* fixed linter

* fixed linter

* fixed linter

* fixed linter

* fixed linter

* fixed linter

* fixed linter

* updated threatfox migration

* changed migration order

* fixed reverse migrations

* fixed default signatures

* fixed default signatures

* added malware bazaar userprofile

fixed threatfox migration

* isort

* added default value to timedelta

* fixed delay parameter default value and int conversion

* fixed userprofile dumpplugin

* reduced code complexity and fixed generator job creation

* fixed deepsource warnings

* fixed deepsource cyclic import

* changed order PivotConfigurationException

* made code review changes

* fixed errors

* fixed errors
federicofantini authored and Michalsus committed Oct 11, 2024
1 parent e144e9b commit 9781964
Showing 15 changed files with 651 additions and 26 deletions.
17 changes: 14 additions & 3 deletions api_app/classes.py
@@ -1,3 +1,4 @@
import base64
import logging
import traceback
import typing
@@ -7,6 +8,7 @@
import requests
from billiard.exceptions import SoftTimeLimitExceeded
from django.conf import settings
from django.core.files import File
from django.utils import timezone
from django.utils.functional import cached_property
from requests import HTTPError
@@ -121,9 +123,18 @@ def after_run(self):
self.report.save()

def after_run_success(self, content: typing.Any):
if isinstance(content, typing.Generator):
content = list(content)
self.report.report = content
# avoiding JSON serialization errors for types: File and bytes
report_content = []
if isinstance(content, typing.List):
for n in content:
if isinstance(n, File):
report_content.append(base64.b64encode(n.read()).decode("utf-8"))
elif isinstance(n, bytes):
report_content.append(base64.b64encode(n).decode("utf-8"))
else:
report_content.append(n)

self.report.report = report_content
self.report.status = self.report.Status.SUCCESS.value
self.report.save(update_fields=["status", "report"])

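The change above is what the commit message calls "fixed json serialization for types: bytes and File": report items that are raw bytes or Django File objects get base64-encoded before the report is saved, so it stays JSON-serializable. A minimal standalone sketch of the same idea outside IntelOwl (function name and payload are illustrative):

import base64
import io
import json

def to_json_safe(items):
    # Base64-encode bytes and file-like objects; pass everything else through untouched.
    safe = []
    for item in items:
        if hasattr(item, "read"):  # file-like, e.g. django.core.files.File
            safe.append(base64.b64encode(item.read()).decode("utf-8"))
        elif isinstance(item, bytes):
            safe.append(base64.b64encode(item).decode("utf-8"))
        else:
            safe.append(item)
    return safe

# json.dumps would fail on the raw bytes / BytesIO entries without the conversion.
payload = [b"\x4d\x5a\x90\x00", io.BytesIO(b"sample"), {"sha256": "abc"}]
print(json.dumps(to_json_safe(payload)))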
6 changes: 5 additions & 1 deletion api_app/ingestors_manager/classes.py
@@ -55,15 +55,19 @@ def before_run(self):
self._config.validate_playbook_to_execute(self._user)

def after_run_success(self, content):
# exhaust generator
if isinstance(content, typing.Generator):
content = list(content)

super().after_run_success(content)
self._config: IngestorConfig
# exhaust generator
deque(
self._config.create_jobs(
# every job created from an ingestor
content,
TLP.CLEAR.value,
self._user,
self._config.delay,
),
maxlen=0,
)
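Ingestor.run() yields its results lazily and IngestorConfig.create_jobs() returns a generator as well, so nothing runs until the iterator is consumed; deque(..., maxlen=0) is the standard idiom for draining an iterator for its side effects without keeping any results in memory. A small standalone illustration of the idiom (not IntelOwl code):

from collections import deque

def create_jobs(samples):
    # Generator: each yield stands in for one job being created as a side effect.
    for sample in samples:
        print(f"creating job for {sample}")
        yield sample

gen = create_jobs(["a.exe", "b.exe"])  # nothing happens yet: generators are lazy
deque(gen, maxlen=0)                   # exhausts the generator, discarding every item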
160 changes: 160 additions & 0 deletions api_app/ingestors_manager/ingestors/malware_bazaar.py
@@ -0,0 +1,160 @@
import io
import logging
import time
from typing import Any, Iterable
from unittest.mock import patch

import pyzipper
import requests
from django.utils import timezone

from api_app.ingestors_manager.classes import Ingestor
from api_app.ingestors_manager.exceptions import IngestorRunException
from tests.mock_utils import MockUpResponse, if_mock_connections

logger = logging.getLogger(__name__)


class MalwareBazaar(Ingestor):
# API endpoint
url: str
# Download samples that are up to X hours old
hours: int
# Download samples from chosen signatures (aka malware families)
signatures: str

@classmethod
def update(cls) -> bool:
pass

# retrieve information about the given signature
def get_signature_information(self, signature):
result = requests.post(
self.url,
data={"query": "get_siginfo", "signature": signature, "limit": 100},
timeout=30,
)
result.raise_for_status()
content = result.json()
logger.info(f"Malware bazaar data for signature {signature} is {content}")
if content["query_status"] != "ok":
raise IngestorRunException(
f"Query status is invalid: {content['query_status']}"
)
if not isinstance(content["data"], list):
raise IngestorRunException(f"Content {content} not expected")
return content["data"]

# extract file hashes per signature
def get_recent_samples(self):
hashes = set()
current_time = timezone.now()
for signature in self.signatures:
data = self.get_signature_information(signature)
for elem in data:
first_seen = timezone.make_aware(
timezone.datetime.strptime(elem["first_seen"], "%Y-%m-%d %H:%M:%S")
)
diff = int((current_time - first_seen).total_seconds()) // 3600
if elem["signature"] == signature and diff <= self.hours:
hashes.add(elem["sha256_hash"])

last_hours_str = (
"Last hour" if self.hours == 1 else f"Last {self.hours} hours"
)
logger.info(
f"{last_hours_str} {signature} samples: " f"{len(hashes)}/{len(data)}"
)
return hashes

def download_sample(self, h):
logger.info(f"Downloading sample {h}")
sample_archive = requests.post(
self.url,
data={
"query": "get_file",
"sha256_hash": h,
},
timeout=60,
)
sample_archive.raise_for_status()
logger.info(f"Correctly downloaded sample {h}")
logger.info("Sleeping for 1 second")
time.sleep(1)
with pyzipper.AESZipFile(io.BytesIO(sample_archive.content)) as zf:
zf.setpassword(b"infected")
files = zf.namelist()
# expected only one file
if files and len(files) == 1:
return zf.read(files[0])

def run(self) -> Iterable[Any]:
hashes = self.get_recent_samples()
hashes_len = len(hashes)
# download sample and create new analysis
for idx, h in enumerate(hashes):
logger.info(f"Downloading sample {idx+1}/{hashes_len}")
sample = self.download_sample(h)
yield sample

@classmethod
def _monkeypatch(cls):
patches = [
if_mock_connections(
patch(
"requests.post",
return_value=MockUpResponse(
{
"query_status": "ok",
"data": [
{
"sha256_hash": "c5c810beaf075f8fee52146b381b0f94a6"
"e303fada3bce12bcc07fbfa07ba07e",
"sha3_384_hash": "bdd25a594b5a5d8ab14b00c04ee75d6a"
"476bf2a7df49223284eebfac82be107a"
"b94ffaae294ef4cf0a1c23a206e1fbd9",
"sha1_hash": "3fea40223c02a15678912a29147d2b32d05c"
"46df",
"md5_hash": "dc591fd6d108b50bd9aa1f3dce2f3fe4",
"first_seen": "2024-04-11 12:35:10",
"last_seen": None,
"file_name": "17128389081d4616ae42b2693f5ea6783112"
"f41cb2ee5184f49d983f8bf833df0b0e97b4"
"29449.dat-decoded",
"file_size": 240128,
"file_type_mime": "application/x-dosexec",
"file_type": "exe",
"reporter": "abuse_ch",
"anonymous": 0,
"signature": "AgentTesla",
"imphash": "f34d5f2d4577ed6d9ceec516c1f5a744",
"tlsh": "T17534FD037E88EB15E5A87E3782EF6C2413B2B0C"
"71633C60B6F49AF6518516426D7E72D",
"telfhash": None,
"gimphash": None,
"ssdeep": "3072:z+ymieCL2QfOdb/TmqtbqRFP55EMX+CWQ:"
"z+ymieCLPfOdbqq9qRFvXJW",
"dhash_icon": None,
"tags": ["AgentTesla", "base64-decoded", "exe"],
"code_sign": [],
"intelligence": {
"clamav": None,
"downloads": "338",
"uploads": "1",
"mail": None,
},
}
],
},
200,
),
),
patch(
"requests.post",
return_value=MockUpResponse(
{}, content=b"AgentTesla malware downloaded!", status_code=200
),
),
)
]
return super()._monkeypatch(patches=patches)
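MalwareBazaar serves each sample as an AES-encrypted ZIP protected with the conventional password "infected"; download_sample() opens it with pyzipper and returns the bytes of the single expected member. A minimal standalone sketch of that unpacking step (the archive here is built in memory with a made-up file name and content, purely for illustration):

import io

import pyzipper

def build_encrypted_archive() -> bytes:
    # Build an in-memory AES-encrypted ZIP, similar to how MalwareBazaar packages samples.
    buf = io.BytesIO()
    with pyzipper.AESZipFile(buf, "w", encryption=pyzipper.WZ_AES) as zf:
        zf.setpassword(b"infected")
        zf.writestr("sample.exe", b"not a real PE file")
    return buf.getvalue()

def extract_single_member(archive: bytes) -> bytes:
    # Mirrors the unzip logic in download_sample(): exactly one member is expected.
    with pyzipper.AESZipFile(io.BytesIO(archive)) as zf:
        zf.setpassword(b"infected")
        names = zf.namelist()
        if names and len(names) == 1:
            return zf.read(names[0])
        raise ValueError(f"expected exactly one member, got {names}")

print(extract_single_member(build_encrypted_archive()))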
13 changes: 8 additions & 5 deletions api_app/ingestors_manager/ingestors/threatfox.py
@@ -12,17 +12,20 @@


class ThreatFox(Ingestor):
# API endpoint
url = "https://threatfox-api.abuse.ch/api/v1/"
# Days to check. From 1 to 7
days: int

BASE_URL = "https://threatfox-api.abuse.ch/api/v1/"
@classmethod
def update(cls) -> bool:
pass

def run(self) -> Iterable[Any]:
result = requests.post(
self.BASE_URL, json={"query": "get_iocs", "days": self.days}
)
result = requests.post(self.url, json={"query": "get_iocs", "days": self.days})
result.raise_for_status()
content = result.json()
logger.info(f"Threatfox data is {content}")
logger.info(f"ThreatFox data is {content}")
if content["query_status"] != "ok":
raise IngestorRunException(
f"Query status is invalid: {content['query_status']}"
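The hard-coded BASE_URL becomes a url plugin parameter, which is what the commit bullet "moved url to config parameter" refers to: the endpoint can now be changed (and, per the author, pointed at a health checker) without touching the code. The request itself is a plain POST; a standalone sketch of the query the ingestor sends follows (the days value and the timeout are illustrative, and the diff itself sets neither a timeout nor authentication):

import requests

THREATFOX_URL = "https://threatfox-api.abuse.ch/api/v1/"  # default value of the new url parameter

response = requests.post(
    THREATFOX_URL,
    json={"query": "get_iocs", "days": 1},
    timeout=30,
)
response.raise_for_status()
content = response.json()
if content["query_status"] != "ok":
    raise RuntimeError(f"Query status is invalid: {content['query_status']}")
print(f"received {len(content['data'])} IOCs")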
22 changes: 22 additions & 0 deletions api_app/ingestors_manager/migrations/0018_ingestorconfig_delay.py
@@ -0,0 +1,22 @@
# Generated by Django 4.2.11 on 2024-04-09 15:19

import datetime

from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("ingestors_manager", "0017_4_change_primary_key"),
]

operations = [
migrations.AddField(
model_name="ingestorconfig",
name="delay",
field=models.DurationField(
default=datetime.timedelta,
help_text="Expects data in the format 'DD HH:MM:SS'",
),
),
]
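Note the default=datetime.timedelta: Django accepts a callable as a field default, and calling the timedelta class with no arguments yields timedelta(0), i.e. no delay (which lines up with the "added default value to timedelta" commit bullet). Below is a rough, illustrative sketch of how such a delay could be spread across the jobs an ingestor spawns; it is not the actual IntelOwl job-creation code:

from datetime import timedelta

# A callable default: timedelta() == timedelta(0), so the field defaults to "no delay".
assert timedelta().total_seconds() == 0

# Illustrative only: staggering job start times with a per-config delay,
# in the spirit of the "delayed celery jobs startup for ingestors" commit bullet.
delay = timedelta(seconds=30)
samples = ["sample-1", "sample-2", "sample-3"]
for index, sample in enumerate(samples):
    countdown = int((delay * index).total_seconds())  # int conversion, as another bullet notes
    print(f"{sample}: schedule with countdown={countdown}s")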