Hypothesis and Hunt planning support

srozb committed Jan 16, 2025
1 parent eeced2f commit 45c0a6d
Showing 7 changed files with 532 additions and 86 deletions.
98 changes: 87 additions & 11 deletions README.md
@@ -20,7 +20,7 @@ sensai is a Python library and CLI application designed to assist threat hunters

## **Requirements**

* Python >= 3.10 (3.11 recommended)
* All dependencies are listed in `requirements.txt`.

---
@@ -79,21 +79,95 @@ Refer to Ollama documentation for advanced configuration.

### **Basic Usage**

The `sensai` CLI tool provides three main commands: `analyze`, `benchmark`, and `hunt`.

#### **Analyze**

Analyze threat intelligence and extract Indicators of Compromise (IOCs).

```bash
python sensai/cli.py analyze [OPTIONS] SOURCE
```
**Options:**

* `-m, --model TEXT`: LLM model to be used for inference. [required]
* `-s, --chunk-size INTEGER`: Intel document split size. [default: 2600]
* `-o, --chunk-overlap INTEGER`: Intel document split overlap. [default: 300]
* `--num-predict INTEGER`: Maximum number of tokens to predict when generating text (-1 = infinite). [default: -1]
* `--num-ctx INTEGER`: Size of the context window used to generate the next token. [default: 4096]
* `-c, --css-selector TEXT`: Optional CSS selector value to limit the HTML parsing. [default: "body"]
* `-d, --output-dir TEXT`: Location of the report directory. [default: "./"]
* `-i, --write-iocs`: Write the extracted IOCs to a report file. [default: False]
* `-n, --write-intel-docs`: Create a file with the intelligence either scraped from the URL or acquired from the source file. [default: False]
* `-y, --write-hypotheses`: Create a file with proposed hypotheses. [default: False]

**Examples:**

* Analyze a report from a URL:

```bash
python sensai/cli.py analyze -c "body" -m qwen2.5:32b https://example.com/report.html
```

* Analyze a local report file:

```bash
python sensai/cli.py analyze -m qwen2.5:32b report.pdf
```
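The `--chunk-size` and `--chunk-overlap` options control how the intel document is split before it is fed to the model. A minimal sketch of overlap-based splitting — the function name and character-based boundaries here are illustrative assumptions, not the library's actual splitter:

```python
def split_with_overlap(text: str, chunk_size: int = 2600, chunk_overlap: int = 300) -> list[str]:
    """Split text into chunks of at most chunk_size characters,
    with consecutive chunks sharing chunk_overlap characters."""
    if chunk_overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk size")
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        if start + chunk_size >= len(text):
            break
        # Step forward, keeping the tail of this chunk at the head of the next
        start += chunk_size - chunk_overlap
    return chunks

report = "A" * 6000
chunks = split_with_overlap(report, chunk_size=2600, chunk_overlap=300)
print(len(chunks))  # 3
```

Overlap keeps indicators that straddle a chunk boundary visible in at least one chunk, at the cost of some duplicated tokens per inference call.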

#### **Benchmark**

Run benchmarks on multiple language models to evaluate performance.

```bash
python sensai/cli.py benchmark [OPTIONS]
```

**Options:**

* `-m, --models TEXT`: Comma-separated list of models in the format name:size (e.g., qwen2.5:32b,qwen2.5:14b). [required]
* `-s, --chunk-size TEXT`: Comma-separated list of chunk_size values (e.g., 2400,3200). [default: "2600"]
* `-o, --chunk-overlap TEXT`: Comma-separated list of chunk_overlap values (e.g., 150,300). [default: "200"]

**Examples:**

* Benchmark multiple models with various configurations:

```bash
python sensai/cli.py benchmark -m "qwen2.5:32b,qwen2.5:14b" -s "2400,3200" -o "150,300"
```
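Every combination from the comma-separated lists is benchmarked, so the command above produces a grid of runs. A sketch of that expansion, assuming a hypothetical `expand_grid` helper (the tool's actual internals may differ):

```python
from itertools import product

def expand_grid(models: str, chunk_sizes: str, overlaps: str) -> list[tuple[str, int, int]]:
    """Cross every model with every chunk_size/chunk_overlap pair."""
    return [
        (m, int(s), int(o))
        for m, s, o in product(
            models.split(","), chunk_sizes.split(","), overlaps.split(",")
        )
    ]

runs = expand_grid("qwen2.5:32b,qwen2.5:14b", "2400,3200", "150,300")
print(len(runs))  # 2 models x 2 sizes x 2 overlaps = 8
```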

#### **Hunt**

Prepare the hunt plan template based on the given IoCs.

```bash
python sensai/cli.py hunt [OPTIONS] SOURCE
```

**Options:**

* `-m, --model TEXT`: LLM model to be used for inference. [required]
* `-s, --chunk-size INTEGER`: Intel document split size. [default: 3000]
* `-o, --chunk-overlap INTEGER`: Intel document split overlap. [default: 100]
* `--num-predict INTEGER`: Maximum number of tokens to predict when generating text (-1 = infinite). [default: -1]
* `--num-ctx INTEGER`: Size of the context window used to generate the next token. [default: 4096]
* `-d, --work-dir TEXT`: Location of the workspace directory. [default: "./"]
* `-c, --scopes TEXT`: Location of the scopes definition file.
* `-p, --playbooks TEXT`: Location of the playbooks file.
* `-n, --num-hypotheses INTEGER`: Number of hypotheses to generate. [default: 5]
* `-a, --able`: Enrich hypotheses according to the ABLE methodology. [default: False]
* `-q, --quiet`: Suppress output. [default: False]
* `-w, --write-report`: Create a report file 'hunt.json'. [default: False]

**Examples:**

* Prepare a hunt plan from a local file:

```bash
python sensai/cli.py hunt -m qwen2.5:32b report.csv
```
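The `hunt` command consumes IOCs previously extracted by `analyze` (the CLI loads them via `IOCs.from_csv`). A sketch of reading such a CSV with the standard library — the `type`/`value`/`context` column names and sample rows are assumed for illustration, not the tool's actual schema:

```python
import csv
import io

SAMPLE = """type,value,context
ip,203.0.113.7,C2 beacon destination
domain,evil.example.com,payload staging
sha256,9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08,dropper hash
"""

def load_iocs(fp) -> list[dict]:
    """Read IOC rows into dicts keyed by column name."""
    return list(csv.DictReader(fp))

iocs = load_iocs(io.StringIO(SAMPLE))
print(len(iocs))         # 3
print(iocs[0]["value"])  # 203.0.113.7
```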

### **Environment Variables**

@@ -116,6 +190,8 @@ sensai --help

### **Library Usage**

**WORK IN PROGRESS, API CHANGED** - not yet ready.

You can also use the `thsensai` library directly within your Python code for automated threat intelligence analysis.
2 changes: 1 addition & 1 deletion setup.py
@@ -11,7 +11,7 @@ def include(filename) -> str:

setup(
name="thsensai",
version="0.2.1",
description="A library and CLI tool for AI-aided threat hunting and intelligence analysis.",
long_description=include("README.md"),
long_description_content_type="text/markdown",
79 changes: 51 additions & 28 deletions thsensai/cli.py
@@ -1,4 +1,4 @@
# pylint: disable=too-many-arguments, too-many-positional-arguments, too-many-locals

"""
Sensai CLI Tool
@@ -33,7 +33,6 @@
from thsensai.ioc import IOCs
from thsensai.hyp import Hypotheses
from thsensai.hunt import Hunt

app = typer.Typer(help="🏹 Sensai: Threat Hunting and Intelligence Tool")

@@ -183,7 +182,6 @@ def analyze(
hypotheses.generate(iocs_obj.as_csv(), model, params)
hypotheses.display()
hypotheses.write_report(source, params, output_dir)

if write_iocs:
iocs_obj.write_report(source, params, output_dir)
@@ -302,15 +300,45 @@ def hunt(
"-d",
help="Location of the workspace directory",
),
scope_path: str = typer.Option(
None,
"--scopes",
"-c",
help="Location of the scopes definition file",
),
playbook_path: str = typer.Option(
None,
"--playbooks",
"-p",
help="Location of the playbooks file",
),
num_hypotheses: int = typer.Option(
5,
"--num-hypotheses",
"-n",
help="Number of hypotheses to generate",
),
enrich_able: bool = typer.Option(
False,
"--able",
"-a",
help="Enrich hypotheses according to the ABLE methodology",
),
quiet: bool = typer.Option(
False,
"--quiet",
"-q",
help="Suppress output",
),
write_report: bool = typer.Option(
False,
"--write-report",
"-w",
help="Create a report file 'hunt.json'",
),
):
"""
Prepare the hunt plan template based on the given IoCs.
"""

params = {
@@ -328,26 +356,21 @@
# ) as progress:
# iocs = extract_iocs(intel, model, params, progress=progress)


iocs_obj = IOCs.from_csv(source)
hunt_obj = Hunt.from_iocs(iocs_obj)
hunt_obj.generate_meta(model, params)
if num_hypotheses > 0:
hunt_obj.generate_hypotheses(model, params, num_hypotheses=num_hypotheses)
if enrich_able:
hunt_obj.hypotheses.generate_able(model, params)
if scope_path:
hunt_obj.meta.scope.generate_targets(scope_path, hunt_obj, model, params)
if playbook_path:
hunt_obj.meta.scope.generate_playbooks(playbook_path, hunt_obj, model, params)
if not quiet:
hunt_obj.display()
if write_report:
hunt_obj.dump_to_file(f"{work_dir}/hunt.json")

if __name__ == "__main__":
app()