# SkillScan/tests/test_integration.py (NMitchem/SkillScan, main branch)

"""End-to-end integration tests — parse + all analyzers + score on each fixture."""

from pathlib import Path

from skillscan.parser import parse_skill_file
from skillscan.analyzers import get_all_analyzers
from skillscan.scorer import score_findings
from skillscan.models import ScanResult

FIXTURES_DIR = Path(__file__).parent / "fixtures"


def _full_scan(fixture_name: str) -> ScanResult:
    """Run the whole scan pipeline over one fixture directory.

    The fixture's ``SKILL.md`` is parsed, every analyzer returned by
    ``get_all_analyzers()`` is run against the parsed skill, and the
    combined findings are ordered by ``severity`` (highest first) before
    being scored.

    Args:
        fixture_name: Name of a sub-directory of ``FIXTURES_DIR`` that
            contains a ``SKILL.md`` file.

    Returns:
        A ``ScanResult`` holding the sorted findings, the computed risk
        score and a fixed threshold of ``6.0``.
    """
    skill_file = FIXTURES_DIR / fixture_name / "SKILL.md"
    parsed = parse_skill_file(skill_file)

    # Flatten the per-analyzer results into a single list, analyzer by analyzer.
    collected = [
        finding
        for analyzer in get_all_analyzers()
        for finding in analyzer.analyze(parsed)
    ]
    ranked = sorted(collected, key=lambda item: item.severity, reverse=True)
    risk = score_findings(ranked)

    return ScanResult(
        skill_name=parsed.name,
        skill_path=str(skill_file),
        findings=ranked,
        risk_score=risk,
        threshold=6.0,
    )


class TestCleanSkillIntegration:
    """Full-pipeline check for the ``clean-skill`` fixture."""

    def test_clean_skill_passes(self):
        """The scan must report ``passed`` and a risk score below 3.0."""
        scan = _full_scan("clean-skill")
        assert scan.passed is True
        assert scan.risk_score < 3.0


class TestClawhavocIntegration:
    """Full-pipeline check for the ``clawhavoc-sample`` fixture."""

    def test_clawhavoc_fails(self):
        """The scan must fail, score at least 6.0, and hit shell + url."""
        scan = _full_scan("clawhavoc-sample")
        assert scan.passed is False
        assert scan.risk_score >= 6.0

        # Names of every analyzer that produced at least one finding.
        fired = {finding.analyzer for finding in scan.findings}
        for expected in ("shell", "url"):
            assert expected in fired


class TestAmosIntegration:
    """Full-pipeline check for the ``amos-sample`` fixture."""

    def test_amos_fails(self):
        """The scan must fail, score at least 6.0, and hit macos + obfuscation."""
        scan = _full_scan("amos-sample")
        assert scan.passed is False
        assert scan.risk_score >= 6.0

        # Names of every analyzer that produced at least one finding.
        fired = {finding.analyzer for finding in scan.findings}
        for expected in ("macos", "obfuscation"):
            assert expected in fired


class TestPromptInjectionIntegration:
    """Full-pipeline check for the ``prompt-injection`` fixture."""

    def test_prompt_injection_detected(self):
        """At least two findings are expected, one of them from ``envvar``."""
        scan = _full_scan("prompt-injection")
        assert scan.finding_count >= 2

        # Names of every analyzer that produced at least one finding.
        fired = {finding.analyzer for finding in scan.findings}
        assert "envvar" in fired


class TestTyposquatIntegration:
    """Full-pipeline check for the ``typosquat`` fixture."""

    def test_typosquat_detected(self):
        """The ``typosquat`` analyzer must contribute at least one finding."""
        scan = _full_scan("typosquat")
        fired = {finding.analyzer for finding in scan.findings}
        assert "typosquat" in fired


class TestDelayedActivationIntegration:
    """Full-pipeline check for the ``delayed-activation`` fixture."""

    def test_static_audit_catches_some_signals(self):
        """Static audit should surface some indicator, even if temporal logic is missed."""
        scan = _full_scan("delayed-activation")
        # The intent is that URL/network patterns get picked up; the check
        # itself only requires one finding of any kind.
        assert scan.finding_count >= 1