Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .ci/benchmark.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ FileType FileNumber ValidLines Positives Negatives
.zsh 6 872 11
.zsh-theme 1 97 1
TOTAL: 11361 16995334 17245 53521
credsweeper result_cnt : 16960, lost_cnt : 0, true_cnt : 16862, false_cnt : 98
credsweeper result_cnt : 16975, lost_cnt : 0, true_cnt : 16877, false_cnt : 98
Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1
------------------------------ ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
API 244 4000 239 239 0 4000 5 0.000000 0.020492 0.998822 1.000000 0.979508 0.989648
Expand All @@ -240,7 +240,7 @@ Azure Access Token 24 0 17 17
BASE64 Private Key 22 4 22 22 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
BASE64 encoded PEM Private Key 12 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Basic Authorization 688 554 688 688 0 554 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Bearer Authorization 182 0 178 178 0 0 4 0.021978 0.978022 1.000000 0.978022 0.988889
Bearer Authorization 182 0 182 182 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
CMD ConvertTo-SecureString 13 4 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
CMD Password 33 137 33 33 0 137 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
CMD Secret 1 17 1 1 0 17 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Expand All @@ -264,12 +264,12 @@ Jira / Confluence PAT token 0 4 0
Key 4288 20727 4283 4265 18 20709 23 0.000868 0.005364 0.998361 0.995797 0.994636 0.995216
MailGun API Key 8 0 8 8 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
NKEY Seed 60 0 59 59 0 0 1 0.016667 0.983333 1.000000 0.983333 0.991597
NTLM Token 4 0 0 0 0 4 1.000000 0.000000 0.000000
NTLM Token 4 0 4 4 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Nonce 130 110 129 128 1 109 2 0.009091 0.015385 0.987500 0.992248 0.984615 0.988417
OTP / 2FA Secret 64 3 56 54 2 1 10 0.666667 0.156250 0.820896 0.964286 0.843750 0.900000
Other 0 20 0 0 20 0 0.000000 1.000000
PEM Private Key 1150 76 1154 1150 4 72 0 0.052632 0.000000 0.996737 0.996534 1.000000 0.998264
Password 2578 11383 2503 2492 11 11372 86 0.000966 0.033359 0.993052 0.995605 0.966641 0.980909
Password 2578 11383 2504 2493 11 11372 85 0.000966 0.032971 0.993124 0.995607 0.967029 0.981110
Perplexity API Key 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Postman Credentials 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
SQL Password 44 14 42 42 0 14 2 0.000000 0.045455 0.965517 1.000000 0.954545 0.976744
Expand All @@ -279,8 +279,8 @@ Secret 1527 2474 1519 1517
Slack Token 15 1 15 15 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Stripe Credentials 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Tencent WeChat API App ID 47 0 47 47 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Token 1142 5267 1061 1058 3 5264 84 0.000570 0.073555 0.986425 0.997172 0.926445 0.960508
Token 1142 5267 1067 1064 3 5264 78 0.000570 0.068301 0.987362 0.997188 0.931699 0.963332
Twilio Credentials 30 39 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
URL Credentials 225 401 221 221 0 401 4 0.000000 0.017778 0.993610 1.000000 0.982222 0.991031
UUID 2562 3671 2559 2543 16 3655 19 0.004358 0.007416 0.994385 0.993748 0.992584 0.993165
17245 53521 16963 16862 98 53423 383 0.001831 0.022209 0.993203 0.994222 0.977791 0.985938
17245 53521 16978 16877 98 53423 368 0.001831 0.021340 0.993415 0.994227 0.978660 0.986382
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
run: |
banner="$(python -m credsweeper --banner | head -1)"
echo "banner = '${banner}'"
if [ "CredSweeper 1.13.4 crc32:830d94c9" != "${banner}" ]; then
if [ "CredSweeper 1.13.4 crc32:404811d6" != "${banner}" ]; then
echo "Update the check for '${banner}'"
exit 1
fi
Expand Down
4 changes: 2 additions & 2 deletions credsweeper/filters/value_allowlist_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ValueAllowlistCheck(Filter):
ALLOWED_PATTERN = re.compile(Util.get_regex_combine_or(ALLOWED), flags=re.IGNORECASE)

ALLOWED_QUOTED = [
r"\$[a-z_]+[0-9a-z_]*([$\s]|$)", #
r"\$[a-z_][0-9a-z_]+((::|->|\.)[a-z_]|\[|$)", #
r"\$\([^)]+\)", #
r".*\*\*\*", #
]
Expand All @@ -34,7 +34,7 @@ class ValueAllowlistCheck(Filter):

ALLOWED_UNQUOTED = [
r"[~a-z0-9_]+((\.|->)[a-z0-9_]+)+\(.*$", #
r"\$[a-z_]+[0-9a-z_]*\b", #
r"\$[a-z_][0-9a-z_]+((::|->|\.)[a-z_]|\[|$)", #
r"\$\([.0-9a-z_-]+", #
r".*\*\*\*\*\*", #
]
Expand Down
20 changes: 12 additions & 8 deletions credsweeper/filters/value_pattern_check.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from typing import Optional

from credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH
from credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH, MIN_DATA_LEN
from credsweeper.config.config import Config
from credsweeper.credentials.line_data import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand Down Expand Up @@ -50,10 +50,13 @@ def __init__(self, config: Optional[Config] = None, pattern_len: Optional[int] =
@staticmethod
def get_pattern(pattern_len: int) -> re.Pattern:
"""Creates regex pattern to find N or more identical characters in sequence"""
if DEFAULT_PATTERN_LEN < pattern_len:
pattern = fr"(\S)\1{{{str(pattern_len - 1)},}}"
pattern_length = max(DEFAULT_PATTERN_LEN, pattern_len)
if MIN_DATA_LEN <= pattern_length:
# base64 long sequences may contain 0x00 or 0xFF inside
pattern = fr"([^\sA/_])\1{{{str(pattern_length-1)},}}"
else:
pattern = r"(\S)\1{3,}"
# up to 256 symbols length
pattern = fr"(\S)\1{{{str(pattern_length-1)},}}"
return re.compile(pattern)

def equal_pattern_check(self, value: str, bit_length: int) -> bool:
Expand All @@ -67,7 +70,7 @@ def equal_pattern_check(self, value: str, bit_length: int) -> bool:
True if contain and False if not

"""
if self.patterns[bit_length].findall(value):
if self.patterns[bit_length].search(value):
return True
return False

Expand Down Expand Up @@ -146,9 +149,10 @@ def duple_pattern_check(self, value: str, bit_length: int) -> bool:

"""
even_value = value[0::2]
odd_value = value[1::2]
if self.check_val(even_value, bit_length) and self.check_val(odd_value, bit_length):
return True
if self.check_val(even_value, bit_length):
odd_value = value[1::2]
if self.check_val(odd_value, bit_length):
return True
return False

def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
Expand Down
24 changes: 15 additions & 9 deletions credsweeper/filters/value_similarity_check.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from difflib import SequenceMatcher
from typing import Optional

from credsweeper.common.constants import MIN_VALUE_LENGTH
from credsweeper.config.config import Config
from credsweeper.credentials.line_data import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters.filter import Filter


class ValueSimilarityCheck(Filter):
"""Check if candidate value is at least 70% same as candidate keyword. Like: `secret = "mysecret"`."""
"""Check if candidate value is over 75% similarity as candidate variable. Like: `secret = "mysecret"` (0.8571)."""

def __init__(self, config: Optional[Config] = None) -> None:
pass
Expand All @@ -23,12 +25,16 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
True, if need to filter candidate and False if left

"""
# Cannot evaluate if key is None
if line_data.key is None:
return False
if line_data.key.lower() in line_data.value.lower() and \
len(line_data.key) / len(line_data.value) >= 0.7:
return True
if line_data.variable is not None and line_data.value in line_data.variable:
return True
if line_data.variable and line_data.value:
variable_lower = line_data.variable.lower()
value_lower = line_data.value.lower()
if len(value_lower) <= len(variable_lower):
if value_lower in variable_lower:
return True
elif MIN_VALUE_LENGTH <= len(variable_lower):
# `api` and `key` may be in the value
if variable_lower in value_lower:
return True
if 0.75 < SequenceMatcher(None, variable_lower, value_lower).ratio():
return True
return False
6 changes: 3 additions & 3 deletions credsweeper/ml_model/ml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ def __init__(
self.common_feature_list = []
self.unique_feature_list = []
if logger.isEnabledFor(logging.INFO):
config_dbg = str(model_config) if logger.isEnabledFor(logging.DEBUG) else ''
config_md5 = hashlib.md5(__ml_config_data).hexdigest()
model_md5 = hashlib.md5(self.__ml_model_data).hexdigest()
logger.info("Init ML validator with providers: '%s' ; model:'%s' md5:%s ; config:'%s' md5:%s ; %s",
self.providers, ml_config_path, config_md5, ml_model_path, model_md5, config_dbg)
logger.info("Init ML validator with providers: '%s' ; model:'%s' md5:%s ; config:'%s' md5:%s",
self.providers, ml_config_path, config_md5, ml_model_path, model_md5)
logger.debug(str(model_config))
for feature_definition in model_config["features"]:
feature_class = feature_definition["type"]
kwargs = feature_definition.get("kwargs", {})
Expand Down
16 changes: 16 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1434,6 +1434,22 @@
- code
- doc

- name: NTLM Token
severity: medium
confidence: strong
type: pattern
values:
- (?P<value>TlRMTVNTUAADAAAA[=0-9A-Za-z_/+-]{8,8000})(?![0-9A-Za-z_/+-])
filter_type:
- ValueMorphemesCheck(2)
- ValuePatternCheck
min_line_len: 160
required_substrings:
- TlRMTVNTUAADAAAA
target:
- doc
- code

- name: Basic Authorization
severity: medium
confidence: strong
Expand Down
10 changes: 5 additions & 5 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT = 166
SAMPLES_FILES_COUNT = 167

# the lowest value of ML threshold is used to display possible lowest values
NEGLIGIBLE_ML_THRESHOLD = 0.0001

# with option --doc & NEGLIGIBLE_ML_THRESHOLD
SAMPLES_IN_DOC = 878
SAMPLES_IN_DOC = 879

# credentials count after scan without filters and ML validations
SAMPLES_REGEX_COUNT = 648
SAMPLES_REGEX_COUNT = 650

# credentials count after scan with filters and without ML validation
SAMPLES_FILTERED_COUNT = 535
SAMPLES_FILTERED_COUNT = 537

# credentials count after default post-processing
SAMPLES_POST_CRED_COUNT = 463
SAMPLES_POST_CRED_COUNT = 464

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138
Expand Down
21 changes: 21 additions & 0 deletions tests/data/depth_3_pedantic.json
Original file line number Diff line number Diff line change
Expand Up @@ -7573,6 +7573,27 @@
}
]
},
{
"rule": "NTLM Token",
"severity": "medium",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "Positive: TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"line_num": 1,
"path": "./tests/samples/ntlm",
"info": "FILE:./tests/samples/ntlm|RAW",
"variable": null,
"variable_start": -2,
"variable_end": -2,
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"value_start": 10,
"value_end": 190,
"entropy": 3.79713
}
]
},
{
"rule": "NuGet API key",
"severity": "high",
Expand Down
21 changes: 21 additions & 0 deletions tests/data/doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -15661,6 +15661,27 @@
}
]
},
{
"rule": "NTLM Token",
"severity": "medium",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"line_num": 1,
"path": "./tests/samples/ntlm",
"info": "FILE:./tests/samples/ntlm|RAW",
"variable": null,
"variable_start": -2,
"variable_end": -2,
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"value_start": 10,
"value_end": 190,
"entropy": 3.79713
}
]
},
{
"rule": "NuGet API key",
"severity": "high",
Expand Down
42 changes: 42 additions & 0 deletions tests/data/no_filters_no_ml.json
Original file line number Diff line number Diff line change
Expand Up @@ -9380,6 +9380,48 @@
}
]
},
{
"rule": "NTLM Token",
"severity": "medium",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "Positive: TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"line_num": 1,
"path": "./tests/samples/ntlm",
"info": "",
"variable": null,
"variable_start": -2,
"variable_end": -2,
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"value_start": 10,
"value_end": 190,
"entropy": 3.79713
}
]
},
{
"rule": "NTLM Token",
"severity": "medium",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "Negative: TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmThisIsAnExamplewByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"line_num": 2,
"path": "./tests/samples/ntlm",
"info": "",
"variable": null,
"variable_start": -2,
"variable_end": -2,
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmThisIsAnExamplewByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"value_start": 10,
"value_end": 190,
"entropy": 3.94022
}
]
},
{
"rule": "NuGet API key",
"severity": "high",
Expand Down
42 changes: 42 additions & 0 deletions tests/data/no_ml.json
Original file line number Diff line number Diff line change
Expand Up @@ -8071,6 +8071,27 @@
}
]
},
{
"rule": "NTLM Token",
"severity": "medium",
"confidence": "strong",
"ml_probability": null,
"line_data_list": [
{
"line": "Positive: TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"line_num": 1,
"path": "./tests/samples/ntlm",
"info": "",
"variable": null,
"variable_start": -2,
"variable_end": -2,
"value": "TlRMTVNTUAADAAAAGAAYAFYAAAAYABgAbgAAAAAAAABIAAAADgAOAEgAAAAAAAAAVgAAAAAAAACGAAAARmFLZURhVGEAAAAPQwByAGUAZABTAHcAZQBlAHCgZQBy3wAAAAAAAAAAAAAAAAAAAAAph0MQmDQmCVaJEmhiOGSYIXNJMoc2KLo=",
"value_start": 10,
"value_end": 190,
"entropy": 3.79713
}
]
},
{
"rule": "NuGet API key",
"severity": "high",
Expand Down Expand Up @@ -9327,6 +9348,27 @@
}
]
},
{
"rule": "Salt",
"severity": "low",
"confidence": "moderate",
"ml_probability": null,
"line_data_list": [
{
"line": "var Himalayan_salt = \"$hal$1te$TnnGdhednJsdQ5nfetwZ\";",
"line_num": 1,
"path": "./tests/samples/salt.hs",
"info": "",
"variable": "Himalayan_salt",
"variable_start": 4,
"variable_end": 18,
"value": "$hal$1te$TnnGdhednJsdQ5nfetwZ",
"value_start": 22,
"value_end": 51,
"entropy": 3.9523
}
]
},
{
"rule": "Salt",
"severity": "low",
Expand Down
Loading