Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
run: |
banner="$(python -m credsweeper --banner | head -1)"
echo "banner = '${banner}'"
if [ "CredSweeper 1.14.5 crc32:da87b2ca" != "${banner}" ]; then
if [ "CredSweeper 1.14.6 crc32:765e27c6" != "${banner}" ]; then
echo "Update the check for '${banner}'"
exit 1
fi
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"__version__"
]

__version__ = "1.14.5"
__version__ = "1.14.6"
2 changes: 1 addition & 1 deletion credsweeper/deep_scanner/png_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def yield_png_chunks(data: bytes) -> Generator[Tuple[int, str, bytes], None, Non
else:
raise ValueError(f"Unsupported compression {repr(itxt_data[:2])}")
lang_tag, itxt_data = itxt_data[2:].split(b'\0', 1)
trans_key, itxt_data = itxt_data[2:].split(b'\0', 1)
trans_key, itxt_data = itxt_data.split(b'\0', 1)
yield (offset, f"PNG_ITXT_{'1' if compression else '0'}"
f":{keyword.decode(encoding=UTF_8)}"
f":{lang_tag.decode(encoding=UTF_8)}"
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/file_handler/data_content_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def represent_as_structure(self) -> Optional[bool]:
# # # YAML - almost always recognized
try:
if ':' in self.text and (2 < self.text.count('\n') or 2 < self.text.count('\r')):
self.structure = yaml.load(self.text, Loader=yaml.FullLoader)
self.structure = yaml.safe_load(self.text)
logger.debug("CONVERTED from yaml")
else:
logger.debug("Data do not contain colon mark - weak YAML")
Expand Down
21 changes: 9 additions & 12 deletions credsweeper/ml_model/features/entropy_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,23 @@ class EntropyEvaluation(Feature):

"""

def __init__(self) -> None:
"""Class initializer"""
super().__init__()
# Max size of ML analyzed value is ML_HUNK but value may be bigger
self.hunk_size = 4 * ML_HUNK
self.log2_cache: Dict[int, float] = {x: math.log2(x) for x in range(4, self.hunk_size + 1)}
self.char_sets: List[Set[str]] = [set(x.value) for x in Chars]
# Max size of ML analyzed value is ML_HUNK but value may be bigger
HUNK_SIZE = 4 * ML_HUNK
LOG2_CACHE: Dict[int, float] = {x: math.log2(x) for x in range(4, 4 * ML_HUNK + 1)}
CHAR_SET: List[Set[str]] = [set(x.value) for x in Chars]
RESULT_SIZE = 3 + len(Chars)

def extract(self, candidate: Candidate) -> np.ndarray:
"""Returns real entropy and possible sets of characters"""
# only head of value will be analyzed
result: np.ndarray = np.zeros(shape=3 + len(self.char_sets), dtype=np.float32)
value = candidate.line_data_list[0].value[:self.hunk_size]
result: np.ndarray = np.zeros(shape=EntropyEvaluation.RESULT_SIZE, dtype=np.float32)
value = candidate.line_data_list[0].value[:EntropyEvaluation.HUNK_SIZE]
size = len(value)
uniq, counts = np.unique(list(value), return_counts=True)
if MIN_DATA_LEN <= size:
# evaluate the entropy for a value of at least 4
probabilities = counts / size
hartley_entropy = self.log2_cache.get(size, -1.0)
assert hartley_entropy, str(candidate)
hartley_entropy = EntropyEvaluation.LOG2_CACHE.get(size, -1.0)

# renyi_entropy alpha=0.5
sum_prob_05 = np.sum(probabilities**0.5)
Expand All @@ -59,7 +56,7 @@ def extract(self, candidate: Candidate) -> np.ndarray:
# check charset for non-zero value
# use the new variable to deal with mypy
uniq_set = set(uniq)
for n, i in enumerate(self.char_sets, start=3):
for n, i in enumerate(EntropyEvaluation.CHAR_SET, start=3):
if not uniq_set.difference(i):
result[n] = 1.0

Expand Down
31 changes: 27 additions & 4 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
confidence: moderate
type: pattern
values:
- (?P<variable>[\"'`]?(?i:token|secret|key|키|암호화?|토큰)[\"'`]?)((\s)*[=:](\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,80}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
- (?P<variable>[\"'`]?(?i:token|secret|key|키|암호화?|토큰)[\"'`]?)((\s)*(?P<separator>설정은|:=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=|%3[Dd])(\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,80}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck(4)
Expand All @@ -84,13 +84,14 @@
- 토큰
target:
- doc
use_ml: true

- name: PASSWD_PAIR
severity: medium
confidence: moderate
type: pattern
values:
- (?P<variable>[\"'`]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[\"'`]?)((\s)*[=:](\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
- (?P<variable>[\"'`]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[\"'`]?)((\s)*(?P<separator>설정은|:=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=|%3[Dd])(\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck(4)
Expand All @@ -112,6 +113,7 @@
- 암호
target:
- doc
use_ml: true

- name: IP_ID_PASSWORD_TRIPLE
severity: medium
Expand All @@ -128,6 +130,7 @@
- "."
target:
- doc
use_ml: true

- name: ID_PAIR_PASSWD_PAIR
severity: medium
Expand All @@ -151,6 +154,7 @@
- 암호
target:
- doc
use_ml: true

- name: ID_PASSWD_PAIR
severity: medium
Expand All @@ -173,6 +177,7 @@
- 암호
target:
- doc
use_ml: true

- name: UUID
severity: info
Expand Down Expand Up @@ -204,14 +209,30 @@
- code
- doc

- name: Amazon Bedrock API Key
severity: high
confidence: moderate
type: pattern
values:
- (?:^|/|[^\\0-9A-Za-z+_-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>(ABSK|bedrock-api-key-)[0-9A-Za-z/+]{28,800})(?![0-9A-Za-z/+])
filter_type: GeneralPattern
required_substrings:
- ABSK
- bedrock-api-key-
min_line_len: 44
target:
- code
- doc

- name: AWS Client ID
severity: high
confidence: moderate
type: pattern
values:
- (?:^|/|[^\\0-9A-Za-z+_-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_+-])
- (?:^|/|[^\\0-9A-Za-z+_-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>(A3T[0-9A-Z]|ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_+-])
filter_type: GeneralPattern
required_substrings:
- A3T
- ABIA
- ACCA
- AGPA
Expand Down Expand Up @@ -1000,7 +1021,7 @@
confidence: strong
type: pattern
values:
- (?P<value>(_gitlab_session=|GR1348941|gl(agent|soat|ffct|p[at]t|oas|cbt|imt|[dfr]t)-)[0-9A-Za-z_-]{20,64}(\.[0-9A-Za-z_-]{2,16}){0,2})(?![0-9A-Za-z_-])
- (?P<value>(_gitlab_session=|GR1348941|gl(agent|soat|ffct|p[at]t|oas|cbt|imt|rtr|[dfrw]t)-)[0-9A-Za-z_-]{20,64}(\.[0-9A-Za-z_-]{2,16}){0,2})(?![0-9A-Za-z_-])
filter_type:
- ValuePatternCheck
min_line_len: 25
Expand All @@ -1018,6 +1039,8 @@
- gldt-
- glft-
- glrt-
- glrtr-
- glwt-
target:
- code
- doc
Expand Down
5 changes: 3 additions & 2 deletions credsweeper/scanner/scan_type/multi_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida
Empty list (False) - otherwise.

"""
assert rule.rule_type == RuleType.MULTI, \
"Rules provided to MultiPattern.run should have pattern_type equal to MULTI_PATTERN"
if RuleType.MULTI != rule.rule_type:
raise ValueError(f"Rule `{rule}` provided to `{cls.__name__}`.run "
f"should have pattern_type equal to `{RuleType.MULTI.value}`")

candidates = cls._get_candidates(config, rule, target)

Expand Down
5 changes: 3 additions & 2 deletions credsweeper/scanner/scan_type/pem_key_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida
and filters defined in rule do not remove current line. Empty list - otherwise

"""
assert rule.rule_type == RuleType.PEM_KEY, \
"Rules provided to PemKeyPattern.run should have pattern_type equal to PEM_KEY_PATTERN"
if RuleType.PEM_KEY != rule.rule_type:
raise ValueError(f"Rule `{rule}` provided to `{cls.__name__}`.run "
f"should have pattern_type equal to `{RuleType.PEM_KEY.value}`")
if candidates := cls._get_candidates(config, rule, target):
candidate = candidates[0]
if pem_lines := PemKeyDetector.detect_pem_key(config, target):
Expand Down
5 changes: 5 additions & 0 deletions credsweeper/scanner/scan_type/single_pattern.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List

from credsweeper.common.constants import RuleType
from credsweeper.config.config import Config
from credsweeper.credentials.candidate import Candidate
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand All @@ -25,4 +26,8 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida

"""

if RuleType.PATTERN != rule.rule_type and RuleType.KEYWORD != rule.rule_type:
raise ValueError(f"Rule `{rule}` provided to `{cls.__name__}`.run "
f"should have pattern_type equal to `{RuleType.PATTERN.value}`")

return cls._get_candidates(config, rule, target)
10 changes: 5 additions & 5 deletions credsweeper/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def extract_element_data(element: Any, attr: str) -> str:

@staticmethod
def json_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> Any:
"""Load dictionary from json file"""
"""Load dictionary from JSON file"""
try:
with open(file_path, "r", encoding=encoding) as f:
return json.load(f)
Expand All @@ -362,7 +362,7 @@ def json_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> Any:

@staticmethod
def json_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_ENCODING, indent=4) -> None:
"""Write dictionary to json file"""
"""Write dictionary to JSON file"""
try:
with open(file_path, "w", encoding=encoding) as f:
json.dump(obj, f, indent=indent)
Expand All @@ -371,17 +371,17 @@ def json_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_ENCODING,

@staticmethod
def yaml_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> Any:
"""Load dictionary from yaml file"""
"""Load dictionary from YAML file"""
try:
with open(file_path, "r", encoding=encoding) as f:
return yaml.load(f, Loader=yaml.FullLoader)
return yaml.safe_load(f)
except Exception as exc:
logger.error(f"Failed to read {file_path} {exc}")
return None

@staticmethod
def yaml_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> None:
"""Write dictionary to yaml file"""
"""Write dictionary to YAML file"""
try:
with open(file_path, "w", encoding=encoding) as f:
yaml.dump(obj, f)
Expand Down
8 changes: 4 additions & 4 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@
ZERO_ML_THRESHOLD = 0.0

# with option --doc & NEGLIGIBLE_ML_THRESHOLD
SAMPLES_IN_DOC = 916
SAMPLES_IN_DOC = 924

# credentials count after scan without filters and ML validations
SAMPLES_REGEX_COUNT = 650
SAMPLES_REGEX_COUNT = 655

# credentials count after scan with filters and without ML validation
SAMPLES_FILTERED_COUNT = 539
SAMPLES_FILTERED_COUNT = 544

# credentials count after default post-processing
SAMPLES_POST_CRED_COUNT = 492
SAMPLES_POST_CRED_COUNT = 497

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138
Expand Down
Loading