Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:
run: |
banner="$(python -m credsweeper --banner | grep CredSweeper | head -1)"
echo "banner = '${banner}'"
if [ "CredSweeper 1.14.8 crc32:8a4b3391" != "${banner}" ]; then
if [ "CredSweeper 1.15.0 crc32:439081df" != "${banner}" ]; then
echo "Update the check for '${banner}'"
exit 1
fi
Expand Down
4 changes: 2 additions & 2 deletions SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

| Version | Supported |
|---------|--------------------|
| 1.14.x | :white_check_mark: |
| <1.14.x | :x: |
| 1.15.x | :white_check_mark: |
| <1.15.x | :x: |

## Reporting a Vulnerability

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"__version__"
]

__version__ = "1.14.8"
__version__ = "1.15.0"
1 change: 1 addition & 0 deletions credsweeper/filters/value_blocklist_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class ValueBlocklistCheck(Filter):
"string",
"value",
"undefined",
"uuid",
]

def __init__(self, config: Optional[Config] = None) -> None:
Expand Down
12 changes: 6 additions & 6 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
- name: DOC_GET
severity: medium
confidence: weak
confidence: moderate
type: pattern
values:
- (?P<variable>(\w*(?i:비밀번호|비번|패스워드|키|암호화?|토큰|(?<!by)pass(?!e[dns]|ing|ion|age)|\bpwd?\b|token|secret|key|cred)\w*)\s*(설정은|[=:!]{1,3}))?\s*([._0-9A-Za-z\[\]]*get(env)?\s*\(\s*(?(variable)[^,]+|[\"'\\]*(\\*([\"']|&(quot|apos|#3[49]);)){0,4}(\w*(?i:(?<!by)pass(?!e[dns]|ing|ion|age|\s+[a-z]{3,80})|\bpwd?\b|token|secret|key|cred)\w*))(\\*([\"']|&(quot|apos|#3[49]);)){0,4})\s*(,(\s*default\s*=)?|\)\s*or)\s*([brufl@]{1,2}(?=\\*[\"'&]))?(?P<lq>(\\*([\"']|&(quot|apos|#3[49]);)){1,4})(?P<value>(.(?!(?P=lq))){4,80}.?)
- (?P<variable>(\w*(?i:비밀번호|비번|패스워드|키|암호화?|토큰|(?<!by)pass(?!e[dns]|ing|ion|age)|\bpwd?\b|token(?!ize)|secret|key(?!word|board|pad)|cred)\w*)\s*(설정은|[=:!]{1,3}))?\s*([._0-9A-Za-z\[\]]*get(env)?\s*\(\s*(?(variable)[^,]+|[\"'\\]*(\\*([\"']|&(quot|apos|#3[49]);)){0,4}(\w*(?i:(?<!by)pass(?!e[dns]|ing|ion|age|\s+[a-z]{3,64})|\bpwd?\b|token|secret|key|cred)\w*))(\\*([\"']|&(quot|apos|#3[49]);)){0,4})\s*(,(\s*default\s*=)?|\)\s*or)\s*([brufl@]{1,2}(?=\\*[\"'&]))?(?P<lq>(\\*([\"']|&(quot|apos|#3[49]);)){1,4})(?P<value>(.(?!(?P=lq))){4,8000}.?)
filter_type:
- ValueAllowlistCheck
- ValueBlocklistCheck
- LineGitBinaryCheck
- LineUUEPartCheck
- ValueFilePathCheck
- ValuePatternCheck(5)
- ValueLengthCheck(4,80)
min_line_len: 8
required_substrings:
- pass
Expand All @@ -31,17 +31,17 @@

- name: DOC_CREDENTIALS
severity: medium
confidence: weak
confidence: moderate
type: pattern
values:
- (?P<wrap>[\"'`(])?\s*(?P<variable>(\w*(?i:(?<!by)passw?o?r?d?s?(?!e[dns]|ing|ion|age)|pwd?\b|\bp/w\b|token|secret|key|credential)\w*|비밀번호|비번|패스워드|키|암호화?|토큰))[\"'`]*(\s+(?i:is|are|was|were)(\s*[:-])?\s+|\s*(?P<separator>설정은|:=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=|%3[Dd])\s*)(?P<quote>[\"'`]{1,6})?(?P<value>(?(quote)(?(wrap)[^\"'`)]{4,80}|[^\"'`]{4,80})|(?(wrap)[^\"'`)]{4,80}|\S{4,80})))
- (?P<wrap>[\"'`(])?\s*(?P<variable>(\w*(?i:(?<!by)passw?o?r?d?s?(?!e[dns]|ing|ion|age)|pwd?\b|\bp/w\b|token(?!ize)|secret|key(?!word|board|pad)|credential)\w*|비밀번호|비번|패스워드|키|암호화?|토큰))[\"'`]*(\s+(?i:is|are|was|were)(\s*[:-])?\s+|\s*(?P<separator>설정은|:=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=|%3[Dd])\s*)(?P<quote>[\"'`]{1,6})?(?P<value>(?(quote)(?(wrap)[^\"'`)]{4,8000}|[^\"'`]{4,8000})|(?(wrap)[^\"'`)]{4,8000}|\S{4,8000})))
filter_type:
- ValueAllowlistCheck
- ValueBlocklistCheck
- LineGitBinaryCheck
- LineUUEPartCheck
- ValueFilePathCheck
- ValuePatternCheck(5)
- ValueLengthCheck(4,80)
min_line_len: 8
required_substrings:
- pass
Expand Down
3 changes: 0 additions & 3 deletions experiment/hyperparameters.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
HP_DICT = {
"line_lstm_dropout_rate": ((0.4, 0.5, 0.01), 0.47),
"line_lstm_recurrent_dropout_rate": ((0.0, 0.3, 0.01), 0.21),
"variable_lstm_dropout_rate": ((0.4, 0.5, 0.01), 0.42),
"variable_lstm_recurrent_dropout_rate": ((0.0, 0.3, 0.01), 0.18),
"value_lstm_dropout_rate": ((0.4, 0.5, 0.01), 0.47),
"value_lstm_recurrent_dropout_rate": ((0.0, 0.3, 0.01), 0.05),
"dense_a_drop": ((0.0, 0.3, 0.01), 0.21),
"dense_b_drop": ((0.0, 0.3, 0.01), 0.23),
}
9 changes: 3 additions & 6 deletions experiment/ml_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,35 +38,32 @@ def get_hyperparam(self, param_name: str, hp=None) -> Any:
def build(self, hp: Optional[Any]) -> Model:
"""Get keras model with string and feature input and single binary out"""
line_lstm_dropout_rate = self.get_hyperparam("line_lstm_dropout_rate", hp)
line_lstm_recurrent_dropout_rate = self.get_hyperparam("line_lstm_recurrent_dropout_rate", hp)
variable_lstm_dropout_rate = self.get_hyperparam("variable_lstm_dropout_rate", hp)
variable_lstm_recurrent_dropout_rate = self.get_hyperparam("variable_lstm_recurrent_dropout_rate", hp)
value_lstm_dropout_rate = self.get_hyperparam("value_lstm_dropout_rate", hp)
value_lstm_recurrent_dropout_rate = self.get_hyperparam("value_lstm_recurrent_dropout_rate", hp)
dense_a_drop = self.get_hyperparam("dense_a_drop", hp)
dense_b_drop = self.get_hyperparam("dense_b_drop", hp)

line_input = Input(shape=(None, self.line_shape[2]), name="line_input", dtype=self.d_type)
line_lstm = LSTM(units=self.line_shape[1],
dtype=self.d_type,
dropout=line_lstm_dropout_rate,
recurrent_dropout=line_lstm_recurrent_dropout_rate)
recurrent_dropout=0)
line_bidirectional = Bidirectional(layer=line_lstm, name="line_bidirectional")
line_lstm_branch = line_bidirectional(line_input)

variable_input = Input(shape=(None, self.variable_shape[2]), name="variable_input", dtype=self.d_type)
variable_lstm = LSTM(units=self.variable_shape[1],
dtype=self.d_type,
dropout=variable_lstm_dropout_rate,
recurrent_dropout=variable_lstm_recurrent_dropout_rate)
recurrent_dropout=0)
variable_bidirectional = Bidirectional(layer=variable_lstm, name="variable_bidirectional")
variable_lstm_branch = variable_bidirectional(variable_input)

value_input = Input(shape=(None, self.value_shape[2]), name="value_input", dtype=self.d_type)
value_lstm = LSTM(units=self.value_shape[1],
dtype=self.d_type,
dropout=value_lstm_dropout_rate,
recurrent_dropout=value_lstm_recurrent_dropout_rate)
recurrent_dropout=0)
value_bidirectional = Bidirectional(layer=value_lstm, name="value_bidirectional")
value_lstm_branch = value_bidirectional(value_input)

Expand Down
2 changes: 1 addition & 1 deletion experiment/model_config_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def model_config_preprocess(df_all: pd.DataFrame, doc_target: bool) -> Dict[str,
model_config = Util.json_load(ML_CONFIG_PATH)
ascii_char_set = ''.join(chr(x) for x in range(0x20, 0x7F))
extra_char_set = "\x1B\t\n\r" # ESC code, tab and line end variations
doc_char_set = " ●개공기께내는님당드등로메밀번보복본비사생서석성슈스시암에용워으의이작정주지체큰키토패할호화" if doc_target else ''
doc_char_set = " ●가개공기께내는님당드등따로메면문밀방번보복본비사생서석성슈스시암에요용워으의이작정주지채체큰키토팅패필하할호화" if doc_target else ''
model_config["char_set"] = extra_char_set + ascii_char_set + doc_char_set

# check whether all extensions from meta are in ml_config.json
Expand Down
10 changes: 5 additions & 5 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
# float value of ML threshold is used to display possible lowest values
ZERO_ML_THRESHOLD = 0.0

# with option --doc & NEGLIGIBLE_ML_THRESHOLD
SAMPLES_IN_DOC = 934
# with option --doc & ZERO_ML_THRESHOLD
SAMPLES_IN_DOC = 930

# credentials count after scan without filters and ML validations
SAMPLES_REGEX_COUNT = 670
SAMPLES_REGEX_COUNT = 675

# credentials count after scan with filters and without ML validation
SAMPLES_FILTERED_COUNT = 554
SAMPLES_FILTERED_COUNT = 556

# credentials count after default post-processing
SAMPLES_POST_CRED_COUNT = 508
SAMPLES_POST_CRED_COUNT = 510

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138
Expand Down
Loading