Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
run: |
banner="$(python -m credsweeper --banner | head -1)"
echo "banner = '${banner}'"
if [ "CredSweeper 1.14.2 crc32:76bde097" != "${banner}" ]; then
if [ "CredSweeper 1.14.3 crc32:e5cd2d86" != "${banner}" ]; then
echo "Update the check for '${banner}'"
exit 1
fi
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"__version__"
]

__version__ = "1.14.2"
__version__ = "1.14.3"
498 changes: 1 addition & 497 deletions credsweeper/__main__.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion credsweeper/deep_scanner/byte_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from typing import List, Optional

from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
from credsweeper.file_handler.data_content_provider import DataContentProvider
from .abstract_scanner import AbstractScanner

logger = logging.getLogger(__name__)

Expand Down
11 changes: 10 additions & 1 deletion credsweeper/deep_scanner/bzip2_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from abc import ABC
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Union

from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
Expand All @@ -15,6 +15,15 @@
class Bzip2Scanner(AbstractScanner, ABC):
"""Implements bzip2 scanning"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According https://en.wikipedia.org/wiki/Bzip2"""
if data.startswith(b"\x42\x5A\x68") and 10 <= len(data) \
and 0x31 <= data[3] <= 0x39 \
and 4 == data.find(b"\x31\x41\x59\x26\x53\x59", 4, 10):
return True
return False

def data_scan(
self, #
data_provider: DataContentProvider, #
Expand Down
9 changes: 8 additions & 1 deletion credsweeper/deep_scanner/deb_scanner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import struct
from abc import ABC
from typing import List, Optional, Generator, Tuple
from typing import List, Optional, Generator, Tuple, Union

from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
from credsweeper.credentials.candidate import Candidate
Expand All @@ -17,6 +17,13 @@ class DebScanner(AbstractScanner, ABC):

__header_size = 60

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According https://en.wikipedia.org/wiki/Deb_(file_format)"""
if data.startswith(b"!<arch>\n"):
return True
return False

@staticmethod
def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
"""Processes sequence of DEB archive and yields offset, name and data"""
Expand Down
100 changes: 51 additions & 49 deletions credsweeper/deep_scanner/deep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,37 @@
from typing import List, Any, Tuple

from credsweeper.config.config import Config
from credsweeper.deep_scanner.byte_scanner import ByteScanner
from credsweeper.deep_scanner.bzip2_scanner import Bzip2Scanner
from credsweeper.deep_scanner.csv_scanner import CsvScanner
from credsweeper.deep_scanner.deb_scanner import DebScanner
from credsweeper.deep_scanner.docx_scanner import DocxScanner
from credsweeper.deep_scanner.eml_scanner import EmlScanner
from credsweeper.deep_scanner.encoder_scanner import EncoderScanner
from credsweeper.deep_scanner.gzip_scanner import GzipScanner
from credsweeper.deep_scanner.html_scanner import HtmlScanner
from credsweeper.deep_scanner.jclass_scanner import JclassScanner
from credsweeper.deep_scanner.jks_scanner import JksScanner
from credsweeper.deep_scanner.lang_scanner import LangScanner
from credsweeper.deep_scanner.lzma_scanner import LzmaScanner
from credsweeper.deep_scanner.mxfile_scanner import MxfileScanner
from credsweeper.deep_scanner.patch_scanner import PatchScanner
from credsweeper.deep_scanner.pdf_scanner import PdfScanner
from credsweeper.deep_scanner.pkcs_scanner import PkcsScanner
from credsweeper.deep_scanner.png_scanner import PngScanner
from credsweeper.deep_scanner.pptx_scanner import PptxScanner
from credsweeper.deep_scanner.rpm_scanner import RpmScanner
from credsweeper.deep_scanner.rtf_scanner import RtfScanner
from credsweeper.deep_scanner.sqlite3_scanner import Sqlite3Scanner
from credsweeper.deep_scanner.strings_scanner import StringsScanner
from credsweeper.deep_scanner.tar_scanner import TarScanner
from credsweeper.deep_scanner.tmx_scanner import TmxScanner
from credsweeper.deep_scanner.xlsx_scanner import XlsxScanner
from credsweeper.deep_scanner.xml_scanner import XmlScanner
from credsweeper.deep_scanner.zip_scanner import ZipScanner
from credsweeper.file_handler.descriptor import Descriptor
from credsweeper.scanner.scanner import Scanner
from credsweeper.utils.util import Util
from .byte_scanner import ByteScanner
from .bzip2_scanner import Bzip2Scanner
from .csv_scanner import CsvScanner
from .deb_scanner import DebScanner
from .docx_scanner import DocxScanner
from .eml_scanner import EmlScanner
from .encoder_scanner import EncoderScanner
from .gzip_scanner import GzipScanner
from .html_scanner import HtmlScanner
from .jclass_scanner import JclassScanner
from .jks_scanner import JksScanner
from .lang_scanner import LangScanner
from .lzma_scanner import LzmaScanner
from .mxfile_scanner import MxfileScanner
from .patch_scanner import PatchScanner
from .pdf_scanner import PdfScanner
from .pkcs_scanner import PkcsScanner
from .png_scanner import PngScanner
from .pptx_scanner import PptxScanner
from .rpm_scanner import RpmScanner
from .rtf_scanner import RtfScanner
from .sqlite3_scanner import Sqlite3Scanner
from .strings_scanner import StringsScanner
from .tar_scanner import TarScanner
from .tmx_scanner import TmxScanner
from .xlsx_scanner import XlsxScanner
from .xml_scanner import XmlScanner
from .zip_scanner import ZipScanner
from ..file_handler.descriptor import Descriptor

logger = logging.getLogger(__name__)

Expand All @@ -49,6 +49,8 @@ class DeepScanner(
JksScanner, #
LangScanner, #
LzmaScanner, #
MxfileScanner, #
EmlScanner, #
PatchScanner, #
PdfScanner, #
PkcsScanner, #
Expand Down Expand Up @@ -89,7 +91,7 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
deep_scanners: List[Any] = []
fallback_scanners: List[Any] = []
if Util.is_zip(data):
if ZipScanner.match(data):
if 0 < depth:
deep_scanners.append(ZipScanner)
# probably, there might be a docx, xlsx and so on.
Expand All @@ -106,62 +108,62 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[
deep_scanners.append(PptxScanner)
else:
fallback_scanners.append(PptxScanner)
elif Util.is_com(data):
elif XlsxScanner.match(data):
if ".xls" == descriptor.extension:
deep_scanners.append(XlsxScanner)
else:
fallback_scanners.append(XlsxScanner)
elif Util.is_bzip2(data):
elif Bzip2Scanner.match(data):
if 0 < depth:
deep_scanners.append(Bzip2Scanner)
elif Util.is_lzma(data):
elif LzmaScanner.match(data):
if 0 < depth:
deep_scanners.append(LzmaScanner)
elif Util.is_tar(data):
elif TarScanner.match(data):
if 0 < depth:
deep_scanners.append(TarScanner)
elif Util.is_deb(data):
elif DebScanner.match(data):
if 0 < depth:
deep_scanners.append(DebScanner)
elif Util.is_gzip(data):
elif GzipScanner.match(data):
if 0 < depth:
deep_scanners.append(GzipScanner)
elif Util.is_pdf(data):
elif PdfScanner.match(data):
deep_scanners.append(PdfScanner)
elif Util.is_png(data):
elif PngScanner.match(data):
deep_scanners.append(PngScanner)
elif Util.is_rpm(data):
elif RpmScanner.match(data):
if 0 < depth:
deep_scanners.append(RpmScanner)
elif Util.is_jclass(data):
elif JclassScanner.match(data):
deep_scanners.append(JclassScanner)
elif Util.is_jks(data):
elif JksScanner.match(data):
deep_scanners.append(JksScanner)
elif Util.is_sqlite3(data):
elif Sqlite3Scanner.match(data):
if 0 < depth:
deep_scanners.append(Sqlite3Scanner)
elif Util.is_asn1(data):
elif PkcsScanner.match(data):
deep_scanners.append(PkcsScanner)
elif Util.is_rtf(data):
elif RtfScanner.match(data):
deep_scanners.append(RtfScanner)
fallback_scanners.append(ByteScanner)
elif Util.is_xml(data):
if Util.is_html(data):
elif XmlScanner.match(data):
if HtmlScanner.match(data):
deep_scanners.append(HtmlScanner)
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif Util.is_mxfile(data):
elif MxfileScanner.match(data):
deep_scanners.append(MxfileScanner)
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif Util.is_tmx(data):
elif TmxScanner.match(data):
deep_scanners.append(TmxScanner)
fallback_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
else:
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif Util.is_eml(data):
elif EmlScanner.match(data):
if descriptor.extension in (".eml", ".mht"):
deep_scanners.append(EmlScanner)
else:
Expand Down
12 changes: 11 additions & 1 deletion credsweeper/deep_scanner/eml_scanner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import email
import logging
from abc import ABC
from typing import List, Optional
from typing import List, Optional, Union

from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
Expand All @@ -15,6 +15,16 @@
class EmlScanner(AbstractScanner, ABC):
"""Implements eml scanning"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According to https://datatracker.ietf.org/doc/html/rfc822 lookup the fields: Date, From, To or Subject"""
if (b"\nDate:" in data or data.startswith(b"Date:")) \
and (b"\nFrom:" in data or data.startswith(b"From:")) \
and (b"\nTo:" in data or data.startswith(b"To:")) \
and (b"\nSubject:" in data or data.startswith(b"Subject:")):
return True
return False

def data_scan(
self, #
data_provider: DataContentProvider, #
Expand Down
9 changes: 8 additions & 1 deletion credsweeper/deep_scanner/gzip_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from abc import ABC
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Union

from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
Expand All @@ -16,6 +16,13 @@
class GzipScanner(AbstractScanner, ABC):
"""Realises gzip scanning"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According https://www.rfc-editor.org/rfc/rfc1952"""
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x1F\x8B\x08"):
return True
return False

def data_scan(
self, #
data_provider: DataContentProvider, #
Expand Down
16 changes: 15 additions & 1 deletion credsweeper/deep_scanner/html_scanner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
from abc import ABC
from typing import List, Optional
from typing import List, Optional, Union

from credsweeper.common.constants import MAX_LINE_LENGTH
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
Expand All @@ -13,6 +14,19 @@
class HtmlScanner(AbstractScanner, ABC):
"""Implements html scanning if possible"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
    """Tell whether data looks like HTML.

    The data counts as HTML when any known opening tag appears within the first
    MAX_LINE_LENGTH bytes and its closing tag appears somewhere after it.
    Supposed to be used for data which was already recognised as XML-like.
    """
    tag_pairs = (
        (b"<html", b"</html>"),
        (b"<body", b"</body>"),
        (b"<table", b"</table>"),
        (b"<p>", b"</p>"),
        (b"<span>", b"</span>"),
        (b"<div>", b"</div>"),
        (b"<li>", b"</li>"),
        (b"<ol>", b"</ol>"),
        (b"<ul>", b"</ul>"),
        (b"<th>", b"</th>"),
        (b"<tr>", b"</tr>"),
        (b"<td>", b"</td>"),
    )
    for begin_tag, end_tag in tag_pairs:
        # the opening tag is searched only in the head of the data
        begin_at = data.find(begin_tag, 0, MAX_LINE_LENGTH)
        if begin_at < 0:
            continue
        # the closing tag may be anywhere behind the opening one
        if begin_at < data.find(end_tag, begin_at):
            return True
    return False

def data_scan(
self, #
data_provider: DataContentProvider, #
Expand Down
9 changes: 8 additions & 1 deletion credsweeper/deep_scanner/jclass_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import struct
from abc import ABC
from typing import List, Optional
from typing import List, Optional, Union

from credsweeper.common.constants import UTF_8
from credsweeper.credentials.candidate import Candidate
Expand All @@ -16,6 +16,13 @@
class JclassScanner(AbstractScanner, ABC):
"""Implements java .class scanning"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - java class"""
if data.startswith(b"\xCA\xFE\xBA\xBE"):
return True
return False

@staticmethod
def u2(stream: io.BytesIO) -> int:
"""Extracts unsigned 16 bit big-endian"""
Expand Down
9 changes: 8 additions & 1 deletion credsweeper/deep_scanner/jks_scanner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from abc import ABC
from typing import List, Optional
from typing import List, Optional, Union

import jks

Expand All @@ -15,6 +15,13 @@
class JksScanner(AbstractScanner, ABC):
"""Implements jks scanning"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - jks"""
if data.startswith(b"\xFE\xED\xFE\xED"):
return True
return False

def data_scan(
self, #
data_provider: DataContentProvider, #
Expand Down
9 changes: 8 additions & 1 deletion credsweeper/deep_scanner/lzma_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import lzma
from abc import ABC
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Union

from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
Expand All @@ -15,6 +15,13 @@
class LzmaScanner(AbstractScanner, ABC):
"""Implements lzma scanning"""

@staticmethod
def match(data: Union[bytes, bytearray]) -> bool:
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - lzma also xz"""
if data.startswith((b"\xFD7zXZ\x00", b"\x5D\x00\x00")):
return True
return False

def data_scan(
self, #
data_provider: DataContentProvider, #
Expand Down
Loading