Source code for credsweeper.deep_scanner.deep_scanner

import logging
from typing import List, Any, Tuple

from credsweeper.config.config import Config
from credsweeper.scanner.scanner import Scanner
from credsweeper.utils.util import Util
from .byte_scanner import ByteScanner
from .bzip2_scanner import Bzip2Scanner
from .csv_scanner import CsvScanner
from .deb_scanner import DebScanner
from .docx_scanner import DocxScanner
from .eml_scanner import EmlScanner
from .encoder_scanner import EncoderScanner
from .gzip_scanner import GzipScanner
from .html_scanner import HtmlScanner
from .jclass_scanner import JclassScanner
from .jks_scanner import JksScanner
from .lang_scanner import LangScanner
from .lzma_scanner import LzmaScanner
from .mxfile_scanner import MxfileScanner
from .patch_scanner import PatchScanner
from .pdf_scanner import PdfScanner
from .pkcs_scanner import PkcsScanner
from .pptx_scanner import PptxScanner
from .rpm_scanner import RpmScanner
from .rtf_scanner import RtfScanner
from .sqlite3_scanner import Sqlite3Scanner
from .strings_scanner import StringsScanner
from .tar_scanner import TarScanner
from .tmx_scanner import TmxScanner
from .xlsx_scanner import XlsxScanner
from .xml_scanner import XmlScanner
from .zip_scanner import ZipScanner
from ..file_handler.descriptor import Descriptor

logger = logging.getLogger(__name__)


class DeepScanner(
    ByteScanner,  #
    Bzip2Scanner,  #
    DocxScanner,  #
    CsvScanner,  #
    EncoderScanner,  #
    GzipScanner,  #
    HtmlScanner,  #
    JclassScanner,  #
    JksScanner,  #
    LangScanner,  #
    LzmaScanner,  #
    PatchScanner,  #
    PdfScanner,  #
    PkcsScanner,  #
    PptxScanner,  #
    RtfScanner,  #
    RpmScanner,  #
    Sqlite3Scanner,  #
    StringsScanner,  #
    TarScanner,  #
    DebScanner,  #
    XmlScanner,  #
    XlsxScanner,  #
    ZipScanner
):  # yapf: disable
    """Advanced scanner that explores data recursively."""

    def __init__(self, config: Config, scanner: Scanner) -> None:
        """Create the deep scanner.

        Args:
            config: configuration object, kept privately and exposed via ``config``
            scanner: CredSweeper scanner object, kept privately and exposed via ``scanner``

        """
        self.__config = config
        self.__scanner = scanner

    @property
    def config(self) -> Config:
        """Configuration object given to the constructor (read-only)."""
        return self.__config

    @property
    def scanner(self) -> Scanner:
        """Scanner object given to the constructor (read-only)."""
        return self.__scanner

    @staticmethod
    def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
        """Pick the scanner classes that suit ``data``.

        The content is probed with the ``Util.is_*`` checks in a fixed order and the
        first matching probe decides the selection. Scanners guarded by ``depth > 0``
        are skipped when no depth is left.

        Args:
            data: raw bytes to classify
            descriptor: descriptor of the data; its ``extension`` decides whether a
                format-specific scanner is primary or only a fallback
            depth: remaining depth; must be positive for the depth-guarded scanners

        Return:
            tuple of two lists of scanner classes: the primary (deep) scanners and
            the fallback scanners

        """
        primary: List[Any] = []
        fallback: List[Any] = []
        # both lists are filled in place, so the tuple may be prepared once
        selection = (primary, fallback)

        if Util.is_zip(data):
            if depth > 0:
                primary.append(ZipScanner)
            # A zip container may actually be an office document (docx, xlsx and so on),
            # which might be scanned via its text representation from third-party libraries.
            # The dedicated scanner is primary only when the extension matches.
            extension = descriptor.extension
            (primary if extension in (".xlsx", ".ods") else fallback).append(XlsxScanner)
            (primary if ".docx" == extension else fallback).append(DocxScanner)
            (primary if ".pptx" == extension else fallback).append(PptxScanner)
            return selection

        if Util.is_com(data):
            (primary if ".xls" == descriptor.extension else fallback).append(XlsxScanner)
            return selection

        # Formats served by exactly one scanner: (probe, scanner class, needs depth).
        # The order of the rows is the order of probing.
        single_scanner_formats = (
            (Util.is_bzip2, Bzip2Scanner, True),
            (Util.is_lzma, LzmaScanner, True),
            (Util.is_tar, TarScanner, True),
            (Util.is_deb, DebScanner, True),
            (Util.is_gzip, GzipScanner, True),
            (Util.is_pdf, PdfScanner, False),
            (Util.is_rpm, RpmScanner, True),
            (Util.is_jclass, JclassScanner, False),
            (Util.is_jks, JksScanner, False),
            (Util.is_sqlite3, Sqlite3Scanner, True),
            (Util.is_asn1, PkcsScanner, False),
        )
        for probe, scanner_class, needs_depth in single_scanner_formats:
            if probe(data):
                if not needs_depth or depth > 0:
                    primary.append(scanner_class)
                # the first recognised format ends the search even when nothing was added
                return selection

        if Util.is_rtf(data):
            primary.append(RtfScanner)
            fallback.append(ByteScanner)
            return selection

        if Util.is_xml(data):
            if Util.is_html(data):
                primary += [HtmlScanner, XmlScanner]
            elif Util.is_mxfile(data):
                primary += [MxfileScanner, XmlScanner]
            elif Util.is_tmx(data):
                primary.append(TmxScanner)
                fallback.append(XmlScanner)
            else:
                primary.append(XmlScanner)
            # every XML flavour ends its fallback list with the plain byte scanner
            fallback.append(ByteScanner)
            return selection

        if Util.is_eml(data):
            if descriptor.extension in (".eml", ".mht"):
                primary.append(EmlScanner)
            else:
                if depth > 0:
                    # a formal patch looks like an eml
                    primary.append(PatchScanner)
                fallback.append(EmlScanner)
            fallback.append(ByteScanner)
            return selection

        if not Util.is_binary(data):
            # keep ByteScanner first to apply real value position if possible
            primary.append(ByteScanner)
            if depth > 0:
                primary.extend((PatchScanner, EncoderScanner, LangScanner, CsvScanner))
            return selection

        # unrecognised binary data
        if depth > 0:
            primary.append(StringsScanner)
        else:
            logger.warning(
                "Cannot apply a deep scanner for type %s prefix %s %d",
                descriptor,
                repr(data[:32]),
                len(data),
            )
        return selection