# Source code for credsweeper.deep_scanner.deep_scanner

import logging
from typing import List, Any, Tuple

from credsweeper.config.config import Config
from credsweeper.deep_scanner.byte_scanner import ByteScanner
from credsweeper.deep_scanner.bzip2_scanner import Bzip2Scanner
from credsweeper.deep_scanner.csv_scanner import CsvScanner
from credsweeper.deep_scanner.deb_scanner import DebScanner
from credsweeper.deep_scanner.docx_scanner import DocxScanner
from credsweeper.deep_scanner.eml_scanner import EmlScanner
from credsweeper.deep_scanner.encoder_scanner import EncoderScanner
from credsweeper.deep_scanner.gzip_scanner import GzipScanner
from credsweeper.deep_scanner.html_scanner import HtmlScanner
from credsweeper.deep_scanner.jclass_scanner import JclassScanner
from credsweeper.deep_scanner.jks_scanner import JksScanner
from credsweeper.deep_scanner.lang_scanner import LangScanner
from credsweeper.deep_scanner.lzma_scanner import LzmaScanner
from credsweeper.deep_scanner.mxfile_scanner import MxfileScanner
from credsweeper.deep_scanner.patch_scanner import PatchScanner
from credsweeper.deep_scanner.pdf_scanner import PdfScanner
from credsweeper.deep_scanner.pkcs_scanner import PkcsScanner
from credsweeper.deep_scanner.png_scanner import PngScanner
from credsweeper.deep_scanner.pptx_scanner import PptxScanner
from credsweeper.deep_scanner.rpm_scanner import RpmScanner
from credsweeper.deep_scanner.rtf_scanner import RtfScanner
from credsweeper.deep_scanner.sqlite3_scanner import Sqlite3Scanner
from credsweeper.deep_scanner.strings_scanner import StringsScanner
from credsweeper.deep_scanner.tar_scanner import TarScanner
from credsweeper.deep_scanner.tmx_scanner import TmxScanner
from credsweeper.deep_scanner.xlsx_scanner import XlsxScanner
from credsweeper.deep_scanner.xml_scanner import XmlScanner
from credsweeper.deep_scanner.zip_scanner import ZipScanner
from credsweeper.deep_scanner.zlib_scanner import ZlibScanner
from credsweeper.file_handler.descriptor import Descriptor
from credsweeper.scanner.scanner import Scanner
from credsweeper.utils.util import Util

logger = logging.getLogger(__name__)


# NOTE(review): TmxScanner is returned by get_deep_scanners() but is not listed
# among the base classes below - confirm whether that is intentional.
class DeepScanner(
    ByteScanner,  #
    Bzip2Scanner,  #
    DocxScanner,  #
    CsvScanner,  #
    EncoderScanner,  #
    GzipScanner,  #
    HtmlScanner,  #
    JclassScanner,  #
    JksScanner,  #
    LangScanner,  #
    LzmaScanner,  #
    MxfileScanner,  #
    EmlScanner,  #
    PatchScanner,  #
    PdfScanner,  #
    PkcsScanner,  #
    PngScanner,  #
    PptxScanner,  #
    RtfScanner,  #
    RpmScanner,  #
    Sqlite3Scanner,  #
    StringsScanner,  #
    TarScanner,  #
    DebScanner,  #
    XmlScanner,  #
    XlsxScanner,  #
    ZipScanner,  #
    ZlibScanner,  #
):  # yapf: disable
    """Scanner that combines the format-specific scanners to explore data recursively."""

    def __init__(self, config: Config, scanner: Scanner) -> None:
        """Store the collaborators that are exposed through read-only properties.

        Args:
            config: configuration object, returned by the ``config`` property
            scanner: CredSweeper scanner object, returned by the ``scanner`` property

        """
        self.__scanner = scanner
        self.__config = config

    @property
    def config(self) -> Config:
        """Configuration object passed to the constructor."""
        return self.__config

    @property
    def scanner(self) -> Scanner:
        """CredSweeper scanner object passed to the constructor."""
        return self.__scanner

    @staticmethod
    def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
        """Select scanner classes for ``data`` based on its content and file extension.

        Formats are probed in a fixed order and the first one whose ``match`` accepts
        the data decides the result; later formats are not probed.

        Args:
            data: raw bytes to be classified
            descriptor: file descriptor; only its ``extension`` is read here
            depth: remaining recursion depth; several scanners are selected only while it is positive

        Return:
            A tuple ``(deep_scanners, fallback_scanners)`` of scanner classes.
            Both lists may be empty.
            The fallback list is presumably used when the primary scanners
            produce nothing - confirm against the caller.

        """
        primary: List[Any] = []
        fallback: List[Any] = []
        may_recurse = 0 < depth

        if ZipScanner.match(data):
            if may_recurse:
                primary.append(ZipScanner)
            # probably, there might be a docx, xlsx and so on.
            # It might be scanned with text representation in third-party libraries.
            extension = descriptor.extension
            office_formats = (
                (XlsxScanner, (".xlsx", ".ods")),
                (DocxScanner, (".docx", )),
                (PptxScanner, (".pptx", )),
            )
            for office_scanner, extensions in office_formats:
                # a matching extension promotes the scanner, otherwise it is kept as a fallback
                target = primary if extension in extensions else fallback
                target.append(office_scanner)
            return primary, fallback

        if XlsxScanner.match(data):
            target = primary if ".xls" == descriptor.extension else fallback
            target.append(XlsxScanner)
            return primary, fallback

        # Formats recognized by ``match`` alone: (scanner, requires a positive depth).
        # The order of the entries is the probing order and must be kept.
        signature_formats = (
            (Bzip2Scanner, True),
            (LzmaScanner, True),
            (TarScanner, True),
            (DebScanner, True),
            (GzipScanner, True),
            (PdfScanner, False),
            (PngScanner, False),
            (RpmScanner, True),
            (JclassScanner, False),
            (JksScanner, False),
            (Sqlite3Scanner, True),
            (PkcsScanner, False),
        )
        for candidate, needs_depth in signature_formats:
            if candidate.match(data):
                # a recognized format ends the search even when depth forbids scanning it
                if may_recurse or not needs_depth:
                    primary.append(candidate)
                return primary, fallback

        if RtfScanner.match(data):
            primary.append(RtfScanner)
            fallback.append(ByteScanner)
            return primary, fallback

        if XmlScanner.match(data):
            if HtmlScanner.match(data):
                primary.extend((HtmlScanner, XmlScanner))
            elif MxfileScanner.match(data):
                primary.extend((MxfileScanner, XmlScanner))
            elif TmxScanner.match(data):
                primary.append(TmxScanner)
                fallback.append(XmlScanner)
            else:
                primary.append(XmlScanner)
            # every XML flavour ends with the plain byte scan as the last fallback
            fallback.append(ByteScanner)
            return primary, fallback

        if EmlScanner.match(data):
            if descriptor.extension in (".eml", ".mht"):
                primary.append(EmlScanner)
            else:
                if may_recurse:
                    # a formal patch looks like an eml
                    primary.append(PatchScanner)
                fallback.append(EmlScanner)
            fallback.append(ByteScanner)
            return primary, fallback

        if not Util.is_binary(data):
            # keep ByteScanner first to apply real value position if possible
            primary.append(ByteScanner)
            if may_recurse:
                primary.extend((PatchScanner, LangScanner))
                for optional in (CsvScanner, EncoderScanner, ZlibScanner):
                    if optional.match(data):
                        primary.append(optional)
            return primary, fallback

        # unrecognized binary data
        if may_recurse:
            if ZlibScanner.match(data):
                primary.append(ZlibScanner)
                fallback.append(StringsScanner)
            else:
                primary.append(StringsScanner)
        else:
            logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
                           len(data))
        return primary, fallback