credsweeper.file_handler package

Submodules

credsweeper.file_handler.abstract_provider module

class credsweeper.file_handler.abstract_provider.AbstractProvider(paths: Sequence[str | Path | BytesIO | Tuple[str | Path, BytesIO]])[source]

Bases: ABC

Base class for all files provider objects.

abstract get_scannable_files(config: Config) Sequence[DiffContentProvider | TextContentProvider][source]

Get a list of file objects for analysis based on the attribute “paths”.

Parameters:

config – dict of credsweeper configuration

Returns:

file objects to analyse

property paths: Sequence[str | Path | BytesIO | Tuple[str | Path, BytesIO]]

paths getter

credsweeper.file_handler.analysis_target module

class credsweeper.file_handler.analysis_target.AnalysisTarget(line_pos: int, lines: List[str], line_nums: List[int], descriptor: Descriptor, line: str | None = None, offset: int | None = None)[source]

Bases: object

property descriptor: Descriptor

cached value

property file_path: str | None

cached value

property file_type: str | None

cached value

property info: str | None

cached value

property line: str

cached value

property line_len: int

cached value

property line_lower: str

cached value

property line_lower_strip: str

cached value

property line_num: int

cached value

property line_nums: List[int]

cached value

property line_pos: int

cached value

property line_strip: str

cached value

property line_strip_len: int

cached value

property lines: List[str]

cached value

property lines_len: int

cached value

property offset: int | None

cached value

credsweeper.file_handler.byte_content_provider module

class credsweeper.file_handler.byte_content_provider.ByteContentProvider(content: bytes, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Allows scanning a byte sequence directly instead of additionally reading a file.

property data: bytes | None

data getter for ByteContentProvider

property lines: List[str]

lines getter for ByteContentProvider

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return lines to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets based on every row in the content

credsweeper.file_handler.content_provider module

class credsweeper.file_handler.content_provider.ContentProvider(file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ABC

Base class to provide access to analysis targets for scanned object.

abstract property data: bytes | None

abstract data getter

property descriptor: Descriptor

descriptor getter

property file_path: str

file_path getter

property file_type: str

file_type getter

property info: str

info getter

lines_to_targets(min_len: int, lines: List[str], line_nums: List[int] | None = None) Generator[AnalysisTarget, None, None][source]

Creates list of targets with multiline concatenation

abstract yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Load and preprocess file diff data to scan.

Parameters:

min_len – minimal line length to scan

Returns:

row objects to analyse

credsweeper.file_handler.data_content_provider module

class credsweeper.file_handler.data_content_provider.DataContentProvider(data: bytes, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Dummy raw provider to keep bytes

property data: bytes | None

data getter for DataContentProvider

represent_as_encoded() bool[source]

Decodes data from base64. Stores the result in decoded.

Returns:

True if the data was correctly parsed and verified

represent_as_html(depth: int, recursive_limit_size: int, keywords_required_substrings_check: Callable[[str], bool]) bool[source]

Tries to read data as html

Returns:

True if reading was successful

represent_as_structure() bool[source]

Tries to convert data with many parsers. Stores the result in an internal structure. Returns True if some structure was found.

represent_as_xml() bool[source]

Tries to read data as xml

Returns:

True if reading was successful

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return nothing. The class provides only data storage.

Parameters:

min_len – minimal line length to scan

Raises:

NotImplementedError

credsweeper.file_handler.descriptor module

class credsweeper.file_handler.descriptor.Descriptor(path: str, extension: str, info: str)[source]

Bases: object

Descriptor for file - optimize memory consumption

extension: str
info: str
path: str

credsweeper.file_handler.diff_content_provider module

class credsweeper.file_handler.diff_content_provider.DiffContentProvider(file_path: str, change_type: DiffRowType, diff: List[DiffDict])[source]

Bases: ContentProvider

Provide data from a single .patch file.

Parameters:
  • file_path – path to file

  • change_type – set added or deleted file data to scan

  • diff

    list of file row changes, with base elements represented as:

    {
        "old": line number before diff,
        "new": line number after diff,
        "line": line text,
        "hunk": diff hunk number
    }
    

property data: bytes

data getter for DiffContentProvider

parse_lines_data(lines_data: List[DiffRowData]) Tuple[List[int], List[str]][source]

Parse diff lines data.

Return list of line numbers with change type “self.change_type” and list of all lines in file in original order (lines not mentioned in the diff file are replaced with blank lines).

Parameters:

lines_data – data of all rows mentioned in diff file

Returns:

tuple of line numbers with change type “self.change_type” and all file lines in original order (lines not mentioned in the diff file are replaced with blank lines)

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Preprocess file diff data to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets of every row of file diff corresponding to change type “self.change_type”

credsweeper.file_handler.file_path_extractor module

class credsweeper.file_handler.file_path_extractor.FilePathExtractor[source]

Bases: object

Util class to browse files in directories

static apply_gitignore(detected_files: List[str]) List[str][source]

Apply gitignore rules for each file.

Parameters:

detected_files – list of files to be checked

Returns:

List of files with all files ignored by git removed

static check_exclude_file(config: Config, path: str) bool[source]

Checks whether file should be excluded

Parameters:
  • config – Config

  • path – str - full path preferred

Returns:

True when the full file path should be excluded according to the config

static check_file_size(config: Config, reference: str | Path | BytesIO | Tuple[str | Path, BytesIO]) bool[source]

Checks whether the file is over the size limit from configuration

Parameters:
  • config – Config

  • reference – various types of a file reference

Returns:

True when the file is oversize

static get_file_paths(config: Config, path: str | Path) List[str][source]

Get all files in the directory. Automatically exclude non-code or data files (such as .jpg).

Parameters:
  • config – credsweeper configuration

  • path – path to the file or directory to be scanned

Returns:

List of all non-excluded files in the directory

static is_find_by_ext_file(config: Config, extension: str) bool[source]

Checks whether file has suspicious extension

Parameters:
  • config – Config

  • extension – str - may be only file name with extension

Returns:

True when the feature is configured and the file extension matches

classmethod is_valid_path(path: str) bool[source]

Locate nearest .git directory to the path and check if path is ignored.

Parameters:

path – path to the file or directory to check

Returns:

False if file is ignored by git. True otherwise

located_repos: Dict[Path, Repo] = {}

credsweeper.file_handler.files_provider module

class credsweeper.file_handler.files_provider.FilesProvider(paths: Sequence[str | Path | BytesIO | Tuple[str | Path, BytesIO]], skip_ignored: bool | None = None)[source]

Bases: AbstractProvider

Provider of plain os files to be analysed.

get_scannable_files(config: Config) Sequence[DiffContentProvider | TextContentProvider][source]

Get a list of full-text file objects for analysis of files with parent paths from “paths”.

Parameters:

config – dict of credsweeper configuration

Returns:

preprocessed file objects for analysis

credsweeper.file_handler.patches_provider module

class credsweeper.file_handler.patches_provider.PatchesProvider(paths: Sequence[str | Path | BytesIO | Tuple[str | Path, BytesIO]], change_type: DiffRowType)[source]

Bases: AbstractProvider

Provide data from a list of .patch files.

get_files_sequence(raw_patches: List[List[str]]) Sequence[DiffContentProvider | TextContentProvider][source]

Returns sequence of files

get_scannable_files(config: Config) Sequence[DiffContentProvider | TextContentProvider][source]

Get files to scan. Output based on the paths field.

Parameters:

config – dict of credsweeper configuration

Returns:

file objects for analysing

load_patch_data(config: Config) List[List[str]][source]

Loads data from patch

credsweeper.file_handler.string_content_provider module

class credsweeper.file_handler.string_content_provider.StringContentProvider(lines: List[str], line_numbers: List[int] | None = None, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Provider that scans simple text lines

property data: bytes

data getter for StringContentProvider

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return lines to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets based on every row in file

credsweeper.file_handler.struct_content_provider module

class credsweeper.file_handler.struct_content_provider.StructContentProvider(struct: Any, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Content provider to keep structured data

property data: bytes

data getter for StructContentProvider

property struct: Any

struct getter

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return nothing. The class provides only data storage.

Parameters:

min_len – minimal line length to scan

Raises:

NotImplementedError

credsweeper.file_handler.text_content_provider module

class credsweeper.file_handler.text_content_provider.TextContentProvider(file_path: str | Path | Tuple[str | Path, BytesIO], file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Provide access to analysis targets for full-text file scanning.

Parameters:

file_path – string, path to file

property data: bytes | None

data getter for TextContentProvider

property lines: List[str] | None

lines getter for TextContentProvider

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Load and preprocess file content to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets based on every row in file

Module contents

class credsweeper.file_handler.ByteContentProvider(content: bytes, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Allows scanning a byte sequence directly instead of additionally reading a file.

property data: bytes | None

data getter for ByteContentProvider

property lines: List[str]

lines getter for ByteContentProvider

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return lines to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets based on every row in the content

class credsweeper.file_handler.ContentProvider(file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ABC

Base class to provide access to analysis targets for scanned object.

abstract property data: bytes | None

abstract data getter

property descriptor: Descriptor

descriptor getter

property file_path: str

file_path getter

property file_type: str

file_type getter

property info: str

info getter

lines_to_targets(min_len: int, lines: List[str], line_nums: List[int] | None = None) Generator[AnalysisTarget, None, None][source]

Creates list of targets with multiline concatenation

abstract yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Load and preprocess file diff data to scan.

Parameters:

min_len – minimal line length to scan

Returns:

row objects to analyse

class credsweeper.file_handler.DataContentProvider(data: bytes, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Dummy raw provider to keep bytes

property data: bytes | None

data getter for DataContentProvider

represent_as_encoded() bool[source]

Decodes data from base64. Stores the result in decoded.

Returns:

True if the data was correctly parsed and verified

represent_as_html(depth: int, recursive_limit_size: int, keywords_required_substrings_check: Callable[[str], bool]) bool[source]

Tries to read data as html

Returns:

True if reading was successful

represent_as_structure() bool[source]

Tries to convert data with many parsers. Stores the result in an internal structure. Returns True if some structure was found.

represent_as_xml() bool[source]

Tries to read data as xml

Returns:

True if reading was successful

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return nothing. The class provides only data storage.

Parameters:

min_len – minimal line length to scan

Raises:

NotImplementedError

class credsweeper.file_handler.DiffContentProvider(file_path: str, change_type: DiffRowType, diff: List[DiffDict])[source]

Bases: ContentProvider

Provide data from a single .patch file.

Parameters:
  • file_path – path to file

  • change_type – set added or deleted file data to scan

  • diff

    list of file row changes, with base elements represented as:

    {
        "old": line number before diff,
        "new": line number after diff,
        "line": line text,
        "hunk": diff hunk number
    }
    

property data: bytes

data getter for DiffContentProvider

parse_lines_data(lines_data: List[DiffRowData]) Tuple[List[int], List[str]][source]

Parse diff lines data.

Return list of line numbers with change type “self.change_type” and list of all lines in file in original order (lines not mentioned in the diff file are replaced with blank lines).

Parameters:

lines_data – data of all rows mentioned in diff file

Returns:

tuple of line numbers with change type “self.change_type” and all file lines in original order (lines not mentioned in the diff file are replaced with blank lines)

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Preprocess file diff data to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets of every row of file diff corresponding to change type “self.change_type”

class credsweeper.file_handler.StringContentProvider(lines: List[str], line_numbers: List[int] | None = None, file_path: str | None = None, file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Provider that scans simple text lines

property data: bytes

data getter for StringContentProvider

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Return lines to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets based on every row in file

class credsweeper.file_handler.TextContentProvider(file_path: str | Path | Tuple[str | Path, BytesIO], file_type: str | None = None, info: str | None = None)[source]

Bases: ContentProvider

Provide access to analysis targets for full-text file scanning.

Parameters:

file_path – string, path to file

property data: bytes | None

data getter for TextContentProvider

property lines: List[str] | None

lines getter for TextContentProvider

yield_analysis_target(min_len: int) Generator[AnalysisTarget, None, None][source]

Load and preprocess file content to scan.

Parameters:

min_len – minimal line length to scan

Returns:

list of analysis targets based on every row in file