Source code for credsweeper.credentials.candidate

import copy
import re
from json.encoder import py_encode_basestring_ascii
from typing import Any, Dict, List, Optional

from credsweeper.common.constants import Severity, Confidence
from credsweeper.config.config import Config
from credsweeper.credentials.line_data import LineData


[docs] class Candidate: """Candidates that can be credentials. Class contains list of LineData, some attributes from Rule object, and config Parameters: line_data_list: List of LineData patterns: Regular expressions that can be used for detection rule_name: Name of Rule severity: critical/high/medium/low confidence: strong/moderate/weak config: user configs use_ml: Whether the candidate should be validated with ML. If not - ml_probability is set None """ DUMMY_PATTERN = re.compile(r"^") def __init__(self, line_data_list: List[LineData], patterns: List[re.Pattern], rule_name: str, severity: Severity, config: Optional[Config] = None, use_ml: bool = False, confidence: Confidence = Confidence.MODERATE) -> None: self.line_data_list = line_data_list self.patterns = patterns self.rule_name = rule_name self.severity = severity self.config = config self.use_ml = use_ml self.confidence = confidence # None - ML is not applicable or not processed yet; float - the ml decision above ml_threshold # Note: -1.0 is possible too for some activation functions in ml model, so let avoid negative values self.ml_probability: Optional[float] = None
[docs] def compare(self, other: 'Candidate') -> bool: """Comparison method - checks only result of final cred""" if self.rule_name == other.rule_name \ and self.severity == other.severity \ and self.confidence == other.confidence \ and self.use_ml == other.use_ml \ and self.ml_probability == other.ml_probability \ and len(self.line_data_list) == len(other.line_data_list): for i, j in zip(self.line_data_list, other.line_data_list): if i.compare(j): continue break else: # all line_data are equal return True return False
@staticmethod def _encode(value: Any) -> Any: """Encode value to the base string ascii Args: value: Any type of value to be encoded """ if isinstance(value, str): return py_encode_basestring_ascii(value) return value
[docs] def to_str(self, subtext: bool = False, hashed: bool = False) -> str: """Represent candidate with subtext or|and hashed values""" return f"rule: {self.rule_name}" \ f" | severity: {self.severity.value}" \ f" | confidence: {self.confidence.value}" \ f" | ml_probability: {self.ml_probability}" \ f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]"
def __str__(self): return self.to_str() def __repr__(self): return self.to_str(subtext=True)
[docs] def to_json(self, hashed: bool, subtext: bool) -> Dict: """Convert credential candidate object to dictionary. Return: Dictionary object generated from current credential candidate """ full_output = { "patterns": [pattern.pattern for pattern in self.patterns], "rule": self.rule_name, "severity": self.severity.value, "confidence": self.confidence.value, "use_ml": self.use_ml, "ml_probability": self.ml_probability, # put the array to end to make json more readable "line_data_list": [line_data.to_json(hashed, subtext) for line_data in self.line_data_list], } if self.config is not None: reported_output = {k: v for k, v in full_output.items() if k in self.config.candidate_output} else: reported_output = full_output return reported_output
[docs] def to_dict_list(self, hashed: bool, subtext: bool) -> List[dict]: """Convert credential candidate object to List[dict]. Return: List[dict] object generated from current credential candidate """ reported_output = [] json_output = self.to_json(hashed, subtext) refined_data = copy.deepcopy(json_output) del refined_data["line_data_list"] for line_data in json_output["line_data_list"]: line_data.update(refined_data) for key in line_data.keys(): line_data[key] = self._encode(line_data[key]) reported_output.append(line_data) return reported_output
[docs] @classmethod def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, info: str, rule_name: str): """Create dummy instance to use in searching file by extension""" return cls( # line_data_list=[LineData(config, '', -1, 0, file_path, file_type, info, cls.DUMMY_PATTERN)], patterns=[cls.DUMMY_PATTERN], # rule_name=rule_name, # severity=Severity.INFO, # config=config, # confidence=Confidence.WEAK)