Source code for credsweeper.utils.pem_key_detector

import contextlib
import logging
import re
import string
from typing import List

from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.utils import Util
from credsweeper.utils.entropy_validator import EntropyValidator

logger = logging.getLogger(__name__)


class PemKeyDetector:
    """Class to detect PEM PRIVATE keys only.

    The detector starts from a line that holds the PEM begin marker, walks the following lines,
    collects the base64 body and accepts the candidate only when the body passes a format check.
    """

    # Alphabet accepted inside the key body: standard base64 plus the padding sign
    base64set = set(string.ascii_uppercase) | set(string.ascii_lowercase) | set(string.digits) | {'+', '/', '='}

    # A line that contains any of these markers is a header line, not key data
    ignore_starts = [PEM_BEGIN_PATTERN, "Proc-Type", "Version", "DEK-Info"]
    # Characters that commonly wrap a key line inside source code (quotes, separators, comment signs)
    wrap_characters = "\\'\";,[]#*!"
    # Everything that is stripped from both ends of a key line
    remove_characters = string.whitespace + wrap_characters
    # Begin marker of a PRIVATE KEY block which does not start with ENCRYPTED;
    # the optional tail matches a whole key placed in the same line
    re_pem_begin = re.compile(r"(?P<value>" + PEM_BEGIN_PATTERN + r"\s(?!ENCRYPTED)[^-]*PRIVATE[^-]*KEY[^-]*-----"
                              r"(.+" + PEM_END_PATTERN + r"[^-]+KEY[^-]*-----)?)")
    # Either the end marker or base64 data - last line contains 4 symbols, at least
    re_value_pem = re.compile(r"(?P<value>([^-]*" + PEM_END_PATTERN +
                              r"[^-]+-----)|(([a-zA-Z0-9/+=]{64}.*)?[a-zA-Z0-9/+=]{4})+)")

    @classmethod
    def detect_pem_key(cls, config: Config, target: AnalysisTarget) -> List[LineData]:
        """Detects PEM key in single line and with iterative for next lines according
        https://www.rfc-editor.org/rfc/rfc7468

        At most 200 lines are inspected, starting from ``target.line_pos``. Lines before the begin
        marker and header lines (see ``is_leading_config_line``) are skipped, every other line is
        sanitized and must consist of base64 symbols only. The collected body is validated when the
        end marker is met.

        Args:
            config: Config
            target: Analysis target

        Return:
            List of LineData with found PEM - one item for each inspected line up to the end marker.
            Empty list when a line is not base64, the body does not pass validation,
            or the end marker was not found in the inspected lines.

        """
        # get line with -----BEGIN which may contain full key
        line_data: List[LineData] = [
            LineData(config, target.line, target.line_pos, target.line_num, target.file_path, target.file_type,
                     target.info, cls.re_pem_begin)
        ]
        key_parts: List[str] = []
        # protection check for case when first line starts from 0
        start_pos = max(target.line_pos, 0)
        finish_pos = min(start_pos + 200, target.lines_len)
        begin_pattern_not_passed = True
        for line_pos in range(start_pos, finish_pos):
            line = target.lines[line_pos]
            if target.line_pos != line_pos:
                # the first line is already stored with the begin pattern
                line_data.append(
                    LineData(config, line, line_pos, target.line_nums[line_pos], target.file_path, target.file_type,
                             target.info, cls.re_value_pem))
            # replace escaped line ends with real and process them - PEM does not contain '\' sign
            while "\\\\" in line:
                line = line.replace("\\\\", "\\")
            for subline in line.replace("\\r", '\n').replace("\\n", '\n').splitlines():
                if begin_pattern_not_passed or cls.is_leading_config_line(subline):
                    if PEM_BEGIN_PATTERN in subline:
                        begin_pattern_not_passed = False
                    continue
                if PEM_END_PATTERN in subline:
                    # the body is complete - the verdict is final for the candidate
                    if cls._is_unencrypted_key(target.line_strip, "".join(key_parts)):
                        return line_data
                    return []
                sanitized_line = cls.sanitize_line(subline)
                # PEM key line should not contain spaces or . (and especially not ...)
                if not cls.base64set.issuperset(sanitized_line):
                    return []
                key_parts.append(sanitized_line)

        return []

    @classmethod
    def _is_unencrypted_key(cls, header: str, key_data: str) -> bool:
        """Checks the collected base64 body with respect to the key kind named in the begin line.

        Args:
            header: stripped line with the begin marker, used to recognize PGP and OPENSSH keys
            key_data: concatenated base64 body of the key

        Return:
            True if the body is accepted as a private key

        """
        if "PGP" in header:
            # Check if entropy is high enough for base64 set with padding sign
            entropy_validator = EntropyValidator(key_data, Chars.BASE64_CHARS)
            if entropy_validator.valid:
                return True
            logger.debug("Filtered with entropy %f '%s'", entropy_validator.entropy, key_data)
            # NOTE(review): a PGP body that fails the entropy check is not rejected here and is still
            # examined by the branches below - original behaviour is kept, confirm it is intended
        if "OPENSSH" in header:
            # Check whether the key is encrypted; any decoding error means the body is rejected
            with contextlib.suppress(Exception):
                decoded = Util.decode_base64(key_data, urlsafe_detect=True)
                if 32 < len(decoded) and b"bcrypt" not in decoded:
                    # 256 bits is the minimal size of Ed25519 keys
                    # all OK - the key is not encrypted in this top level
                    return True
            logger.debug("Filtered with size or bcrypt '%s'", key_data)
        else:
            # any decoding error means the body is rejected
            with contextlib.suppress(Exception):
                decoded = Util.decode_base64(key_data, urlsafe_detect=True)
                if Util.is_asn1(decoded):
                    # all OK - the key is not encrypted in this top level
                    return True
            logger.debug("Filtered with non asn1 '%s'", key_data)
        return False

    @classmethod
    def sanitize_line(cls, line: str, recurse_level: int = 5) -> str:
        """Remove common symbols that can surround PEM keys inside code.

        Examples::

            `# ZZAWarrA1`
            `* ZZAWarrA1`
            `  "ZZAWarrA1\\n" + `

        Args:
            line: Line to be cleaned
            recurse_level: to avoid infinite loop in case when removed symbol inside base64 encoded

        Return:
            line with special characters removed from both ends

        """
        recurse_level -= 1
        if 0 > recurse_level:
            return line

        passed_line = line
        # Note that this strip would remove `\n` but not `\\n`
        line = line.strip(string.whitespace)
        if line.startswith(("// ", "/// ")):
            # Assume that the commented line is to be separated from base64 code, it may be a part of PEM, otherwise
            # the rest of a doxygen style prefix is removed with the final strip
            line = line[3:]
        if line.startswith("/*"):
            line = line[2:]
        if line.endswith("*/"):
            line = line[:-2]
        if line.endswith("\\"):
            # line carry in many languages
            line = line[:-1]

        # remove concatenation carefully only when it is not part of base64
        if line.startswith('+') and 1 < len(line) and line[1] not in cls.base64set:
            line = line[1:]
        if line.endswith('+') and 2 < len(line) and line[-2] not in cls.base64set:
            line = line[:-1]

        # whitespaces are stripped here too, so the result never starts or ends with them
        line = line.strip(cls.remove_characters)

        # A new iteration is required only while a wrap character is left inside the line.
        # When the pass changed nothing the next passes cannot change anything too - stop at once.
        if line != passed_line and any(x in line for x in cls.wrap_characters):
            return cls.sanitize_line(line, recurse_level)

        return line

    @classmethod
    def is_leading_config_line(cls, line: str) -> bool:
        """Checks whether the line is a non-key line which may lead the encoded data.

        Example lines with non-key leading lines:

        .. code-block:: text

            Proc-Type: 4,ENCRYPTED
            DEK-Info: DEK-Info: AES-256-CBC,2AA219GG746F88F6DDA0D852A0FD3211

            ZZAWarrA1...

        Args:
            line: Line to be checked

        Return:
            True if the line is empty or contains any marker from ``ignore_starts``,
            i.e. it is not a part of encoded data but leading config

        """
        if not line:
            return True
        return any(ignore_string in line for ignore_string in cls.ignore_starts)