Source code for credsweeper.ml_model.features.word_in_path

import os.path
from pathlib import Path
from typing import List, Any

import numpy as np

from credsweeper.credentials.candidate import Candidate
from credsweeper.ml_model.features.word_in import WordIn



[docs]
class WordInPath(WordIn):
    """Categorical feature that corresponds to words in path (POSIX, lowercase)"""

    def __call__(self, candidates: List[Candidate]) -> np.ndarray:
        """Compute the feature from the file path of the first candidate.

        Only ``candidates[0].line_data_list[0].path`` is inspected; the
        candidates are expected to be grouped beforehand so that they all
        share one path.

        Args:
            candidates: non-empty list of candidates; the first one must have
                at least one entry in ``line_data_list``.

        Returns:
            The result of ``self.word_in_`` applied to the lower-cased POSIX
            path with its extension removed, or ``np.array([self.zero])`` when
            the path is empty or ``None``.

        """
        # actually there must be one path because the candidates are grouped before
        file_path = candidates[0].line_data_list[0].path
        if not file_path:
            # no path to inspect - fall back to the zero value of the feature
            return np.array([self.zero])

        path = Path(file_path)
        posix_lower_path = path.as_posix().lower()
        if not path.is_absolute():
            # apply ./ for normalised path to detect "/src" for relative path
            posix_lower_path = f"./{posix_lower_path}"
        # prevent extra confusion from the same word in extension
        path_without_extension, _ = os.path.splitext(posix_lower_path)
        return self.word_in_(path_without_extension)

    def extract(self, candidate: Candidate) -> Any:
        """Not supported for a single candidate; use ``__call__`` on the group.

        Raises:
            NotImplementedError: always.

        """
        raise NotImplementedError