from typing import List, Any
import numpy as np
from credsweeper.credentials.candidate import Candidate
from credsweeper.ml_model.features.word_in import WordIn
[docs]
class FileExtension(WordIn):
    """Categorical feature of file type.

    Parameters:
        extensions: extension labels, forwarded to the ``WordIn`` base as ``words``

    """

    def __init__(self, extensions: List[str]) -> None:
        # The base class keeps the labels; this subclass adds no state of its own.
        super().__init__(words=extensions)

    def __call__(self, candidates: List[Candidate]) -> np.ndarray:
        """Gather the lower-cased file types of ``candidates`` and hand them to ``word_in_``.

        Only the first element of each candidate's ``line_data_list`` is read.

        Parameters:
            candidates: candidates whose file types are collected

        Returns:
            whatever ``self.word_in_`` returns for the collected set of file types

        """
        seen_types = set()
        for item in candidates:
            first_line = item.line_data_list[0]
            # Lower-case so the set holds one entry per file type regardless of case.
            seen_types.add(first_line.file_type.lower())
        return self.word_in_(seen_types)