Source code for credsweeper.ml_model.features.word_in_line

from typing import List

import numpy as np

from credsweeper.common.constants import CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features.word_in import WordIn
from credsweeper.utils import Util


class WordInLine(WordIn):
    """Word-presence feature computed over the first line of a candidate.

    The matching itself is delegated to ``word_in_str`` (not defined in this
    class; presumably inherited from ``WordIn``).
    """

    def __init__(self, words: List[str]) -> None:
        """Initialise the feature with the words to look for.

        Args:
            words: list of predefined words - MUST BE IN LOWER CASE, because
                the inspected text is lower-cased before matching

        """
        super().__init__(words)

    def extract(self, candidate: Candidate) -> np.ndarray:
        """Build the word-match array for the candidate's first line.

        Args:
            candidate: candidate whose first ``line_data_list`` entry supplies
                the line text and the value start position

        Returns:
            The result of ``word_in_str`` for the lower-cased subtext, or a
            1 x ``self.dimension`` array of ``int8`` zeros when the subtext
            is empty.

        """
        first_entry = candidate.line_data_list[0]
        # NOTE(review): Util.subtext presumably cuts a window of at most
        # CHUNK_SIZE characters around value_start - confirm against Util.
        fragment = Util.subtext(first_entry.line, first_entry.value_start, CHUNK_SIZE)
        if not fragment:
            # Nothing to inspect: report "no word matched" as a single zero row.
            zero_row = np.zeros(shape=[self.dimension], dtype=np.int8)
            return np.array([zero_row])
        return self.word_in_str(fragment.lower())