Source code for credsweeper.ml_model.features.word_in_line
from typing import List
import numpy as np
from credsweeper.common.constants import CHUNK_SIZE
from credsweeper.credentials import Candidate
from credsweeper.ml_model.features.word_in import WordIn
from credsweeper.utils import Util
[docs]
class WordInLine(WordIn):
    """Word-presence feature computed over the candidate's first line.

    The predefined word list is handed to the ``WordIn`` base class; matching
    itself is delegated to the inherited ``word_in_str``.
    """

    def __init__(self, words: List[str]) -> None:
        """Initialise the feature with the list of words to look for.

        Args:
            words: list of predefined words - MUST BE IN LOWER CASE, because
                the inspected text is lower-cased before matching.
        """
        super().__init__(words)

    def extract(self, candidate: Candidate) -> np.ndarray:
        """Build the word-match array for the first line of ``candidate``.

        Args:
            candidate: candidate whose first ``line_data_list`` entry supplies
                the line text and the value start offset.

        Returns:
            The result of ``word_in_str`` applied to the lower-cased text
            obtained from ``Util.subtext``; when that text is empty, an
            all-zero ``int8`` array holding a single row of ``self.dimension``
            entries.
        """
        first_line = candidate.line_data_list[0]
        # NOTE(review): Util.subtext presumably cuts a window of about CHUNK_SIZE
        # characters around the value start - confirm against Util.
        window = Util.subtext(first_line.line, first_line.value_start, CHUNK_SIZE)
        if not window:
            # Nothing to inspect: report "no word matched" for every slot.
            empty_row = np.zeros(shape=[self.dimension], dtype=np.int8)
            return np.array([empty_row])
        return self.word_in_str(window.lower())