# Source code for credsweeper.common.keyword_checklist

from functools import cached_property
from typing import Set, List

from credsweeper.app import APP_PATH


class KeywordChecklist:
    """Keyword and morpheme checklists loaded from bundled text files.

    Per the original note, the keyword checklist contains words of three or
    more letters. Both files are read once, when the instance is created.
    """

    __keyword_list: List[str]
    __keyword_set: Set[str]
    __morpheme_set: Set[str]
    KEYWORD_PATH = APP_PATH / "common" / "keyword_checklist.txt"
    MORPHEME_PATH = APP_PATH / "common" / "morpheme_checklist.txt"

    def __init__(self) -> None:
        # split() without arguments splits on any whitespace run, so the
        # trailing 0x0A at the end of the file does not yield an empty item.
        keywords = self.KEYWORD_PATH.read_text().split()
        # Build the set from the words already read instead of reading and
        # splitting the same file a second time.
        self.__keyword_set = set(keywords)
        # Longest keywords first; the sort is stable, so words of equal
        # length keep their order from the file.
        keywords.sort(key=len, reverse=True)
        self.__keyword_list = keywords
        # The list of morphemes can be combined to form words.
        # The value is considered a variable if at least two exist.
        self.__morpheme_set = set(self.MORPHEME_PATH.read_text().split())

    @cached_property
    def keyword_set(self) -> Set[str]:
        """Get the set of keywords."""
        return self.__keyword_set

    @cached_property
    def keyword_list(self) -> List[str]:
        """Get the list of keywords in descending order of length."""
        return self.__keyword_list

    @cached_property
    def keyword_len(self) -> int:
        """Number of items in keyword_set."""
        return len(self.__keyword_set)

    @cached_property
    def morpheme_set(self) -> Set[str]:
        """Get the set of morphemes.

        Return:
            Set of morpheme strings read from MORPHEME_PATH

        """
        return self.__morpheme_set

    @cached_property
    def morpheme_len(self) -> int:
        """Number of items in morpheme_set."""
        return len(self.__morpheme_set)

    def check_morphemes(self, line_lower: str, threshold: int) -> bool:
        """Check whether the line contains more morphemes than the threshold.

        Each morpheme is counted at most once, however many times it occurs
        in the line. Matching is a plain substring test.

        Args:
            line_lower: input line - MUST be in lower case
            threshold: number of matched morphemes that must be exceeded

        Return:
            True - if the number of distinct morphemes found in the line
            exceeds the threshold

        """
        matches = 0
        for morpheme in self.morpheme_set:
            if morpheme in line_lower:
                matches += 1
                # Stop scanning as soon as the threshold is exceeded.
                if threshold < matches:
                    return True
        return False