Source code for credsweeper.ml_model.features.morpheme_dense

from credsweeper.common import static_keyword_checklist
from credsweeper.credentials.candidate import Candidate
from credsweeper.ml_model.features.feature import Feature


[docs] class MorphemeDense(Feature): """Feature calculates morphemes density for a value"""
[docs] def extract(self, candidate: Candidate) -> float: density = 0.0 if value := candidate.line_data_list[0].value.lower(): morphemes_length = 0 for morpheme in static_keyword_checklist.morpheme_set: morpheme_pos = value.find(morpheme) if 0 <= morpheme_pos: morpheme_len = len(morpheme) while 0 <= morpheme_pos: morphemes_length += morpheme_len morpheme_pos += morpheme_len morpheme_pos = value.find(morpheme, morpheme_pos) # normalization: minimal morpheme length is 3 density = morphemes_length / len(value) if 1.0 < density: # overlap morpheme case density = 1.0 return density