Edit the word-length filter condition to include 4-letter words in the index

This commit is contained in:
Tissevert 2024-03-16 15:45:59 +01:00
parent db5a9dabf9
commit 8eae217d17

View file

@ -23,7 +23,8 @@ def keepOnlyAlphaChars(word):
def index(text):
    """Return the set of words from *text* that are long enough to index.

    The text is split on each single whitespace character, every piece is
    passed through keepOnlyAlphaChars, and only the resulting words whose
    length is at least WORD_THRESHOLD are kept. Duplicates collapse because
    the result is a set.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    words = re.split(r'\s', text)
    normalized_words = (keepOnlyAlphaChars(word) for word in words)
    # Use >= (not >) so words exactly WORD_THRESHOLD characters long are kept.
    return {w for w in normalized_words if len(w) >= WORD_THRESHOLD}
def insert(db, row):