Lowercase indexed words to gain flexibility (case sensitivity to be restored once we have a real indexing mechanism)

This commit is contained in:
Tissevert 2024-03-16 15:48:28 +01:00
parent f2caf77510
commit b9066f0933

View file

@ -22,7 +22,7 @@ def keepOnlyAlphaChars(word):
def index(text):
    """Return the set of significant, lowercased words found in *text*.

    The text is split on every single whitespace character; each resulting
    token is passed through ``keepOnlyAlphaChars`` and then lowercased.
    Only tokens whose length is at least ``WORD_LENGTH_THRESHOLD`` are kept,
    and duplicates collapse because the result is a set.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    words = re.split(r'\s', text)
    normalized_words = [keepOnlyAlphaChars(word).lower() for word in words]
    return {w for w in normalized_words if len(w) >= WORD_LENGTH_THRESHOLD}