Lowercase indexed words to gain flexibility (to be reverted once we have a real indexing mechanism)
This commit is contained in:
parent
f2caf77510
commit
b9066f0933
1 changed file with 1 addition and 1 deletion
|
@@ -22,7 +22,7 @@ def keepOnlyAlphaChars(word):
|
|||
|
||||
def index(text):
    """Return the set of significant, lowercased words found in *text*.

    The text is split on every single whitespace character, each token is
    passed through ``keepOnlyAlphaChars`` and lowercased, and only tokens
    whose length is at least ``WORD_LENGTH_THRESHOLD`` are kept.

    Args:
        text: The string to index.

    Returns:
        A set of normalized words; duplicates collapse into one entry.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    words = re.split(r'\s', text)
    # Lowercase so differently-cased spellings of a word collapse to one entry.
    normalized_words = [keepOnlyAlphaChars(word).lower() for word in words]
    return {w for w in normalized_words if len(w) >= WORD_LENGTH_THRESHOLD}
|
||||
|
|
Loading…
Reference in a new issue