Lowercase indexed words to gain flexibility (case sensitivity to be restored when we have a real indexing mechanism)
This commit is contained in:
parent
f2caf77510
commit
b9066f0933
1 changed file with 1 addition and 1 deletion
|
@@ -22,7 +22,7 @@ def keepOnlyAlphaChars(word):
|
||||||
|
|
||||||
def index(text):
|
def index(text):
|
||||||
words = re.split('\s', text)
|
words = re.split('\s', text)
|
||||||
normalized_words = [keepOnlyAlphaChars(word) for word in words]
|
normalized_words = [keepOnlyAlphaChars(word).lower() for word in words]
|
||||||
important_words = set([w for w in normalized_words
|
important_words = set([w for w in normalized_words
|
||||||
if len(w) >= WORD_LENGTH_THRESHOLD])
|
if len(w) >= WORD_LENGTH_THRESHOLD])
|
||||||
return important_words
|
return important_words
|
||||||
|
|
Loading…
Reference in a new issue