Change the word-length filter condition to include 4-letter words in the index
This commit is contained in:
parent
db5a9dabf9
commit
8eae217d17
1 changed file with 2 additions and 1 deletion
|
@@ -23,7 +23,8 @@ def keepOnlyAlphaChars(word):
|
||||||
def index(text):
|
def index(text):
|
||||||
words = re.split('\s', text)
|
words = re.split('\s', text)
|
||||||
normalized_words = [keepOnlyAlphaChars(word) for word in words]
|
normalized_words = [keepOnlyAlphaChars(word) for word in words]
|
||||||
important_words = set([w for w in normalized_words if len(w) > WORD_THRESHOLD])
|
important_words = set([w for w in normalized_words
|
||||||
|
if len(w) >= WORD_THRESHOLD])
|
||||||
return important_words
|
return important_words
|
||||||
|
|
||||||
def insert(db, row):
|
def insert(db, row):
|
||||||
|
|
Loading…
Reference in a new issue