"""Build, persist and query a small keyword-indexed database of quotes.

Each database entry is a 3-tuple ``(first_column, text, keywords)`` where
``keywords`` is the set of "important" words extracted from ``text`` by
:func:`index`.
"""

import csv
import json

# Words must be strictly longer than this many letters to be indexed.
WORD_THRESHOLD = 4


def build_db(inputCSV, outputJSON):
    """Read a CSV file, index its second column and save the result as JSON.

    The first row of the CSV is treated as a header and skipped.  Every
    remaining non-empty row yields an entry
    ``(row[0], row[1], index(row[1]))``.

    Args:
        inputCSV: Path of the comma-delimited source file.
        outputJSON: Path of the JSON file to (over)write.

    Returns:
        The list of entries that was written, with keyword sets intact.

    Raises:
        IndexError: If a non-empty data row has fewer than two columns.
    """
    db = []
    # newline='' lets the csv module handle embedded/quoted newlines itself.
    with open(inputCSV, 'r', newline='') as file:
        csv_reader = csv.reader(file, delimiter=',')
        next(csv_reader, None)  # skip the header row (if any)
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to index.
                continue
            # csv rows are lists, so build the entry tuple explicitly; the
            # keyword set must sit at position 2, where serialize() reads it.
            db.append((row[0], row[1], index(row[1])))

    with open(outputJSON, 'w') as file:
        json.dump(serialize(db), file)

    return db


def serialize(db):
    """Convert entries to a JSON-friendly form.

    Sets are not JSON-serializable, so each keyword set becomes a list.
    The list is sorted so that the written file is reproducible.
    """
    return [(row[0], row[1], sorted(row[2])) for row in db]


def unserialize(db):
    """Inverse of :func:`serialize`: turn each keyword list back into a set."""
    return [(row[0], row[1], set(row[2])) for row in db]


def open_db(filePath):
    """Load a database previously written by :func:`build_db`."""
    with open(filePath, 'r') as file:
        return unserialize(json.load(file))


def scalar(a, b):
    """Return the number of elements that set ``a`` shares with ``b``."""
    return len(a.intersection(b))


def find_best_quote(db, indexed_input):
    """Return the entry whose keywords overlap most with ``indexed_input``.

    Args:
        db: Iterable of entries as produced by :func:`build_db` /
            :func:`open_db`.
        indexed_input: Keywords of the query, e.g. the result of
            :func:`index`.

    Returns:
        The first entry with the highest :func:`scalar` score, or ``None``
        when ``db`` is empty.
    """
    best_entry = None
    max_score = None
    for entry in db:
        score = scalar(entry[2], indexed_input)
        # Strict '>' keeps the earliest entry on ties.
        if max_score is None or score > max_score:
            max_score = score
            best_entry = entry
    return best_entry


def index(text):
    """Extract the set of important words from ``text``.

    The text is split on single spaces, every non-alphabetic character is
    removed from each piece, and only pieces longer than
    ``WORD_THRESHOLD`` letters are kept.  Case is preserved.
    """
    words = (
        ''.join(c for c in w if c.isalpha())
        for w in text.split(' ')
    )
    return {w for w in words if len(w) > WORD_THRESHOLD}