From 144532201381e0c6822a0dd121a69173919a117c Mon Sep 17 00:00:00 2001
From: Tissevert <tissevert+devel@marvid.fr>
Date: Sat, 3 Feb 2024 11:45:39 +0100
Subject: [PATCH] Normalise the scalar product to get comparable results
 unbiased by the particular length of a given (word) set

---
 memory.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/memory.py b/memory.py
index 3a283b8..88216fc 100644
--- a/memory.py
+++ b/memory.py
@@ -1,5 +1,6 @@
 import csv
 import json
+from math import sqrt
 
 WORD_THRESHOLD = 4
 
@@ -52,7 +53,7 @@ We define a similarity measure on sets which counts the number of elements
 they have in common
 """
 def scalar(a, b):
-    return len(a.intersection(b))
+    return len(a.intersection(b))/sqrt(len(a)*len(b)) if a and b else 0.0  # an empty set has no overlap; guard avoids ZeroDivisionError
 
 def find_best_quote(db, user_input):
     indexed_input = index(user_input)