Commit 38991e0b authored by Dos Santos David

use document norm

parent 208d7d7f
...@@ -51,7 +51,6 @@ class VectorialQuery(Query): ...@@ -51,7 +51,6 @@ class VectorialQuery(Query):
dot_product = 0 dot_product = 0
squared_norm_query = 0 squared_norm_query = 0
squared_norm_doc = 0
for token, token_df in df.items(): for token, token_df in df.items():
doc_weight = 0 doc_weight = 0
...@@ -64,10 +63,11 @@ class VectorialQuery(Query): ...@@ -64,10 +63,11 @@ class VectorialQuery(Query):
query_weight = (1 + math.log10(tf_query[token])) query_weight = (1 + math.log10(tf_query[token]))
squared_norm_query += (query_weight**2) squared_norm_query += (query_weight**2)
squared_norm_doc += (doc_weight**2)
dot_product += doc_weight * query_weight dot_product += doc_weight * query_weight
similarities[doc_id] = dot_product / (math.sqrt(squared_norm_query) + math.sqrt(squared_norm_doc))
norm_doc = self.collection.indexer.document_norms[doc_id]
similarities[doc_id] = dot_product / (norm_doc + math.sqrt(squared_norm_query))
return similarities return similarities
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment