Commit 160e0ef2 authored by Dos Santos David

fix potential bugs

parent fdfa406a
@@ -5,7 +5,7 @@ from gogole.parser import QRelsParser
def run(collection, args):
    # Runs the CACM Parser on the queries file with the same structure
    cacm_parser = CACMParser("data/query.text")
    nrequests = int(args.nrequests[0])
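
The `run()` hunk reads the number of requests from the parsed command-line arguments as a one-element list of strings, hence the explicit `int()` conversion. A minimal sketch of that pattern, assuming an argparse flag named `--nrequests`; the actual CLI wiring in gogole is not shown in this diff:

```python
import argparse

# Hypothetical argument definition; gogole's real parser setup is not part of this diff.
parser = argparse.ArgumentParser()
parser.add_argument("--nrequests", nargs=1, default=["10"])

args = parser.parse_args(["--nrequests", "5"])
nrequests = int(args.nrequests[0])  # argparse hands back strings, so convert before use
print(nrequests)                    # -> 5
```
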
@@ -127,7 +127,7 @@ class BSBIIndexer(Indexer):
        for token, frequency in counted_tokens.items():
            token_id = self.token_to_token_id[token]
-            norm += (1+math.log10(frequency))**2 * (math.log10(N/self.token_id_to_df[token_id]))**2
+            norm += ((1+math.log10(frequency)) * (math.log10(N/self.token_id_to_df[token_id]))) ** 2
        return math.sqrt(norm)
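
The two versions of the accumulator line are algebraically identical, since `a**2 * b**2 == (a*b)**2`; the rewrite only groups the tf and idf factors before squaring. A standalone sketch of the document-norm computation this hunk belongs to, with the document count and document frequencies passed in explicitly rather than read from the indexer's state as the real method does:

```python
import math
from collections import Counter

def document_norm(counted_tokens: Counter, n_documents: int, df: dict) -> float:
    """Euclidean norm of a document's tf-idf vector:
    sqrt(sum over tokens of ((1 + log10(tf)) * log10(N / df)) ** 2)."""
    norm = 0.0
    for token, frequency in counted_tokens.items():
        weight = (1 + math.log10(frequency)) * math.log10(n_documents / df[token])
        norm += weight ** 2
    return math.sqrt(norm)

# Example with two documents: "query" appears in both (idf = 0), "cacm" in one.
print(document_norm(Counter({"query": 3, "cacm": 1}), n_documents=2, df={"query": 2, "cacm": 1}))
```
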
@@ -193,7 +193,8 @@ class BSBIIndexer(Indexer):
    def save_document_metadata(self):
        with open(self.DOCUMENT_METADATA_FILE, 'wb') as f:
-            for _, counted_tokens in self.index.items():
+            for doc_id in sorted(self.index.keys()):
+                counted_tokens = self.index[doc_id]
                norm = self.compute_document_norm(counted_tokens)
                _, max_frequency = counted_tokens.most_common(1)[0]
                b = bytearray()
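
Iterating over `sorted(self.index.keys())` makes the metadata file's record order deterministic and keyed by `doc_id`, rather than whatever order the dict happens to yield. A hedged sketch of that write loop using fixed-width binary records; the `struct` layout and the way the norm is supplied are assumptions, not the repository's actual byte format:

```python
import struct
from collections import Counter

RECORD = struct.Struct("<IdI")  # assumed layout: doc_id, norm, max term frequency

def save_document_metadata(index: dict, norms: dict, path: str) -> None:
    """Write one fixed-width record per document, ordered by doc_id."""
    with open(path, "wb") as f:
        for doc_id in sorted(index.keys()):        # deterministic, sorted output
            counted_tokens = index[doc_id]
            _, max_frequency = counted_tokens.most_common(1)[0]
            f.write(RECORD.pack(doc_id, norms[doc_id], max_frequency))

# Example usage with two tiny "documents" (file name is illustrative).
index = {2: Counter({"cacm": 1}), 1: Counter({"query": 3, "cacm": 1})}
save_document_metadata(index, norms={1: 0.3, 2: 0.0}, path="document_metadata.bin")
```
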
@@ -227,6 +228,7 @@ class BSBIIndexer(Indexer):
        :param token: token to search in documents
        """
        token = token.lower()
        document_ids = dict()
        if token not in self.token_to_token_id:
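
In this hunk the lookup lower-cases the token and initialises `document_ids` before checking the vocabulary, so a token that was never indexed can yield an empty result. A sketch of that guard pattern as a free function; the `postings` mapping here is a stand-in for the indexer's on-disk inverted index, which this hunk does not show:

```python
def documents_for_token(token: str, token_to_token_id: dict, postings: dict) -> dict:
    """Return {doc_id: frequency} for a token, or an empty dict on a vocabulary miss."""
    token = token.lower()
    document_ids = dict()                # defined before the guard ...
    if token not in token_to_token_id:
        return document_ids              # ... so an unknown token yields {}
    token_id = token_to_token_id[token]
    document_ids.update(postings.get(token_id, {}))
    return document_ids

# Example: the second call misses the vocabulary and returns {}.
print(documents_for_token("Query", {"query": 0}, {0: {12: 3}}))    # -> {12: 3}
print(documents_for_token("unknown", {"query": 0}, {0: {12: 3}}))  # -> {}
```
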
@@ -284,12 +286,13 @@ class BSBIIndexer(Indexer):
                self.token_to_token_id = pickle.load(f)
                self.status = self.INDEX_STATUS_CREATED
+                self.init_token_id_seq(max(self.token_to_token_id.keys()))
                return True
        except FileNotFoundError:
            return False
-        self.init_token_id_seq(max(self.token_to_token_id.keys()))

    def get_index_size(self):
        return os.stat(self.INVERTED_INDEX_FILE).st_size
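
Calling `init_token_id_seq(...)` inside the successful-load path means the token-id sequence is only re-seeded when the pickled vocabulary was actually read; after a `FileNotFoundError` the method simply reports failure. A minimal sketch of that load pattern, with illustrative file, class and method names (and `values()` rather than the original's `keys()` so the example stays type-consistent under the assumed token-to-id mapping):

```python
import pickle

class TinyIndexer:
    """Illustrative stand-in for BSBIIndexer's cache-loading behaviour."""
    TOKENS_MAP_FILE = "tokens_map.pickle"   # assumed file name
    INDEX_STATUS_CREATED = "created"

    def __init__(self):
        self.token_to_token_id = {}
        self.status = None
        self.next_token_id = 0

    def init_token_id_seq(self, start: int = 0):
        self.next_token_id = start + 1

    def load_from_cache(self) -> bool:
        try:
            with open(self.TOKENS_MAP_FILE, "rb") as f:
                self.token_to_token_id = pickle.load(f)
            self.status = self.INDEX_STATUS_CREATED
            # Re-seed the id sequence only after a successful load; values() keeps
            # this example consistent with a token -> id mapping.
            self.init_token_id_seq(max(self.token_to_token_id.values()))
            return True
        except FileNotFoundError:
            return False
```
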