Commit ee070a38 authored by Dos Santos David

count documents in indexer

parent cc3cd962
......@@ -25,6 +25,8 @@ class BSBIIndexer:
self.tmp_filenames = []
self.tmp_file_id_seq = itertools.count()
self.count_documents = 0
self.INDEX_FILE = '.cache/{}_index'.format(collection_name)
self.TOKENS_MAP_FILE = '.cache/{}_tokens_map'.format(collection_name)
self.DOCUMENTS_MAP_FILE = '.cache/{}_documents_map'.format(collection_name)
......@@ -68,6 +70,7 @@ class BSBIIndexer:
# convert tokens to token ids
token_ids = set()
self.count_documents += 1
for token, frequency in counted_tokens.items():
token_id = self.find_or_create_token_id(token)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment