Commit b72b5236 authored by Dos Santos David

fix ids

parent c4454c7c
......@@ -5,7 +5,7 @@ def run_index_command(collection, args):
indexer = collection.indexer
if args.index_command == 'build':
build_index = False
build_index = args.build_no_cache
if not args.build_no_cache:
# try to load index from the disk
......@@ -18,11 +18,11 @@ def run_index_command(collection, args):
print('... loading the documents...')
count_documents = 0
for doc_id, document in enumerate(parser.find_documents(limit=None)):
for document in parser.find_documents(limit=None):
count_documents += 1
tokens = tokenizer.get_tokens(document)
indexer.add_document_tokens(doc_id, tokens)
indexer.add_document_tokens(document.document_id, tokens)
print('... {} documents loaded'.format(count_documents))
......
......@@ -117,7 +117,7 @@ class BSBIIndexer:
"""
if token not in self.tokens_map:
return []
return set()
token_id = self.tokens_map[token]
......
from gogole.document import StanfordDocument
from os import listdir
import itertools
class StanfordParser:
......@@ -7,16 +8,17 @@ class StanfordParser:
def find_documents(self, limit=None):
counter = 0 # count documents found
counter = itertools.count() # count documents found
for collection_index in range(10):
collection_dir = self.DIRECTORY + "/" + str(collection_index)
for filename in listdir(collection_dir):
with open(collection_dir + "/" + filename, 'r') as f:
current_document_id = str(collection_index) + filename
url = str(collection_index) + filename
current_document_id = next(counter)
current_document = StanfordDocument(current_document_id)
counter += 1
content = ""
for line in f:
......
......@@ -62,7 +62,7 @@ def build_cli_search_parser(root_parser):
search_parser.add_argument('--vectorial', action='store_const', const='vectorial', dest='search_query_type', help="use the vectorial model")
search_parser.add_argument('query', help="your query")
search_parser.add_argument('query', help="your query", nargs='*')
search_parser.set_defaults(search_query_type='boolean')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment