Commit 78e8f3b4 authored by Prot Alexandre

adding details flag to cacm searches to show articles

parent cdb05e19
......@@ -16,5 +16,6 @@ def run(collection, args):
weight=args.weight_type
))
display_details = args.display_details == "details"
query_browser.search(q)
query_browser.search(q, display_details)
......@@ -70,4 +70,7 @@ class CACMParser:
def parse_all(self, limit=None):
    """
    Parse the collection and index the documents by id.

    `limit` is forwarded unchanged to `find_documents`. The result maps each
    `document_id` to its document; if two documents share an id, the one
    yielded last wins.
    """
    documents_by_id = {}
    for document in self.find_documents(limit):
        documents_by_id[document.document_id] = document
    return documents_by_id
# Naive way because the cacm collection is very small
def retrieve_documents_by_id(self, doc_ids, limit=None):
    """
    Return the documents whose id appears in `doc_ids`.

    The whole collection is scanned through `find_documents(limit)`, so the
    result follows the collection's order, not the order of `doc_ids`.

    `doc_ids` may be any iterable of ids (list, set, generator, ...).
    """
    # Materialise the ids once: membership tests become O(1), and a one-shot
    # iterator is not exhausted by the first `in` check.
    wanted_ids = set(doc_ids)
    return [doc for doc in self.find_documents(limit) if doc.document_id in wanted_ids]
......@@ -37,11 +37,18 @@ class BooleanQuery(Query):
return set.intersection(*doc_ids_by_conjunction)
def search(self, query, display_details=False):
    """
    Run a boolean query and print the matching documents.

    The query is lower-cased before being handed to `search_documents`.

    When the collection is "cacm" and `display_details` is truthy, the title
    and abstract of every matching document are printed. Otherwise only the
    matching document ids are printed, comma-separated on one line.

    `display_details` defaults to False so that callers still using
    `search(query)` keep working.

    Nothing is returned; the results are only written to stdout.
    """
    doc_ids = self.search_documents(query.lower())

    if self.collection.NAME == "cacm" and display_details:
        docs = self.collection.parser.retrieve_documents_by_id(doc_ids)
        for doc in docs:
            # Title centred in a 50-character banner of '*'.
            print("\n\n{:*^50}\n".format(" {} ".format(doc.title)))
            print("Content: \t{}".format(doc.abstract))
        return None

    print("Document ids : {}".format(", ".join(str(x) for x in doc_ids)))
......@@ -20,13 +20,24 @@ class VectorialQuery(Query):
sorted_docs = [x[0] for x in sorted(similarities.items(), key=lambda x: x[1], reverse=True)]
return sorted_docs[:n]
def search(self, query, display_details=False):
    """
    Run a vectorial query and print the ten best-ranked documents.

    A summary line with the number of results and the elapsed time reported
    by `timed_search` is always printed first.

    When the collection is "cacm" and `display_details` is truthy, the
    title, similarity score and abstract of each top document are printed,
    best match first. Otherwise one line per document is printed with its
    rank, score and id.

    `display_details` defaults to False so that callers still using
    `search(query)` keep working.

    Nothing is returned; the results are only written to stdout.
    """
    results, t = self.timed_search(query)
    print("Found {count_results} results in {elapsed_time:.2f} ms".format(
        elapsed_time=t,
        count_results=len(results)
    ))

    # Rank once and reuse the ranking for both display modes.
    doc_ids = list(self.find_n_first_elements(results, n=10))

    if self.collection.NAME == "cacm" and display_details:
        # The parser may hand the documents back in a different order than
        # the ranking, so index them by id and walk the ranked ids instead.
        docs_by_id = {
            doc.document_id: doc
            for doc in self.collection.parser.retrieve_documents_by_id(doc_ids)
        }
        for doc_id in doc_ids:
            doc = docs_by_id.get(doc_id)
            if doc is None:
                # Ranked id the parser could not find: nothing to display.
                continue
            print("\n\n{:*^50}\n".format(" {} ".format(doc.title)))
            print("Similarity measure: \t{}".format(results[doc.document_id]))
            print("Content: \t\t{}".format(doc.abstract))
        return None

    for position, doc_id in enumerate(doc_ids, start=1):
        print('{} [cos: {}]: doc id {}\n'.format(position, results[doc_id], doc_id))
......
......@@ -74,6 +74,7 @@ def build_cli_search_parser(root_parser):
search_parser = root_parser.add_parser('search', description='search for documents')
search_parser.add_argument('-b', '--boolean', action='store_const', const='boolean', dest='search_query_type', help="use the booolean model")
search_parser.add_argument('-v', '--vectorial', action='store_const', const='vectorial', dest='search_query_type', help="use the vectorial model")
search_parser.add_argument('-d', '--detail', action='store_const', const='details', dest='display_details', help="display details about the articles")
search_parser.add_argument('--tf-idf', action='store_const', const='tf-idf', dest='weight_type', help="use the tf-idf weight type")
search_parser.add_argument('--norm-tf-idf', action='store_const', const='norm-tf-idf', dest='weight_type', help="use the normalized tf-idf weight type")
search_parser.add_argument('--norm-freq', action='store_const', const='norm-freq', dest='weight_type', help="use the normalized frequency weight type")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment