Commit 283f342f authored by Dos Santos David

handle not operator in boolean query

parent c92a6e24
@@ -3,18 +3,36 @@ from gogole.query import Query
class BooleanQuery(Query):
    """Boolean query evaluated over the collection's index.

    The query string is assumed to be in conjunctive normal form: clauses
    joined by ``' and '``, each clause being terms joined by ``' or '``,
    and each term optionally prefixed with ``'not '``.
    """

    OPERATOR_AND = ' and '
    OPERATOR_OR = ' or '
    # No leading space: each term is stripped before the prefix test, so a
    # negation at the very start of a clause is still recognised.
    OPERATOR_NOT = 'not '

    def search_documents(self, query):
        """Return the set of document ids matching ``query``.

        The result is the intersection, over every ``and`` clause, of the
        union of the document ids matched by the clause's ``or`` terms.
        A term starting with ``'not '`` contributes every document id that
        is *not* returned by the index lookup for the remaining token.

        NOTE(review): assumes ``indexer.token_lookup`` returns a set of ids
        and that document ids are ``0 .. count_documents - 1`` -- confirm
        against the indexer.
        """
        indexer = self.collection.indexer

        # Built lazily, and only once, the first time a negated term is met.
        all_doc_ids = None

        doc_ids_by_conjunction = []
        for and_query in query.split(self.OPERATOR_AND):
            doc_ids_disjunction = set()
            for query_term in and_query.split(self.OPERATOR_OR):
                query_term = query_term.strip()
                is_not_query = query_term.startswith(self.OPERATOR_NOT)
                if is_not_query:
                    # Drop the operator and any extra spaces that followed it.
                    query_term = query_term[len(self.OPERATOR_NOT):].strip()

                doc_ids = indexer.token_lookup(query_term)

                if is_not_query:
                    if all_doc_ids is None:
                        # range() excludes its stop value, so the stop must be
                        # the document count itself (not count - 1) for the
                        # last document to be part of the complement.
                        all_doc_ids = set(range(indexer.count_documents))
                    doc_ids_disjunction.update(all_doc_ids - doc_ids)
                else:
                    doc_ids_disjunction.update(doc_ids)

            doc_ids_by_conjunction.append(doc_ids_disjunction)

        return set.intersection(*doc_ids_by_conjunction)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment