from gogole.query import Query


class BooleanQuery(Query):
    """Boolean retrieval over the collection's index.

    Queries are expected in conjunctive normal form: clauses separated by
    ``' and '``, each clause being terms separated by ``' or '``, and each
    term optionally prefixed with ``'not '``.
    """

    OPERATOR_AND = ' and '
    OPERATOR_OR = ' or '
    OPERATOR_NOT = 'not '

    def search_documents(self, query):
        """Return the set of document ids matching ``query``.

        Args:
            query: boolean expression in conjunctive normal form, e.g.
                ``"a or not b and c"`` is read as ``(a or not b) and c``.

        Returns:
            The intersection, over every AND clause, of the union of the
            document ids matched by the clause's terms.
        """
        indexer = self.collection.indexer

        # Universe of document ids, needed to complement a negated term.
        # Built lazily, and only once, because it is as large as the
        # collection and most queries contain no NOT at all.
        all_doc_ids = None

        doc_ids_by_conjunction = []

        for and_query in query.split(self.OPERATOR_AND):
            doc_ids_disjunction = set()

            for query_term in and_query.split(self.OPERATOR_OR):
                query_term = query_term.strip()

                is_not_query = query_term.startswith(self.OPERATOR_NOT)
                if is_not_query:
                    # Drop the operator, then any extra spaces that followed
                    # it, so "not  foo" looks up "foo" rather than " foo".
                    query_term = query_term[len(self.OPERATOR_NOT):].strip()

                doc_ids = indexer.token_lookup(query_term)

                if is_not_query:
                    if all_doc_ids is None:
                        # NOTE(review): assumes document ids run from 0 to
                        # count_documents - 1 -- confirm against the indexer.
                        # range() excludes its stop value, so the stop must
                        # be the document count itself for the last id to be
                        # part of the universe.
                        all_doc_ids = set(range(indexer.count_documents))
                    # difference() accepts any iterable, unlike the `-`
                    # operator, which requires a set on both sides.
                    doc_ids_disjunction.update(
                        all_doc_ids.difference(doc_ids))
                else:
                    doc_ids_disjunction.update(doc_ids)

            doc_ids_by_conjunction.append(doc_ids_disjunction)

        # str.split always yields at least one clause, so the argument list
        # is never empty here.
        return set.intersection(*doc_ids_by_conjunction)