Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit fa5cbd52 authored by Dos Santos David

rework on timeit decorator

parent 4af00efd
......@@ -26,7 +26,8 @@ def run_index_command(collection, args):
print('... {} documents loaded'.format(count_documents))
_,t = indexer.build_index()
print('... index created in {elapsed_time:.2f} ms'.format(elapsed_time=t))
if args.index_command == 'lookup':
......@@ -88,7 +88,7 @@ class BSBIIndexer:
yield data
@timeit("index built in")
def build_index(self):
# 1/ flush the buffer
......@@ -113,7 +113,6 @@ class BSBIIndexer:
token_id = struct.unpack('i', file.read(4))[0]
return token_id
@timeit("lookup done in")
def token_lookup_with_frequency(self, token):
Returns a list of documents
......@@ -2,6 +2,7 @@ from collections import defaultdict
import math
from gogole.query import Query
from gogole.utils import timeit
class VectorialQuery(Query):
......@@ -9,7 +10,20 @@ class VectorialQuery(Query):
sorted_docs = [x[0] for x in sorted(similarities.items(), key=lambda x: x[1], reverse=True)]
return sorted_docs[:n]
def search(self, query):
def search(self,query):
results,t = self.timed_search(query)
print("Found {count_results} results in {elapsed_time:.2f} ms".format(
for position, doc_id in enumerate(self.find_n_first_elements(results, n=10), start=1):
print('{}: doc id {}\n'.format(position, doc_id))
def timed_search(self, query):
tokens = query.split(' ')
tf_query = defaultdict(int)
......@@ -56,5 +70,4 @@ class VectorialQuery(Query):
similarities[doc_id] = dot_product / (math.sqrt(squared_norm_query) + math.sqrt(squared_norm_doc))
for position, doc_id in enumerate(self.find_n_first_elements(similarities, n=10), start=1):
print('{}: doc id {}'.format(position, doc_id))
return similarities
import functools
import time
def timeit(text):
    """Build a decorator that prints how long each call to a function took.

    Args:
        text: Label inserted into the printed line, just before the duration.

    Returns:
        A decorator. The decorated callable forwards its arguments to the
        wrapped function, prints the elapsed time in milliseconds, and
        returns the wrapped function's result unchanged.
    """
    def timeit_decorator(method):
        # Preserve the wrapped function's __name__/__doc__ on the wrapper.
        @functools.wraps(method)
        def timed(*args, **kw):
            # perf_counter is monotonic and high-resolution, unlike the
            # wall clock, so the measured duration cannot go negative.
            ts = time.perf_counter()
            result = method(*args, **kw)
            te = time.perf_counter()

            print('... {name} {time:2.2f} ms\n'.format(name=text, time=(te - ts) * 1000))
            return result

        return timed

    return timeit_decorator
def timeit(method):
    """Decorate a function so each call also reports its elapsed time.

    Args:
        method: The callable to time.

    Returns:
        A wrapper that forwards its arguments to ``method`` and returns a
        ``(result, elapsed_time)`` tuple, where ``elapsed_time`` is the
        duration of the call in milliseconds.
    """
    # Preserve the wrapped function's __name__/__doc__ on the wrapper.
    @functools.wraps(method)
    def timed(*args, **kw):
        # perf_counter is monotonic and high-resolution, unlike the wall
        # clock, so the measured duration cannot go negative.
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()

        elapsed_time = (te - ts) * 1000  # ms
        return (result, elapsed_time)

    return timed
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment