Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

main.py 5.49 KB
Newer Older
Dos Santos David's avatar
Dos Santos David committed
1
import argparse
2 3
import sys
import traceback
Dos Santos David's avatar
Dos Santos David committed
4

Dos Santos David's avatar
Dos Santos David committed
5 6
from gogole import commands
from gogole.config import COLLECTIONS
Dos Santos David's avatar
Dos Santos David committed
7

Dos Santos David's avatar
Dos Santos David committed
8
from gogole.tokenizer.simple_tokenizer import SimpleTokenizer
Dos Santos David's avatar
Dos Santos David committed
9
from gogole.tokenizer.no_tokenizer import NoTokenizer
Dos Santos David's avatar
Dos Santos David committed
10
from gogole.indexer import BSBIIndexer
Dos Santos David's avatar
Dos Santos David committed
11

Dos Santos David's avatar
Dos Santos David committed
12

Dos Santos David's avatar
Dos Santos David committed
13 14 15 16 17 18 19 20 21
# Banner printed once at start-up; it lists the commands understood by the
# interactive prompt.
WELCOME_MESSAGE="""
*************************
   Welcome to gogole !
*************************

You don't know where to start ? Let me help you !

* analyze all           : I'll do some analyzes on the collection you gave me

* index stats           : I'll show you some statistics on the index
* index build           : I'll build the reversed index for you
* index lookup <token>  : I'll tell you in where documents your token is

* search <query>                  : I'll show you the documents you need
  search -b,--boolean <query>          Your query must be in the Conjunctive normal form
                                    Like "a OR b OR c AND d" is "(a OR b OR c) AND d"
* search -v,--vectorial <query>   : Search using the vectorial model

* eval <n> : evaluate performances and limit evaluation to the n first requests

* quit
"""


Dos Santos David's avatar
Dos Santos David committed
37 38
def build_cli_analyze_parser(root_parser):
    """Register the ``analyze`` sub-command on *root_parser*.

    *root_parser* is the sub-parsers action returned by
    ``ArgumentParser.add_subparsers()``. The sub-command accepts zero or more
    positional values, collected under ``analyze_command`` and restricted to
    the entries of ``commands.analyze_command.COMMANDS``.
    """
    args_parser_analyze = root_parser.add_parser(
        'analyze',
        description="Run an analyze like tokens count or find heap's law parameters",
        help="additional help for analyze",
    )

    args_parser_analyze.add_argument(
        "analyze_command",
        nargs='*',
        choices=commands.analyze_command.COMMANDS,
        metavar="command",
        help="can be any of {}".format(", ".join(commands.analyze_command.COMMANDS))
    )
Dos Santos David's avatar
Dos Santos David committed
50

51 52 53 54 55 56 57 58 59
def build_cli_eval_parser(root_parser):
    """Register the ``eval`` sub-command on *root_parser*.

    The sub-command takes exactly one positional value, stored as a
    one-element list under ``nrequests``, and three mutually overriding
    flags that all write to ``weight_type`` (default ``'tf-idf'``).
    """
    subcommand = root_parser.add_parser('eval', description='evaluate for documents')
    subcommand.add_argument('nrequests', nargs=1)

    # (option string, stored constant, help text) for every weighting scheme.
    weighting_options = (
        ('--tf-idf', 'tf-idf', "use the tf-idf weight type"),
        ('--norm-tf-idf', 'norm-tf-idf', "use the normalized tf-idf weight type"),
        ('--norm-freq', 'norm-freq', "use the normalized frequency weight type"),
    )
    for option, constant, help_text in weighting_options:
        subcommand.add_argument(
            option,
            action='store_const',
            const=constant,
            dest='weight_type',
            help=help_text,
        )

    subcommand.set_defaults(weight_type='tf-idf')

Dos Santos David's avatar
Dos Santos David committed
60

Dos Santos David's avatar
Dos Santos David committed
61 62 63 64 65 66 67 68 69
def build_cli_index_parser(root_parser):
    """Register the ``index`` sub-command (alias ``i``) on *root_parser*.

    The chosen nested command name is stored under ``index_command``:

    * ``lookup <token>`` -- one positional value, stored as a one-element
      list under ``token``;
    * ``build [--no-cache]`` -- ``build_no_cache`` is ``True`` when the flag
      is given, ``False`` otherwise;
    * ``stats`` -- takes no argument.
    """
    index_parser = root_parser.add_parser('index', description="run commands on the index", aliases=['i'])
    index_subparser = index_parser.add_subparsers(dest="index_command")

    lookup_parser = index_subparser.add_parser('lookup', help="find the documents where a token is")
    lookup_parser.add_argument('token', nargs=1)

    build_parser = index_subparser.add_parser('build', help="build the index")
    build_parser.add_argument(
        '--no-cache',
        help='do not use the cache when building the index',
        action='store_true',
        dest='build_no_cache'
    )

    # ``stats`` has no options of its own, so the returned parser is not kept.
    index_subparser.add_parser('stats', help='show stats about the index')

Dos Santos David's avatar
Dos Santos David committed
78 79
def build_cli_search_parser(root_parser):
    """Register the ``search`` sub-command on *root_parser*.

    Parsed attributes:

    * ``search_query_type`` -- ``'boolean'`` (``-b``/``--boolean``, the
      default) or ``'vectorial'`` (``-v``/``--vectorial``);
    * ``weight_type`` -- ``'tf-idf'`` (default), ``'norm-tf-idf'`` or
      ``'norm-freq'``, selected by the flag of the same name;
    * ``query`` -- the remaining positional words, as a list (possibly empty).
    """
    search_parser = root_parser.add_parser('search', description='search for documents')

    # Query model selection; both flags write to the same destination.
    search_parser.add_argument('-b', '--boolean', action='store_const', const='boolean', dest='search_query_type', help="use the boolean model")
    search_parser.add_argument('-v', '--vectorial', action='store_const', const='vectorial', dest='search_query_type', help="use the vectorial model")

    # Weighting scheme selection; all three flags write to ``weight_type``.
    search_parser.add_argument('--tf-idf', action='store_const', const='tf-idf', dest='weight_type', help="use the tf-idf weight type")
    search_parser.add_argument('--norm-tf-idf', action='store_const', const='norm-tf-idf', dest='weight_type', help="use the normalized tf-idf weight type")
    search_parser.add_argument('--norm-freq', action='store_const', const='norm-freq', dest='weight_type', help="use the normalized frequency weight type")

    search_parser.add_argument('query', help="your query", nargs='*')

    search_parser.set_defaults(weight_type='tf-idf', search_query_type='boolean')
Dos Santos David's avatar
Dos Santos David committed
90

91 92 93
def build_cli_quit_parser(root_parser):
    """Register the argument-less ``quit`` sub-command on *root_parser*."""
    # The new sub-parser needs no further configuration, so it is not kept.
    root_parser.add_parser('quit', description='quit')

Dos Santos David's avatar
Dos Santos David committed
94 95 96 97
def build_cli_parser():
    """Build and return the parser for commands typed at the interactive prompt.

    The selected sub-command name is stored under ``main_command``. The
    ``analyze``, ``eval``, ``index``, ``search`` and ``quit`` sub-commands are
    registered by their respective ``build_cli_*_parser`` helpers.
    """
    # cli parser: created without the automatic -h/--help option
    cli_parser = argparse.ArgumentParser(prog="", add_help=False)
    cli_subparser = cli_parser.add_subparsers(dest="main_command")

    build_cli_analyze_parser(cli_subparser)
    build_cli_eval_parser(cli_subparser)
    build_cli_index_parser(cli_subparser)
    build_cli_search_parser(cli_subparser)
    build_cli_quit_parser(cli_subparser)

    return cli_parser

def main():
    """Placeholder entry point: only reports that it is not supported yet."""
    notice = 'not supported yet'
    print(notice)
Dos Santos David's avatar
Dos Santos David committed
109 110 111


if __name__ == "__main__":
    # top-level parser: selects the collection to work on
    main_parser = argparse.ArgumentParser(prog="gogole")

    main_parser.add_argument(
        "-c", "--collection",
        help="collection to use")

    main_args = main_parser.parse_args()

    # Report an unknown (or missing) collection name as a usage error rather
    # than letting the lookup crash with a raw KeyError traceback.
    try:
        collection_factory = COLLECTIONS[main_args.collection]
    except KeyError:
        main_parser.error("unknown collection: {!r}".format(main_args.collection))
    collection = collection_factory()

    print(WELCOME_MESSAGE)
    cli_parser = build_cli_parser()

    # Read-eval loop: one command per line until the user interrupts or
    # closes stdin.
    while True:
        try:
            print('')
            line = input("gogole > ")

            # split() without a separator drops the empty tokens that
            # split(' ') produces for repeated, leading or trailing spaces.
            tokens = line.split()
            if not tokens:
                # Nothing typed: just show the prompt again.
                continue

            args = cli_parser.parse_args(tokens)
            commands.MAIN_COMMANDS_MAP[args.main_command](collection, args)

        except SystemExit:
            # argparse reports invalid input by raising SystemExit; swallow it
            # so the prompt stays alive (a SystemExit raised from inside a
            # command is swallowed here as well).
            pass

        except (KeyboardInterrupt, EOFError):
            print('\nBye !\n\n\n')
            sys.exit(0)  # exit successfully

        except Exception:
            print('\nOuuups. Something is broken :/')
            print('-'*60)
            # print_exc() writes the traceback itself and returns None, so its
            # result must not be passed to print().
            traceback.print_exc(file=sys.stdout)
            print('-'*60)