main.py 4.92 KB
Newer Older
Dos Santos David's avatar
Dos Santos David committed
1 2
import argparse

Dos Santos David's avatar
Dos Santos David committed
3 4
from gogole import commands
from gogole.config import COLLECTIONS
Dos Santos David's avatar
Dos Santos David committed
5

Dos Santos David's avatar
Dos Santos David committed
6
from gogole.tokenizer.simple_tokenizer import SimpleTokenizer
Dos Santos David's avatar
Dos Santos David committed
7
from gogole.tokenizer.no_tokenizer import NoTokenizer
Dos Santos David's avatar
Dos Santos David committed
8
from gogole.indexer import BSBIIndexer
Dos Santos David's avatar
Dos Santos David committed
9

Dos Santos David's avatar
Dos Santos David committed
10

Dos Santos David's avatar
Dos Santos David committed
11 12 13 14 15 16 17 18 19
# Banner printed once when the interactive shell starts; it lists the
# commands the prompt understands.
WELCOME_MESSAGE = """
*************************
   Welcome to gogole !
*************************

You don't know where to start ? Let me help you !

* analyze all           : I'll do some analyzes on the collection you gave me

* index stats           : I'll show you some statistics on the index
* index build           : I'll build the reversed index for you
* index lookup <token>  : I'll tell you in where documents your token is

* search <query>                  : I'll show you the documents you need
  search -b,--boolean <query>          Your query must be in the Conjunctive normal form
                                    Like "a OR b OR c AND d" is "(a OR b OR c) AND d"

* search -v,--vectorial <query>   : Search using the vectorial model
"""


Dos Santos David's avatar
Dos Santos David committed
32 33
def build_cli_analyze_parser(root_parser):
    """Register the ``analyze`` sub-command on *root_parser*.

    *root_parser* must expose ``add_parser`` (it is the object returned by
    ``ArgumentParser.add_subparsers()``).  The sub-command accepts zero or
    more positional ``analyze_command`` values, each restricted to the names
    listed in ``commands.analyze_command.COMMANDS``.
    """
    # Looked up once: used both to validate input and to build the help text.
    available = commands.analyze_command.COMMANDS

    args_parser_analyze = root_parser.add_parser(
        'analyze',
        description="Run an analyze like tokens count or find heap's law parameters",
        help="additional help for analyze",
    )

    args_parser_analyze.add_argument(
        "analyze_command",
        nargs='*',
        choices=available,
        metavar="command",
        help="can be any of {}".format(", ".join(available)),
    )
Dos Santos David's avatar
Dos Santos David committed
45

46 47 48 49 50 51 52 53 54
def build_cli_eval_parser(root_parser):
    """Attach the ``eval`` sub-command and its weighting flags to *root_parser*.

    The sub-command takes one positional ``nrequests`` value (stored as a
    one-element list) and three mutually overriding flags that all write to
    ``weight_type``; ``tf-idf`` is used when none of them is given.
    """
    # (flag / stored constant, wording used inside the help text)
    weightings = (
        ('tf-idf', 'tf-idf'),
        ('norm-tf-idf', 'normalized tf-idf'),
        ('norm-freq', 'normalized frequency'),
    )

    parser = root_parser.add_parser('eval', description='evaluate for documents')
    parser.add_argument('nrequests', nargs=1)

    for name, label in weightings:
        parser.add_argument(
            '--' + name,
            action='store_const',
            const=name,
            dest='weight_type',
            help="use the {} weight type".format(label),
        )

    parser.set_defaults(weight_type='tf-idf')

Dos Santos David's avatar
Dos Santos David committed
55

Dos Santos David's avatar
Dos Santos David committed
56 57 58 59 60 61 62 63 64
def build_cli_index_parser(root_parser):
    """Register the ``index`` sub-command (alias ``i``) on *root_parser*.

    ``index`` has its own nested sub-commands, whose name is stored in
    ``index_command``:

    * ``lookup <token>`` -- ``token`` is stored as a one-element list;
    * ``build [--no-cache]`` -- the flag sets ``build_no_cache`` to True;
    * ``stats`` -- takes no arguments.
    """
    index_parser = root_parser.add_parser(
        'index',
        description="run commands on the index",
        aliases=['i'],
    )
    index_subparser = index_parser.add_subparsers(dest="index_command")

    lookup_parser = index_subparser.add_parser(
        'lookup', help="find the documents where a token is")
    lookup_parser.add_argument('token', nargs=1)

    build_parser = index_subparser.add_parser('build', help="build the index")
    build_parser.add_argument(
        '--no-cache',
        help='do not use the cache when building the index',
        action='store_true',
        dest='build_no_cache',
    )

    # No arguments to declare, so the returned parser is not kept.
    index_subparser.add_parser('stats', help='show stats about the index')

Dos Santos David's avatar
Dos Santos David committed
73 74
def build_cli_search_parser(root_parser):
    """Register the ``search`` sub-command on *root_parser*.

    Options:

    * ``-b/--boolean`` and ``-v/--vectorial`` both write to
      ``search_query_type`` (default ``'boolean'``);
    * ``--tf-idf``, ``--norm-tf-idf`` and ``--norm-freq`` all write to
      ``weight_type`` (default ``'tf-idf'``);
    * every remaining positional word is collected into the ``query`` list.
    """
    search_parser = root_parser.add_parser('search', description='search for documents')

    # Retrieval model selection.
    search_parser.add_argument(
        '-b', '--boolean',
        action='store_const', const='boolean', dest='search_query_type',
        help="use the booolean model",
    )
    search_parser.add_argument(
        '-v', '--vectorial',
        action='store_const', const='vectorial', dest='search_query_type',
        help="use the vectorial model",
    )

    # Term weighting selection.
    search_parser.add_argument(
        '--tf-idf',
        action='store_const', const='tf-idf', dest='weight_type',
        help="use the tf-idf weight type",
    )
    search_parser.add_argument(
        '--norm-tf-idf',
        action='store_const', const='norm-tf-idf', dest='weight_type',
        help="use the normalized tf-idf weight type",
    )
    search_parser.add_argument(
        '--norm-freq',
        action='store_const', const='norm-freq', dest='weight_type',
        help="use the normalized frequency weight type",
    )

    search_parser.add_argument('query', help="your query", nargs='*')

    search_parser.set_defaults(weight_type='tf-idf', search_query_type='boolean')
Dos Santos David's avatar
Dos Santos David committed
85 86 87 88 89

def build_cli_parser():
    """Build the parser for command lines typed at the interactive prompt.

    The parser is created with an empty ``prog`` and without the automatic
    ``-h`` option, and stores the chosen sub-command name in
    ``main_command``.  The ``analyze``, ``eval``, ``index`` and ``search``
    sub-commands are registered on it.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli_parser = argparse.ArgumentParser(prog="", add_help=False)
    cli_subparser = cli_parser.add_subparsers(dest="main_command")

    # Registration order is kept as-is: it determines the order in which the
    # sub-commands are listed in usage/error messages.
    build_cli_analyze_parser(cli_subparser)
    build_cli_eval_parser(cli_subparser)
    build_cli_index_parser(cli_subparser)
    build_cli_search_parser(cli_subparser)

    return cli_parser

def main():
    """Placeholder entry point: only reports that it is not implemented."""
    message = 'not supported yet'
    print(message)
Dos Santos David's avatar
Dos Santos David committed
100 101 102


if __name__ == "__main__":
    # Top-level parser: selects the collection the interactive shell works on.
    main_parser = argparse.ArgumentParser(prog="gogole")

    main_parser.add_argument(
        "-c", "--collection",
        help="collection to use")

    main_args = main_parser.parse_args()

    # A missing (-c omitted -> None) or unknown collection is reported as a
    # usage error instead of surfacing as a bare KeyError traceback.
    if main_args.collection not in COLLECTIONS:
        main_parser.error(
            "unknown collection {!r}; choose one with -c/--collection".format(
                main_args.collection))

    # COLLECTIONS maps a name to a callable that builds the collection.
    collection = COLLECTIONS[main_args.collection]()

    print(WELCOME_MESSAGE)
    cli_parser = build_cli_parser()

    # Read-eval loop: one command line per iteration.
    while True:
        print('')
        try:
            line = input("gogole > ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave the shell without a
            # traceback.
            print('')
            break

        # split() with no separator collapses runs of whitespace, so extra
        # spaces no longer turn into empty-string arguments.
        tokens = line.split()
        if not tokens:
            # Blank line: just show the prompt again.
            continue

        try:
            args = cli_parser.parse_args(tokens)
            commands.MAIN_COMMANDS_MAP[args.main_command](collection, args)
        except SystemExit:
            # argparse reports an invalid command line by exiting; the
            # message has already been printed, so keep the shell alive.
            pass