main.py 3.74 KB
Newer Older
Dos Santos David's avatar
Dos Santos David committed
1 2
import argparse

Dos Santos David's avatar
Dos Santos David committed
3 4
from gogole import commands
from gogole.config import COLLECTIONS
Dos Santos David's avatar
Dos Santos David committed
5

Dos Santos David's avatar
Dos Santos David committed
6
from gogole.tokenizer.simple_tokenizer import SimpleTokenizer
Dos Santos David's avatar
Dos Santos David committed
7
from gogole.tokenizer.no_tokenizer import NoTokenizer
Dos Santos David's avatar
Dos Santos David committed
8
from gogole.indexer import BSBIIndexer
Dos Santos David's avatar
Dos Santos David committed
9

Dos Santos David's avatar
Dos Santos David committed
10

Dos Santos David's avatar
Dos Santos David committed
11 12 13 14 15 16 17 18 19
# Banner and command cheat-sheet printed once, before the interactive prompt
# loop starts.
WELCOME_MESSAGE = """
*************************
   Welcome to gogole !
*************************

You don't know where to start ? Let me help you !

* analyze all           : I'll do some analyzes on the collection you gave me

* index stats           : I'll show you some statistics on the index
* index build           : I'll build the reversed index for you
* index lookup <token>  : I'll tell you in where documents your token is

* search <query>                  : I'll show you the documents you need
  search -b,--boolean <query>          Your query must be in the Conjunctive normal form
                                    Like "a OR b OR c AND d" is "(a OR b OR c) AND d"

* search -v,--vectorial <query>   : Search using the vectorial model
"""


Dos Santos David's avatar
Dos Santos David committed
32 33
def build_cli_analyze_parser(root_parser):
    """Register the ``analyze`` sub-command on *root_parser*.

    Args:
        root_parser: the sub-parsers action (as returned by
            ``ArgumentParser.add_subparsers()``) the new parser is added to.

    The resulting namespace carries the requested analyses in
    ``analyze_command``: zero or more values, each restricted to the entries
    of ``commands.analyze_command.COMMANDS``.
    """
    # Looked up once: used both to validate the input and to build the help.
    available = commands.analyze_command.COMMANDS

    args_parser_analyze = root_parser.add_parser(
        'analyze',
        description="Run an analyze like tokens count or find heap's law parameters",
        help="additional help for analyze",
    )

    args_parser_analyze.add_argument(
        "analyze_command",
        nargs='*',
        choices=available,
        metavar="command",
        help="can be any of {}".format(", ".join(available)),
    )
Dos Santos David's avatar
Dos Santos David committed
45 46


Dos Santos David's avatar
Dos Santos David committed
47 48 49 50 51 52 53 54 55
def build_cli_index_parser(root_parser):
    """Register the ``index`` sub-command (alias ``i``) on *root_parser*.

    Args:
        root_parser: the sub-parsers action (as returned by
            ``ArgumentParser.add_subparsers()``) the new parser is added to.

    ``index`` has its own nested sub-commands, whose name is stored in
    ``index_command``:

    * ``lookup <token>`` -- ``token`` is parsed as a one-element list
      (``nargs=1``);
    * ``build [--no-cache]`` -- the flag is stored as ``build_no_cache``;
    * ``stats`` -- takes no argument.
    """
    index_parser = root_parser.add_parser(
        'index',
        description="run commands on the index",
        aliases=['i'],
    )
    index_subparser = index_parser.add_subparsers(dest="index_command")

    lookup_parser = index_subparser.add_parser(
        'lookup', help="find the documents where a token is")
    lookup_parser.add_argument('token', nargs=1)

    build_parser = index_subparser.add_parser('build', help="build the index")
    build_parser.add_argument(
        '--no-cache',
        help='do not use the cache when building the index',
        action='store_true',
        dest='build_no_cache',
    )

    # `stats` has no arguments of its own, so the returned parser is not kept.
    index_subparser.add_parser('stats', help='show stats about the index')

Dos Santos David's avatar
Dos Santos David committed
64 65
def build_cli_search_parser(root_parser):
    """Register the ``search`` sub-command on *root_parser*.

    Args:
        root_parser: the sub-parsers action (as returned by
            ``ArgumentParser.add_subparsers()``) the new parser is added to.

    The resulting namespace carries:

    * ``query`` -- the remaining positional words, as a list (may be empty);
    * ``search_query_type`` -- ``'boolean'`` (the default, or ``-b/--boolean``)
      or ``'vectorial'`` (``-v/--vectorial``).
    """
    search_parser = root_parser.add_parser('search', description='search for documents')

    # Both flags write to the same destination, so when both are given the
    # last one on the command line wins.
    search_parser.add_argument(
        '-b', '--boolean',
        action='store_const',
        const='boolean',
        dest='search_query_type',
        help="use the boolean model",
    )
    search_parser.add_argument(
        '-v', '--vectorial',
        action='store_const',
        const='vectorial',
        dest='search_query_type',
        help="use the vectorial model",
    )

    search_parser.add_argument('query', help="your query", nargs='*')

    search_parser.set_defaults(search_query_type='boolean')
Dos Santos David's avatar
Dos Santos David committed
73 74 75 76 77

def build_cli_parser():
    """Build the parser used for each line typed at the interactive prompt.

    The parser has an empty ``prog`` and no automatic ``-h`` option; the name
    of the chosen sub-command (``analyze``, ``index`` or ``search``) is stored
    in ``main_command``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli_parser = argparse.ArgumentParser(prog="", add_help=False)
    cli_subparser = cli_parser.add_subparsers(dest="main_command")

    # Each builder attaches one sub-command to the shared sub-parsers action.
    build_cli_analyze_parser(cli_subparser)
    build_cli_index_parser(cli_subparser)
    build_cli_search_parser(cli_subparser)

    return cli_parser

def main():
    """Placeholder entry point: only reports that it is not supported yet."""
    notice = 'not supported yet'
    print(notice)
Dos Santos David's avatar
Dos Santos David committed
87 88 89


if __name__ == "__main__":
    # Top-level parser: options given when the program itself is launched
    # (mainly collection information and stop-words).
    main_parser = argparse.ArgumentParser(prog="gogole")

    main_parser.add_argument(
        "-c", "--collection",
        help="collection to use")

    main_args = main_parser.parse_args()

    # A missing or unknown collection name is a usage error: report it through
    # argparse instead of letting a bare KeyError traceback escape. Only the
    # lookup is guarded, so a KeyError raised while building the collection is
    # not mistaken for a bad name.
    try:
        collection_factory = COLLECTIONS[main_args.collection]
    except KeyError:
        main_parser.error(
            "unknown collection {!r}, pick one with -c/--collection".format(
                main_args.collection))
    collection = collection_factory()

    print(WELCOME_MESSAGE)
    cli_parser = build_cli_parser()

    # Interactive prompt: one command per line, until the input is closed.
    while True:
        print('')
        try:
            line = input("gogole > ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave the shell without a traceback.
            print('')
            break

        # split() without argument collapses repeated whitespace, so no empty
        # token is ever handed to argparse.
        tokens = line.split()
        if not tokens:
            # Blank line: nothing to dispatch, just prompt again.
            continue

        try:
            args = cli_parser.parse_args(tokens)
            commands.MAIN_COMMANDS_MAP[args.main_command](collection, args)
        except SystemExit:
            # argparse exits after reporting invalid input; the message has
            # already been printed, so keep the shell alive.
            pass