main.py 3.58 KB
Newer Older
Dos Santos David's avatar
Dos Santos David committed
1 2
import argparse

Dos Santos David's avatar
Dos Santos David committed
3 4
from gogole import commands
from gogole.config import COLLECTIONS
Dos Santos David's avatar
Dos Santos David committed
5

Dos Santos David's avatar
Dos Santos David committed
6
from gogole.tokenizer.simple_tokenizer import SimpleTokenizer
Dos Santos David's avatar
Dos Santos David committed
7
from gogole.tokenizer.no_tokenizer import NoTokenizer
Dos Santos David's avatar
Dos Santos David committed
8
from gogole.indexer import BSBIIndexer
Dos Santos David's avatar
Dos Santos David committed
9

Dos Santos David's avatar
Dos Santos David committed
10

Dos Santos David's avatar
Dos Santos David committed
11 12 13 14 15 16 17 18 19 20 21 22
# Banner printed once when the interactive shell starts; it lists the
# commands understood by the parser returned by build_cli_parser().
WELCOME_MESSAGE = """
*************************
   Welcome to gogole !
*************************

You don't know where to start ? Let me help you !

* analyze all                    : I'll run some analyses on the collection you gave me

* index build                    : I'll build the inverted index for you
* index lookup <token>           : I'll tell you which documents contain your token

* search <query>                 : I'll show you the documents you need
* search -b,--boolean <query>    : Search using the boolean model (the default)
                                   Your query must be in conjunctive normal form
                                   Like "a OR b OR c AND d" is "(a OR b OR c) AND d"

* search -v,--vectorial <query>  : Search using the vectorial model
"""


Dos Santos David's avatar
Dos Santos David committed
31 32
def build_cli_analyze_parser(root_parser):
    """Register the ``analyze`` sub-command on *root_parser*.

    Args:
        root_parser: the sub-parsers action (as returned by
            ``ArgumentParser.add_subparsers()``) that the new ``analyze``
            parser is attached to.

    The sub-command accepts zero or more positional values, stored in
    ``analyze_command``; each one must be a member of
    ``commands.analyze_command.COMMANDS``.
    """
    available_commands = commands.analyze_command.COMMANDS

    args_parser_analyze = root_parser.add_parser(
        'analyze',
        description="Run an analyze like tokens count or find heap's law parameters",
        help="additional help for analyze",
    )

    args_parser_analyze.add_argument(
        "analyze_command",
        nargs='*',
        choices=available_commands,
        metavar="command",
        help="can be any of {}".format(", ".join(available_commands)),
    )
Dos Santos David's avatar
Dos Santos David committed
44 45


Dos Santos David's avatar
Dos Santos David committed
46 47 48 49 50 51 52 53 54
def build_cli_index_parser(root_parser):
    """Register the ``index`` sub-command (alias ``i``) on *root_parser*.

    Args:
        root_parser: the sub-parsers action (as returned by
            ``ArgumentParser.add_subparsers()``) that the new ``index``
            parser is attached to.

    ``index`` has two sub-commands of its own, whose name is stored in
    ``index_command``:

    * ``lookup <token>`` -- ``token`` is parsed as a one-element list
      because of ``nargs=1``.
    * ``build [--no-cache]`` -- the flag is stored as the boolean
      ``build_no_cache``.
    """
    index_parser = root_parser.add_parser(
        'index',
        description="run commands on the index",
        aliases=['i'],
    )
    index_subparser = index_parser.add_subparsers(dest="index_command")

    lookup_parser = index_subparser.add_parser(
        'lookup',
        help="find the documents where a token is",
    )
    lookup_parser.add_argument('token', nargs=1)

    build_parser = index_subparser.add_parser('build', help="build the index")
    build_parser.add_argument(
        '--no-cache',
        help='do not use the cache when building the index',
        action='store_true',
        dest='build_no_cache',
    )
Dos Santos David's avatar
Dos Santos David committed
60

Dos Santos David's avatar
Dos Santos David committed
61 62
def build_cli_search_parser(root_parser):
    """Register the ``search`` sub-command on *root_parser*.

    Args:
        root_parser: the sub-parsers action (as returned by
            ``ArgumentParser.add_subparsers()``) that the new ``search``
            parser is attached to.

    The query words are collected into the list ``query``. The model to use
    is stored in ``search_query_type``: ``'boolean'`` (``-b``/``--boolean``,
    also the default) or ``'vectorial'`` (``-v``/``--vectorial``).
    """
    search_parser = root_parser.add_parser(
        'search',
        description='search for documents',
    )

    # Both flags write to the same destination, so the last one given wins.
    search_parser.add_argument(
        '-b', '--boolean',
        action='store_const',
        const='boolean',
        dest='search_query_type',
        help="use the boolean model",
    )
    search_parser.add_argument(
        '-v', '--vectorial',
        action='store_const',
        const='vectorial',
        dest='search_query_type',
        help="use the vectorial model",
    )

    search_parser.add_argument('query', help="your query", nargs='*')

    # With neither flag given, fall back to the boolean model.
    search_parser.set_defaults(search_query_type='boolean')
Dos Santos David's avatar
Dos Santos David committed
70 71 72 73 74

def build_cli_parser():
    """Build the parser for the commands typed at the interactive prompt.

    Returns:
        argparse.ArgumentParser: a parser with the ``analyze``, ``index`` and
        ``search`` sub-commands registered; the name of the chosen
        sub-command is stored in ``main_command``.
    """
    # The parser is created with an empty program name and without the
    # automatic top-level -h/--help option.
    cli_parser = argparse.ArgumentParser(prog="", add_help=False)
    cli_subparser = cli_parser.add_subparsers(dest="main_command")

    build_cli_analyze_parser(cli_subparser)
    build_cli_index_parser(cli_subparser)
    build_cli_search_parser(cli_subparser)

    return cli_parser

def main():
    """Placeholder entry point: only reports that it is not implemented."""
    message = 'not supported yet'
    print(message)
Dos Santos David's avatar
Dos Santos David committed
84 85 86


if __name__ == "__main__":
    # top-level parser
    # mainly collection information and stop-words
    main_parser = argparse.ArgumentParser(prog="gogole")

    main_parser.add_argument(
        "-c", "--collection",
        help="collection to use")

    main_args = main_parser.parse_args()

    # Look the collection up separately from instantiating it, so that only a
    # missing or unknown name is reported as a usage error (instead of a raw
    # KeyError traceback) and errors raised while building it stay visible.
    try:
        collection_factory = COLLECTIONS[main_args.collection]
    except KeyError:
        main_parser.error(
            "unknown collection: {!r}".format(main_args.collection))
    collection = collection_factory()

    print(WELCOME_MESSAGE)
    cli_parser = build_cli_parser()

    # Interactive loop: read a line, parse it, dispatch to the command.
    while True:
        print('')
        try:
            line = input("gogole > ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave the shell without a traceback.
            print('')
            break

        # split() without argument collapses runs of whitespace, so repeated
        # spaces no longer produce empty tokens that argparse rejects.
        tokens = line.split()
        if not tokens:
            # blank line: just show the prompt again
            continue

        try:
            args = cli_parser.parse_args(tokens)
        except SystemExit:
            # argparse has already printed its usage/error/help text and then
            # tried to exit; keep the shell alive instead.
            continue

        try:
            run_command = commands.MAIN_COMMANDS_MAP[args.main_command]
        except KeyError:
            # e.g. no sub-command was given, or a parser alias that has no
            # entry in the map
            print("unknown command: {}".format(args.main_command))
            continue

        try:
            run_command(collection, args)
        except SystemExit:
            # a command asking to exit must not terminate the shell
            pass