# cacm_collection.py
from gogole.collection import Collection

from gogole.indexer.bsbi_indexer import BSBIIndexer
from gogole.parser.cacm_parser import CACMParser
from gogole.tokenizer.simple_tokenizer import SimpleTokenizer

class CACMCollection(Collection):
    """Collection definition for the ``cacm`` corpus.

    Each instance creates and stores its own indexer, parser and tokenizer
    in the ``_indexer``, ``_parser`` and ``_tokenizer`` attributes.
    """

    # Identifier of this collection; also handed to the indexer as its
    # first argument.
    NAME = 'cacm'

    def __init__(self) -> None:
        """Instantiate the indexer, parser and tokenizer for this collection."""
        # NOTE(review): Collection.__init__ is not invoked here -- confirm
        # that the base class needs no setup of its own.

        # BSBI indexer with a single block (maxsize=None).
        self._indexer = BSBIIndexer(self.NAME, maxsize=None)

        self._parser = CACMParser()

        self._tokenizer = SimpleTokenizer()