no_tokenizer.py 194 Bytes
Newer Older
1
import collections
Dos Santos David's avatar
Dos Santos David committed
2

3 4 5
from gogole.tokenizer.tokenizer import Tokenizer

class NoTokenizer(Tokenizer):
    """Tokenizer that applies no processing beyond splitting raw content."""

    def get_tokens(self, document):
        """Return the tokens obtained by splitting the document's raw content.

        Args:
            document: Object exposing ``get_raw_content()``. The value it
                returns must support ``.strip().split()`` (presumably a
                ``str`` -- confirm against the document class).

        Returns:
            The list produced by stripping the raw content and calling
            ``split()`` on it without arguments.
        """
        raw_content = document.get_raw_content()
        return raw_content.strip().split()