Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
G
gogole
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Container Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dos Santos David
gogole
Commits
adbd962b
Commit
adbd962b
authored
Jan 29, 2018
by
Dos Santos David
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
prefix graph filenames with the collection
parent
d5248def
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
4 deletions
+7
-4
cacm_collection.py
gogole/collection/cacm_collection.py
+2
-1
stanford_collection.py
gogole/collection/stanford_collection.py
+3
-1
analyze_command.py
gogole/commands/analyze_command.py
+2
-2
No files found.
gogole/collection/cacm_collection.py
View file @
adbd962b
...
@@ -5,9 +5,10 @@ from gogole.parser.cacm_parser import CACMParser
...
@@ -5,9 +5,10 @@ from gogole.parser.cacm_parser import CACMParser
from
gogole.tokenizer.simple_tokenizer
import
SimpleTokenizer
from
gogole.tokenizer.simple_tokenizer
import
SimpleTokenizer
class
CACMCollection
(
Collection
):
class
CACMCollection
(
Collection
):
NAME
=
'cacm'
def
__init__
(
self
):
def
__init__
(
self
):
# BSBI indexer with single block
# BSBI indexer with single block
self
.
_indexer
=
BSBIIndexer
(
'cacm'
,
maxsize
=
None
)
self
.
_indexer
=
BSBIIndexer
(
self
.
NAME
,
maxsize
=
None
)
self
.
_parser
=
CACMParser
()
self
.
_parser
=
CACMParser
()
...
...
gogole/collection/stanford_collection.py
View file @
adbd962b
...
@@ -5,9 +5,11 @@ from gogole.parser.stanford_parser import StanfordParser
...
@@ -5,9 +5,11 @@ from gogole.parser.stanford_parser import StanfordParser
from
gogole.tokenizer.no_tokenizer
import
NoTokenizer
from
gogole.tokenizer.no_tokenizer
import
NoTokenizer
class
StanfordCollection
(
Collection
):
class
StanfordCollection
(
Collection
):
NAME
=
'stanford'
def
__init__
(
self
):
def
__init__
(
self
):
# BSBI indexer with single block
# BSBI indexer with single block
self
.
_indexer
=
BSBIIndexer
(
'stanford'
,
maxsize
=
16
*
1024
*
1024
)
self
.
_indexer
=
BSBIIndexer
(
self
.
NAME
,
maxsize
=
16
*
1024
*
1024
)
self
.
_parser
=
StanfordParser
()
self
.
_parser
=
StanfordParser
()
...
...
gogole/commands/analyze_command.py
View file @
adbd962b
...
@@ -67,7 +67,7 @@ def run(collection, args):
...
@@ -67,7 +67,7 @@ def run(collection, args):
plot_bar
(
plot_bar
(
x
,
x
,
heights
,
heights
,
filename
=
'
zipf_law.png'
,
filename
=
'
graphs/{}_zipf_law.png'
.
format
(
collection
.
NAME
)
,
xlabel
=
"rank"
,
xlabel
=
"rank"
,
ylabel
=
"frequency"
ylabel
=
"frequency"
)
)
...
@@ -75,7 +75,7 @@ def run(collection, args):
...
@@ -75,7 +75,7 @@ def run(collection, args):
plot_bar
(
plot_bar
(
list
(
map
(
math
.
log10
,
x
)),
list
(
map
(
math
.
log10
,
x
)),
list
(
map
(
math
.
log10
,
heights
)),
list
(
map
(
math
.
log10
,
heights
)),
filename
=
'
zipf_law_logs.png'
,
filename
=
'
graphs/{}_zipf_law_logs.png'
.
format
(
collection
.
NAME
)
,
xlabel
=
'log10(rank)'
,
xlabel
=
'log10(rank)'
,
ylabel
=
'log10(frequency)'
ylabel
=
'log10(frequency)'
)
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment