Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Neeme Kahusk
Lightewn
Commits
1a2f57b3
Commit
1a2f57b3
authored
Feb 18, 2015
by
Neeme Kahusk
Browse files
encoding
parent
514fff3c
Changes
1
Hide whitespace changes
Inline
Side-by-side
tools/visdic.py
View file @
1a2f57b3
...
...
@@ -15,6 +15,7 @@ import timing
# import argparse
import
sys
import
os
import
codecs
# print os.environ['HOME']
from
lxml
import
etree
...
...
@@ -128,7 +129,7 @@ def parse_wn_index(path):
"""
SYNSET_TYPE
=
{
'1'
:
'n'
,
'2'
:
'v'
,
'3'
:
'a'
,
'4'
:
'r'
,
'5'
:
's'
}
FILE
=
path
+
'/'
+
'index.sense'
with
open
(
FILE
,
'r'
)
as
f
:
with
codecs
.
open
(
FILE
,
'r'
,
'utf8'
)
as
f
:
lines
=
map
(
lambda
x
:
x
.
split
(),
f
.
readlines
())
oDict
=
{}
for
i
in
lines
:
...
...
@@ -251,7 +252,7 @@ def parse_dataline(iList,indexDict=WNI):
# of 0 is the default, and therefore is not present in
# lexicographer files.
while
wordCounter
:
oDict
[
'synonym'
].
append
({
'word'
:
first
.
pop
(
0
),
oDict
[
'synonym'
].
append
({
'word'
:
first
.
pop
(
0
)
.
decode
(
'utf8'
)
,
'lex_id'
:
first
.
pop
(
0
)}
)
wordCounter
-=
1
...
...
@@ -356,7 +357,7 @@ def wnparser(path,pos,start=0,end=-1):
"""Parses Princeton wordnet data file"""
FILE
=
path
+
'/'
+
'data.'
+
pos
oList
=
[]
with
open
(
FILE
,
'r'
)
as
f
:
with
codecs
.
open
(
FILE
,
'r'
,
'utf8'
)
as
f
:
lines
=
f
.
readlines
()
licenceList
=
filter
(
lambda
x
:
x
.
startswith
(
' '
),
lines
)
contentList
=
map
(
lambda
x
:
x
.
strip
().
split
(
'|'
),
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment