Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Neeme Kahusk
Lightewn
Commits
51127a4c
Commit
51127a4c
authored
Feb 26, 2015
by
Neeme Kahusk
Browse files
teisendused
parent
93de599d
Changes
2
Hide whitespace changes
Inline
Side-by-side
tools/visdic.py
View file @
51127a4c
...
...
@@ -20,6 +20,7 @@ import codecs
from
lxml
import
etree
from
settings
import
PWN_PATH
from
settings
import
FIN_PATH
POINTER_SYMBOLS
=
{
# (symbol, pos):
(
'!'
,
'n'
):{
'gloss'
:
'Antonym'
,
'vis'
:
'antonym'
},
...
...
@@ -114,6 +115,15 @@ def deadj(iStr):
else
:
return
iStr
def translate(orig, table=None):
    """Return the replacement for ``orig`` found in ``table``.

    ``table`` is a sequence of indexable rows. The first row whose first
    element equals ``orig`` is selected and its last element is returned
    (presumably rows come from splitting "source:target" lines on ':' --
    verify against the caller).

    Parameters:
        orig: the value to look up; compared with ``==`` against ``row[0]``.
        table: sequence of rows, or None. When None or empty, no
            translation is attempted.

    Returns:
        ``orig`` unchanged when ``table`` is None or empty, otherwise the
        last element of the first matching row.

    Raises:
        IndexError: when ``table`` is non-empty but no row starts with
            ``orig`` (same exception type the list-indexing version raised).
    """
    if not table:
        return orig
    # A plain loop instead of map(...)[0]: map() is not subscriptable on
    # Python 3, and stopping at the first hit avoids scanning the whole
    # table to build intermediate lists.
    for row in table:
        if row[0] == orig:
            return row[-1]
    raise IndexError('no translation for %r in table' % (orig,))
def
parse_wn_index
(
path
):
"""parses index.sense file of Princeton WN
...
...
@@ -456,18 +466,20 @@ def process_file(f):
return
out
def
make_visdic_xml
(
dataDict
,
xml
):
def
make_visdic_xml
(
dataDict
,
xml
,
translation
=
None
):
ID_PREFIX
=
'eng-30'
# do not care about what xml has already
for
i
in
dataDict
:
try
:
idText
=
'{}-{}-{}'
.
format
(
ID_PREFIX
,
i
[
'synset_offset'
],
translate
(
i
[
'synset_offset'
],
translation
),
i
[
'ss_type'
]
)
except
ValueError
:
idText
=
'%s-%s-%s'
%
(
ID_PREFIX
,
i
[
'synset_offset'
],
translate
(
i
[
'synset_offset'
],
translation
),
i
[
'ss_type'
]
)
posText
=
i
[
'ss_type'
]
...
...
@@ -496,12 +508,14 @@ def make_visdic_xml(dataDict,xml):
ilr
=
etree
.
SubElement
(
synset
,
'ILR'
)
try
:
ilrText
=
'{}-{}-{}'
.
format
(
ID_PREFIX
,
j
[
'synset_offset'
],
translate
(
j
[
'synset_offset'
],
translation
),
j
[
'pos'
]
)
except
ValueError
:
ilrText
=
'%s-%s-%s'
%
(
ID_PREFIX
,
j
[
'synset_offset'
],
translate
(
j
[
'synset_offset'
],
translation
),
j
[
'pos'
]
)
...
...
@@ -551,10 +565,27 @@ def oldermain():
parser
.
add_option
(
'-r'
,
'--prefix'
,
help
=
'outfile prefix'
)
parser
.
add_option
(
'-l'
,
'--lang'
,
help
=
'wordnet language'
)
parser
.
add_option
(
'-t'
,
'--trans'
,
help
=
'Translation tables for ILI'
)
(
options
,
args
)
=
parser
.
parse_args
()
b
=
WN
()
if
options
.
lang
and
options
.
lang
==
'fin'
:
PWN_PATH
=
FIN_PATH
transD
=
None
if
options
.
trans
:
f
=
open
(
options
.
trans
,
'r'
)
transD
=
map
(
lambda
x
:
x
.
strip
().
split
(
':'
),
f
.
readlines
())
f
.
close
()
print
transD
oList
=
wnparser
(
PWN_PATH
,
options
.
pos
,
int
(
options
.
start
),
int
(
options
.
end
)
)
...
...
@@ -595,6 +626,8 @@ def main():
parser
.
add_argument
(
'-p'
,
'--pos'
,
dest
=
'pos'
,
required
=
True
,
help
=
'part of speech'
)
parser
.
add_argument
(
'-t'
,
'--trans'
,
help
=
'Translation tables for ILI'
)
args
=
parser
.
parse_args
()
...
...
tools/vistnoun.sh
View file @
51127a4c
...
...
@@ -9,9 +9,11 @@ i=$1
if
[
"
$3
"
==
fin
]
;
then
wnpath
=
$HOME
/Wordnet/FinWN/fiwn-2.0/dict
prefix
=
"fin-20"
lang
=
"
$3
"
else
wnpath
=
$HOME
/Wordnet/WordNet-3.0/dict/
prefix
=
"pwn-30"
lang
=
"eng"
fi
ulimit
=
$[
$(
cat
$wnpath
/data.
$2
|egrep
-v
'^ '
|wc
-l
)
-1
]
...
...
@@ -24,6 +26,6 @@ do
then
j
=
$[$ulimit
+1]
fi
echo
$i
$j
python visdic.py
-p
$2
-r
$prefix
-s
$i
-e
$j
python visdic.py
-p
$2
-r
$prefix
-s
$i
-e
$j
-l
$lang
-t
fin2pwn-
$2
.txt
i
=
$[$i
+200]
done
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment