Commit 51127a4c authored by Neeme Kahusk
Browse files

teisendused

parent 93de599d
......@@ -20,6 +20,7 @@ import codecs
from lxml import etree
from settings import PWN_PATH
from settings import FIN_PATH
POINTER_SYMBOLS = { # (symbol, pos):
('!','n'):{'gloss':'Antonym','vis':'antonym'},
......@@ -114,6 +115,15 @@ def deadj(iStr):
else:
return iStr
def translate(orig, table=None):
    """Return the replacement for ``orig`` found in ``table``.

    ``table`` is a sequence of rows (lists or tuples).  The first row whose
    first element equals ``orig`` is selected and its last element is
    returned.  When ``table`` is ``None`` or empty, no translation is
    requested and ``orig`` is returned unchanged.

    Raises IndexError if a non-empty ``table`` has no row for ``orig``.
    """
    if not table:
        return orig
    # A plain loop stops at the first hit and does not depend on map/filter
    # returning lists, so it behaves the same on Python 2 and Python 3.
    for row in table:
        if row[0] == orig:
            return row[-1]
    # Keep the exception type of the former ``[...][0]`` lookup, but say
    # which key was missing.
    raise IndexError('no translation for %r in table' % (orig,))
def parse_wn_index(path):
"""parses index.sense file of Princeton WN
......@@ -456,18 +466,20 @@ def process_file(f):
return out
def make_visdic_xml(dataDict,xml):
def make_visdic_xml(dataDict,xml,translation=None):
ID_PREFIX = 'eng-30'
# do not care about what xml has already
for i in dataDict:
try:
idText = '{}-{}-{}'.format(ID_PREFIX,
i['synset_offset'],
translate(i['synset_offset'],
translation),
i['ss_type']
)
except ValueError:
idText = '%s-%s-%s' % (ID_PREFIX,
i['synset_offset'],
translate(i['synset_offset'],
translation),
i['ss_type']
)
posText = i['ss_type']
......@@ -496,12 +508,14 @@ def make_visdic_xml(dataDict,xml):
ilr = etree.SubElement(synset, 'ILR')
try:
ilrText = '{}-{}-{}'.format(ID_PREFIX,
j['synset_offset'],
translate(j['synset_offset'],
translation),
j['pos']
)
except ValueError:
ilrText = '%s-%s-%s' % (ID_PREFIX,
j['synset_offset'],
translate(j['synset_offset'],
translation),
j['pos']
)
......@@ -551,10 +565,27 @@ def oldermain():
parser.add_option('-r','--prefix',
help='outfile prefix'
)
parser.add_option('-l','--lang',
help='wordnet language'
)
parser.add_option('-t','--trans',
help='Translation tables for ILI')
(options, args) = parser.parse_args()
b = WN()
if options.lang and options.lang == 'fin':
PWN_PATH = FIN_PATH
transD = None
if options.trans:
f = open(options.trans, 'r')
transD = map(lambda x: x.strip().split(':'),f.readlines())
f.close()
print transD
oList = wnparser(PWN_PATH,options.pos,
int(options.start),int(options.end)
)
......@@ -595,6 +626,8 @@ def main():
parser.add_argument('-p','--pos', dest='pos',
required=True,
help='part of speech')
parser.add_argument('-t','--trans',
help='Translation tables for ILI')
args = parser.parse_args()
......
......@@ -9,9 +9,11 @@ i=$1
if [ "$3" == fin ]; then
wnpath=$HOME/Wordnet/FinWN/fiwn-2.0/dict
prefix="fin-20"
lang="$3"
else
wnpath=$HOME/Wordnet/WordNet-3.0/dict/
prefix="pwn-30"
lang="eng"
fi
ulimit=$[$(cat $wnpath/data.$2|egrep -v '^ '|wc -l)-1]
......@@ -24,6 +26,6 @@ do
then j=$[$ulimit+1]
fi
echo $i $j
python visdic.py -p $2 -r $prefix -s $i -e $j
python visdic.py -p $2 -r $prefix -s $i -e $j -l $lang -t fin2pwn-$2.txt
i=$[$i+200]
done
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment