Commit f1ea9b11 authored by Neeme Kahusk's avatar Neeme Kahusk
Browse files

trans ja settings

parent 2de6e7b4
# -*- coding: utf-8 -*-
"""DebVisDic tools
"""DebVisDic tools settings
"""
__author__ = 'Neeme Kahusk <neeme.kahusk@ut.ee>'
......@@ -13,4 +13,4 @@ import os
# Filesystem locations of the WordNet "dict" directories, resolved relative
# to the HOME environment variable (a KeyError is raised here if it is unset).
# PWN_PATH = '{}/WordNet-3.0/dict'.format(os.environ['HOME'])
# Princeton WordNet 3.0 dictionary directory.
PWN_PATH = '%s/Wordnet/WordNet-3.0/dict' % (os.environ['HOME'])
# PWN_PATH = '%s/Wordnet/FinWN/fiwn-2.0/dict' % (os.environ['HOME'])
# FinWN (fiwn-2.0) dictionary directory.
FIN_PATH = '%s/Wordnet/FinWN/fiwn-2.0/dict' % (os.environ['HOME'])
# -*- coding: utf-8 -*-
"""Translations from one PWN file to another
"""
__author__ = 'Neeme Kahusk <neeme.kahusk@ut.ee>'
__version__ = '0.1'
__date__ = 'E veebr 23 15:12:59 EET 2015'
__maintainer__ = "Neeme Kahusk"
__email__ = "neeme.kahusk@ut.ee"
__status__ = "Development" # or "Development" or "Production" or "Prototype"
import timing
# import argparse
import sys
import os
import codecs
# print os.environ['HOME']
from lxml import etree
from settings import PWN_PATH
from settings import FIN_PATH
from visdic import parse_dataline
def wnparser(path, pos, start=0, end=-1):
    """Parse a slice of a Princeton WordNet ``data.<pos>`` file.

    The file ``<path>/data.<pos>`` is read as UTF-8.  Lines that begin with
    a space are skipped; every remaining line is stripped, split on ``'|'``
    and the resulting list is handed to ``parse_dataline``.

    Arguments:
    path  -- directory that contains the ``data.*`` files
    pos   -- file name suffix, appended to ``'data.'``
    start -- index of the first content line to parse (slice start)
    end   -- slice end, exclusive; NOTE that the default of -1 follows
             ordinary slice semantics and therefore leaves out the last
             content line

    Returns a list with one ``parse_dataline`` result per selected line.
    """
    data_file = path + '/' + 'data.' + pos
    with codecs.open(data_file, 'r', 'utf8') as f:
        lines = f.readlines()
    # Build real lists (not map/filter objects) so that slicing behaves the
    # same whether map() returns a list or a lazy iterator.
    content = [line.strip().split('|')
               for line in lines
               if not line.startswith(' ')]
    return [parse_dataline(fields) for fields in content[start:end]]
def process_file(f):
    """Parse XML from an open file object and return it pretty-printed.

    Arguments:
    f -- open file object passed to ``etree.parse``; it is always closed
         before this function returns, including when parsing fails

    Returns the value of ``etree.tostring(tree, pretty_print=True)``.
    Any exception raised by ``etree.parse`` propagates to the caller.
    """
    try:
        tree = etree.parse(f)
    finally:
        # Release the handle even if the document is malformed.
        f.close()
    return etree.tostring(tree, pretty_print=True)
def main():
    """Command-line entry point: convert a slice of PWN data to VisDic XML.

    Uses optparse rather than argparse so that it runs on Python 2.6.6.

    Options:
    -s/--start   start position (integer slice start)
    -e/--end     end position (integer slice end)
    -p/--pos     part of speech, used as the ``data.<pos>`` file suffix
    -r/--prefix  prefix of the output file name

    The result is written to ``<prefix>-<pos>-<start>-<end>.xml``.
    """
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-s','--start',dest='start',
                      help='start postition'
                      )
    parser.add_option('-e','--end',dest='end',
                      help='end postition'
                      )
    parser.add_option('-p','--pos',dest='pos',
                      help='part of speech'
                      )
    parser.add_option('-r','--prefix',
                      help='outfile prefix'
                      )
    (options, args) = parser.parse_args()

    # Without these three options the code below would fail with a bare
    # TypeError (int(None) / str + None); report a usage error instead.
    if options.pos is None or options.start is None or options.end is None:
        parser.error('options --pos, --start and --end are required')

    b = WN()
    oList = wnparser(PWN_PATH, options.pos,
                     int(options.start), int(options.end)
                     )
    visdic_xml = make_visdic_xml(oList, b.root)
    out_xml = etree.tostring(visdic_xml, pretty_print=True,
                             encoding='UTF-8',
                             xml_declaration=True)
    oFileName = '%s-%s-%s-%s.xml' % (options.prefix,
                                     options.pos,
                                     options.start,
                                     options.end
                                     )
    # The context manager guarantees the file is flushed and closed; the
    # previous code referenced f.close without calling it.
    with open(oFileName, 'w') as f:
        f.write(out_xml)
if __name__ == "__main__":
main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment