...
 
Commits (3)
#!/usr/bin/env python
"""Converts a EuroWordNet (EWN) XML file to the Polaris I/O format.
"""
import argparse
from lxml import etree
def format_line(level: int, element: "etree._Element") -> str:
    """Render one XML element as a single indented output line.

    The line consists of, separated by single spaces:

    * the nesting level, preceded by two spaces of indentation per level;
    * ``@<id>@`` when the element carries an ``id`` attribute;
    * the element's tag name;
    * the element's text wrapped in double quotes, when it is not
      empty or whitespace-only (the text itself is emitted unstripped).

    Args:
        level: Nesting depth of the element; also printed as the first field.
        element: Object exposing ``attrib``, ``tag`` and ``text``.

    Returns:
        The formatted line, without a trailing newline.
    """
    indent_width = 2
    parts = [' ' * (indent_width * level) + str(level)]

    # Elements may carry attributes other than ``id``; only emit the
    # ``@id@`` marker when an id is really there instead of raising KeyError.
    element_id = element.attrib.get('id')
    if element_id is not None:
        parts.append('@{}@'.format(element_id))

    parts.append(element.tag)

    if element.text and element.text.strip():
        parts.append('"{}"'.format(element.text))

    return ' '.join(parts)
def rewrite(infilename: str):
    """Stream-parse an XML file and print one formatted line per element.

    Elements tagged ``LEXICON`` are skipped and do not contribute to the
    nesting level; every other element is printed via ``format_line`` in
    document order with its nesting depth (starting at 0).

    Args:
        infilename: Path of the XML file to read.
    """
    level = -1
    # (level, element) for an element whose 'start' event has been seen but
    # whose line has not been printed yet.
    pending = None

    for event, element in etree.iterparse(infilename, events=('start', 'end')):
        # lxml does not guarantee that ``element.text`` is populated when the
        # 'start' event is delivered.  By the time the *next* event arrives
        # (the first child's start or the element's own end) the leading text
        # has been parsed, so the line is emitted here, still in start order.
        if pending is not None:
            print(format_line(*pending))
            pending = None

        if element.tag == 'LEXICON':
            continue

        if event == 'start':
            level += 1
            pending = (level, element)
        elif event == 'end':
            level -= 1

    # Defensive flush; a well-formed document always ends on an 'end' event.
    if pending is not None:
        print(format_line(*pending))
def main():
    """Command-line entry point.

    Parses the ``infile`` and ``outfile`` positional arguments, converts
    ``infile`` with ``rewrite`` and stores the result in ``outfile``
    (UTF-8 encoded).
    """
    # Local import so this function carries its own dependency.
    import contextlib

    parser = argparse.ArgumentParser(description='Converts EuroWordNet XML file to Polaris I/O file.')
    parser.add_argument('infile', help='EuroWordNet XML file to read')
    parser.add_argument('outfile', help='file the converted output is written to')
    args = parser.parse_args()

    # ``rewrite`` emits its result with print(); redirect stdout so the
    # required ``outfile`` argument is honoured instead of silently ignored.
    with open(args.outfile, 'w', encoding='utf-8') as out, \
            contextlib.redirect_stdout(out):
        rewrite(args.infile)
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
\ No newline at end of file