...
 
Commits (3)
......@@ -3,7 +3,7 @@
"""Converts ewn Polaris format to xml
for i in ../kb*/kb*utf8.norm ; do j=`echo $i|sed -E 's/[.][.][/]kb[0-9][0-9][/](kb[0-9][0-9][-]utf8)[.]norm/\1.xml/g'`;n=`echo $j|sed -E 's/kb([0-9][0-9])[-].*/\1/1'` ; echo $i $j $n ;~/GIT/newn/src/newn/polaris2xml.py -n kb -v $n -l est $i $j ; done
for i in ../kb[0-9][0-9]/kb[0-9][0-9]-utf8.txt ; do j=`echo $i|sed -E 's/[.][.][/]kb[0-9]{2}[/](kb[0-9]{2}[-]utf8)[.]txt/\1.xml/g'`;n=`echo $j|sed -E 's/kb([0-9]{2})[-].*/\1/1'` ; echo $i $j $n ;~/GIT/newn/src/newn/polaris2xml.py -n kb -v $n -l est $i $j ; done
"""
......@@ -19,7 +19,13 @@ def read2xml(read: list, lexattrs: dict = {}) -> etree._ElementTree:
level = -1
attrs = {}
for rida in read:
if len(rida) >= 2:
if len(rida) == 1:
tag = 'COMMENT'
element = etree.Element(tag)
element.text = rida[0].strip('# ')
parent.append(element)
currentlevel = level
elif len(rida) >= 2:
currentlevel = int(rida[0])
if currentlevel == 0:
parent = root
......@@ -38,7 +44,11 @@ def read2xml(read: list, lexattrs: dict = {}) -> etree._ElementTree:
txt = rida[-1]
if txt.startswith('"'):
txt = txt[1:-1].strip()
element.text = txt
try:
element.text = txt
except ValueError:
print(txt)
exit()
if currentlevel == level:
parent = parent.getparent()
elif currentlevel < level:
......@@ -53,7 +63,10 @@ def read2xml(read: list, lexattrs: dict = {}) -> etree._ElementTree:
def parse_line(rida: str) -> list:
    """Split one input line into its fields.

    Args:
        rida: A single raw line of the input text.

    Returns:
        A list of strings:

        * ``[rida]`` (the line unchanged, including any trailing
          whitespace or newline) when the line's very first character
          is ``#``. Such lines are handed back whole so the caller can
          tell them apart by their length of 1.
        * Otherwise, the line stripped of surrounding whitespace and
          split on runs of whitespace into at most three fields; the
          third field keeps its internal whitespace intact.
        * An empty list for an empty or whitespace-only line.

    Note:
        Only a ``#`` in column 0 marks a comment line. A ``#`` preceded
        by whitespace is tokenized like any other line.
    """
    # Comment lines are not tokenized; return them as a single field.
    if rida.startswith('#'):
        return [rida]
    # After the outer strip(), whitespace-splitting leaves no leading or
    # trailing blanks on any field, so no per-field strip is needed.
    return rida.strip().split(maxsplit=2)
......