Commit f30ecc1b authored by Neeme Kahusk

xpath test

parent 4c3b3746
......@@ -111,37 +111,15 @@ def readwrite_csvfile(outfilename: str, data: list, headers: list = ATS):
writer.writerow(i)
def add_csvdata(xmlfilename: str, csvfilename: str):
    """Report simple paths found in an XML file that the CSV table lacks.

    Reads the tab-separated table ``csvfilename`` and collects the values of
    its ``simplepath`` column, then walks every element of the tree returned
    by ``read_xml(xmlfilename)``.  For each element with non-empty text whose
    simplified path is not in the table, prints ``Teen uue:`` followed by
    that path.  The function itself only prints; it writes nothing.

    Args:
        xmlfilename: XML file handed to ``read_xml``.
        csvfilename: Existing tab-separated file with a ``simplepath`` column.

    Raises:
        KeyError: If the table has rows but no ``simplepath`` column.
    """
    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields reach the reader untouched.
    with open(csvfilename, newline='') as csvfile:
        reader = csv.DictReader(csvfile, dialect=csv.excel_tab)
        # A set gives constant-time membership tests in the loop below, and
        # an empty table simply yields an empty set instead of an IndexError.
        known_simplepaths = {row['simplepath'] for row in reader}

    root = read_xml(xmlfilename)
    for element in root.iter():
        if not element.text:
            continue
        simplepath = make_simplepath(root.getelementpath(element))
        if simplepath not in known_simplepaths:
            print('Teen uue:')
            print(simplepath)
def main():
    """Command-line entry point for building or checking the CSV table.

    Positional arguments are ``infile`` and ``outfile``.  With ``-a`` /
    ``--append`` the pair is handed to ``add_csvdata``; otherwise the data
    returned by ``make_csvdata(infile)`` is passed to ``readwrite_csvfile``
    together with ``outfile``.
    """
    parser = argparse.ArgumentParser(description='Makes csv file ')
    parser.add_argument('infile')
    parser.add_argument('outfile')
    parser.add_argument('-a', '--append', action='store_true')
    args = parser.parse_args()

    if args.append:
        add_csvdata(args.infile, args.outfile)
        return

    # Run the make/write pair exactly once: repeating it after the branch
    # would call both functions a second time and also run them in append
    # mode.
    csvdata = make_csvdata(args.infile)
    readwrite_csvfile(args.outfile, csvdata)
if __name__ == '__main__':
......
......@@ -9,72 +9,21 @@ import re
from zipfile import ZipFile
from lxml import etree
#from lxml import objectify
import csv
from prepare import read_xml
# Register the 'vrt_dialect' CSV dialect: tab-delimited, newline-terminated,
# quoting switched off (QUOTE_NONE) and backslash as the escape character.
csv.register_dialect(
    'vrt_dialect',
    delimiter='\t',
    quotechar='#',
    lineterminator='\n',
    escapechar='\\',
    quoting=csv.QUOTE_NONE,
)
def get_xpath(root: etree._ElementTree, xpath: str):
    """Evaluate the expression *xpath* on *root* and return the result.

    The return value is whatever ``root.xpath(xpath)`` yields; it is passed
    through unchanged.
    """
    matches = root.xpath(xpath)
    return matches
def make_simplepath(path: str):
    """Return *path* with every non-empty ``{...}`` segment removed.

    A segment is an opening brace, one or more characters that are not a
    closing brace, and the closing brace.  An empty ``{}`` is left alone.
    """
    brace_segment = re.compile('[{][^}]+[}]')
    return brace_segment.sub('', path)
def read_xml(filename: str):
    """Parse an XML file, write a label table to CSV and return the tree.

    The file is parsed with blank text removed.  Each element's text is then
    stripped of surrounding whitespace, and whitespace-only text becomes
    ``None``.  Every node that has text, has no attributes and whose element
    path contains no ``[`` contributes one row (``label`` left empty,
    ``example`` = the text, ``simplepath``, ``path``) to the tab-separated
    file ``labelitabel.csv`` in the current working directory.

    Args:
        filename: Source passed to ``etree.parse``.

    Returns:
        The parsed tree.  The function used to return ``None`` implicitly,
        although callers in this file use its result as a tree.
    """
    columns = ['label', 'example', 'simplepath', 'path']
    outfilename = 'labelitabel.csv'

    parser = etree.XMLParser(remove_blank_text=True)
    root = etree.parse(filename, parser)

    # Normalise text: strip it, and collapse whitespace-only text to None.
    for element in root.iter("*"):
        if element.text is not None:
            element.text = element.text.strip() or None

    rows = []
    for element in root.iter():
        # Only attribute-less nodes that carry text become label rows.
        if not element.text or element.attrib:
            continue
        path = root.getelementpath(element)
        # NOTE(review): '[' presumably marks an indexed (repeated) sibling in
        # the element path; such paths are skipped -- confirm intent.
        if '[' in path:
            continue
        rows.append({
            'label': None,
            'path': path,
            'example': element.text,
            'simplepath': make_simplepath(path),
        })

    # newline='' lets the csv writer control line endings itself, as the csv
    # module documentation requires for files handed to a writer.
    with open(outfilename, 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, columns, dialect=csv.excel_tab)
        writer.writeheader()
        writer.writerows(rows)

    return root
def main():
print('Tere maailm!')
parser = argparse.ArgumentParser(description='Avab zip faili')
parser.add_argument('infile')
parser.add_argument('-t', '--tablefile')
parser.add_argument('-x', '--xpath')
parser.add_argument("-z", "--zip", action="store_true")
args = parser.parse_args()
......@@ -83,7 +32,11 @@ def main():
with ZipFile(args.infile, 'r') as myzip:
print(myzip.namelist())
else:
a = read_xml(args.infile)
r = read_xml(args.infile)
print(args.xpath)
print(get_xpath(r, args.xpath))
if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment