Commit 7cfe96d3 authored by Neeme Kahusk's avatar Neeme Kahusk

Merge branch 'dev'

parents 45852748 b291668d
SIMPLE = ['ressursi nimed','doi']
\ No newline at end of file
SIMPLE = ['ressursi nimed','doi']
NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisiku asutus','doi']
......@@ -13,9 +13,50 @@ from pathlib import Path
from lxml import etree
from prepare import read_xml
from configure_columns import SIMPLE
from configure_columns import SIMPLE, NAMES
XPATHLIST = NAMES
class Name(object):
    """A name string together with the language it is written in.

    The constructor takes no arguments; both attributes start out as
    empty strings.
    """

    def __init__(self):
        # The name text itself.
        self.value = ''
        # NOTE(review): presumably a language code such as 'et' or 'en',
        # matching the `lang` attribute used in the column table -- confirm.
        self.language = ''
class Project(object):
    """Container for the names of a project.

    The constructor takes no arguments; every instance starts with its
    own, independent empty lists.
    """

    def __init__(self):
        # NOTE(review): both lists presumably hold `Name` objects -- confirm.
        self.names = []
        self.shortNames = []
class Organization(object):
    """Container for the names and contact details of an organization.

    The constructor takes no arguments; the name lists start empty (a
    fresh list per instance) and `communicationInfo` starts as None.
    """

    def __init__(self):
        # NOTE(review): the three lists presumably hold `Name` objects -- confirm.
        self.names = []
        self.shortNames = []
        self.departmentNames = []
        # NOTE(review): presumably a `CommunicationInfo` once set -- confirm.
        self.communicationInfo = None
class CommunicationInfo(object):
    """Contact details stored as plain strings.

    The constructor takes no arguments; every field starts out as an
    empty string.
    """

    def __init__(self):
        # Electronic contact points.
        self.email = ''
        self.url = ''
        # Postal address parts.
        self.address = ''
        self.zipCode = ''
        self.city = ''
        self.country = ''
        # Phone lines.
        self.telephoneNumber = ''
        self.faxNumber = ''
class Person(object):
    """Container for a person's names, position and contact details.

    The constructor takes no arguments; the name lists start empty (a
    fresh list per instance), `position` is an empty string, and `sex`
    and `communicationInfo` start as None.
    """

    def __init__(self):
        # NOTE(review): lists, presumably of `Name` objects so that a name
        # can be given in several languages -- confirm.
        self.surname = []
        self.givenName = []
        self.sex = None
        self.position = ''
        # NOTE(review): presumably a `CommunicationInfo` once set -- confirm.
        self.communicationInfo = None
XPATHLIST = SIMPLE
def make_filter(tabelifail: str = 'tulbad.csv') -> list:
"""Reads "columns file" (columns 'label', 'simplepath', 'xpath' etc. and returns
......@@ -31,6 +72,18 @@ def make_filter(tabelifail: str = 'tulbad.csv') -> list:
labels_et = [x for x in csvdata if x['label_et'] in XPATHLIST]
return labels_et
def text_or_subtexts(element: "etree._Element") -> str:
    """Return the text of *element*, or the joined texts found inside it.

    If the element has text of its own (ignoring surrounding whitespace),
    that text is returned; a value starting with the DOI prefix
    '10.15155/' is turned into a 'http://doi.org/...' link.

    Otherwise the text of every descendant is collected in document
    order and joined with ', '.  Whitespace-only pieces (the indentation
    between child elements in pretty-printed XML) are skipped, so a
    container element yields the texts of its children rather than its
    own indentation whitespace.

    Args:
        element: an element exposing `.text` and `.itertext()`.

    Returns:
        The element's text, a DOI link, the comma-joined descendant
        texts, or '' when no text is found at all.
    """
    own_text = (element.text or '').strip()
    if own_text:
        if own_text.startswith('10.15155/'):
            return 'http://doi.org/' + own_text
        return own_text

    # itertext() walks the subtree in document order and yields both the
    # text and the tail strings found inside it.
    stripped = (piece.strip() for piece in element.itertext())
    return ', '.join(piece for piece in stripped if piece)
def get_xpath(root: etree._ElementTree, xpath: str) -> str:
"""Returns text attribute(s) of XML Elements returned by xpath query.
......@@ -41,7 +94,8 @@ def get_xpath(root: etree._ElementTree, xpath: str) -> str:
"""
namespaces = {'x':'http://www.ilsp.gr/META-XMLSchema'}
elements = root.xpath(xpath, namespaces=namespaces)
texts = [x.text for x in elements]
texts = [text_or_subtexts(x) for x in elements]
#return texts
return ', '.join(texts)
......@@ -116,7 +170,7 @@ def main():
tulem = [read_xml(args.infile)]
tulemitabel = featured_rows(tulem, labels)
csv_out(args.outfile, tulemitabel, SIMPLE)
csv_out(args.outfile, tulemitabel, XPATHLIST)
if __name__ == '__main__':
main()
label_et simplepath attrs xpath example source path
ressursi nimed identificationInfo/resourceName .//x:identificationInfo/x:resourceName
eestikeelne ressursi nimi identificationInfo/resourceName lang=et ".//x:identificationInfo/x:resourceName[@lang=""et""]" Inglise-eesti masintõlkesõnastik 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}resourceName[1]
ingliskeelne ressursi nimi identificationInfo/resourceName lang=en ".//x:identificationInfo/x:resourceName[@lang=""en""]" English-Estonian Machine Translation Dictionary 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}resourceName[2]
eestikeelne ressursi nimi "identificationInfo/resourceName[@lang=""et""]" lang=et ".//x:identificationInfo/x:resourceName[@lang=""et""]" Inglise-eesti masintõlkesõnastik 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}resourceName[1]
ingliskeelne ressursi nimi "identificationInfo/resourceName[@lang=""en""]" lang=en ".//x:identificationInfo/x:resourceName[@lang=""en""]" English-Estonian Machine Translation Dictionary 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}resourceName[2]
eestikeelne ressursi kirjeldus identificationInfo/description lang=et ".//x:identificationInfo/x:description[@lang=""et""]" Jooksvalt täienev inglise-eesti veebisõnastik, mis on abiks tõlkimisel ja toetab ka masintõlget. Sõnastikus on praegu ligi 90 000 ingliskeelset sõna ja püsiühendit Sõnastik on loodud hobi korras ning ei pruugi vastata Eesti Keele Instituudi kvaliteedistandardile. 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}description[1]
ingliskeelne ressursi kirjeldus identificationInfo/description lang=en ".//x:identificationInfo/x:description[@lang=""en""]" A constantly incremented English-Estonian online dictionary, which is helpful in both human and machine translation. Currently, there are nearly 90,000 English words and phrases. Being created as a hobby, the dictionary need not meet all the quality standards of the Institute of the Estonian Language. 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}description[2]
url identificationInfo/url .//x:identificationInfo/x:url http://portaal.eki.ee/dict/ies/ 224 {http://www.ilsp.gr/META-XMLSchema}identificationInfo/{http://www.ilsp.gr/META-XMLSchema}url
......@@ -14,9 +14,11 @@ kasutuspiirangud distributionInfo/licenceInfo/restrictionsOfUse .//x:distributi
levitamise vahend distributionInfo/licenceInfo/distributionAccessMedium .//x:distributionInfo/x:licenceInfo/x:distributionAccessMedium accessibleThroughInterface 224 {http://www.ilsp.gr/META-XMLSchema}distributionInfo/{http://www.ilsp.gr/META-XMLSchema}licenceInfo/{http://www.ilsp.gr/META-XMLSchema}distributionAccessMedium[1]
distributionInfo/licenceInfo/distributionAccessMedium .//x:distributionInfo/x:licenceInfo/x:distributionAccessMedium downloadable 224 {http://www.ilsp.gr/META-XMLSchema}distributionInfo/{http://www.ilsp.gr/META-XMLSchema}licenceInfo/{http://www.ilsp.gr/META-XMLSchema}distributionAccessMedium[2]
allalaadimiskoht distributionInfo/licenceInfo/downloadLocation .//x:distributionInfo/x:licenceInfo/x:downloadLocation ftp://ftp.eki.ee/pub/keeletehnoloogia/inglise-eesti/ 224 {http://www.ilsp.gr/META-XMLSchema}distributionInfo/{http://www.ilsp.gr/META-XMLSchema}licenceInfo/{http://www.ilsp.gr/META-XMLSchema}downloadLocation
kontaktisik contactPerson .//x:contactPerson
kontaktisiku perekonnanimi contactPerson/surname lang=en ".//x:contactPerson/x:surname[@lang=""en""]" Hein 224 {http://www.ilsp.gr/META-XMLSchema}contactPerson/{http://www.ilsp.gr/META-XMLSchema}surname
kontaktisiku eesnimi contactPerson/givenName lang=en ".//x:contactPerson/x:givenName[@lang=""en""]" Indrek 224 {http://www.ilsp.gr/META-XMLSchema}contactPerson/{http://www.ilsp.gr/META-XMLSchema}givenName
kontaktisiku meiliaadress contactPerson/communicationInfo/email .//x:contactPerson/x:communicationInfo/x:email Indrek.Hein@eki.ee 224 {http://www.ilsp.gr/META-XMLSchema}contactPerson/{http://www.ilsp.gr/META-XMLSchema}communicationInfo/{http://www.ilsp.gr/META-XMLSchema}email
kontaktisiku asutus contactPerson/affiliation .//x:contactPerson/x:affiliation
Eestikeelne kontaktisiku koduasutuse nimi contactPerson/affiliation/organizationName lang=et ".//x:contactPerson/x:affiliation/x:organizationName[@lang=""et""]" Eesti Keele Instituut 224 {http://www.ilsp.gr/META-XMLSchema}contactPerson/{http://www.ilsp.gr/META-XMLSchema}affiliation/{http://www.ilsp.gr/META-XMLSchema}organizationName[1]
ingliskeelne kontaktisiku koduasutuse nimi contactPerson/affiliation/organizationName lang=en ".//x:contactPerson/x:affiliation/x:organizationName[@lang=""en""]" Institute of the Estonian Language 224 {http://www.ilsp.gr/META-XMLSchema}contactPerson/{http://www.ilsp.gr/META-XMLSchema}affiliation/{http://www.ilsp.gr/META-XMLSchema}organizationName[2]
kontaktisiku koduasutuse meiliaadress contactPerson/affiliation/communicationInfo/email .//x:contactPerson/x:affiliation/x:communicationInfo/x:email eki@eki.ee 224 {http://www.ilsp.gr/META-XMLSchema}contactPerson/{http://www.ilsp.gr/META-XMLSchema}affiliation/{http://www.ilsp.gr/META-XMLSchema}communicationInfo/{http://www.ilsp.gr/META-XMLSchema}email
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment