Commit 348aae6d authored by Neeme Kahusk

blokiga v.0.4.0

parent c017ecfd
0.3.4
\ No newline at end of file
0.4.0
\ No newline at end of file
SIMPLE = ['ressursi nimed','doi']
NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisiku asutus','doi']
#NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisiku asutus','doi']
NAMES_ATA = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisik asutuseta','doi']
NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisik asutuseta','kontaktisiku asutus','doi']
\ No newline at end of file
......@@ -15,7 +15,8 @@ from lxml import etree
from prepare import read_xml
from configure_columns import SIMPLE, NAMES, NAMES_ATA
XPATHLIST = NAMES_ATA
#XPATHLIST = NAMES_ATA
XPATHLIST = NAMES
class Name(object):
def __init__(self):
......@@ -73,7 +74,7 @@ def make_filter(tabelifail: str = 'tulbad.csv') -> list:
return labels_et
def text_or_subtexts(element: etree._Element):
def text_or_subtexts(element: etree._Element, block: str = None) -> str:
out = []
if element.text:
a = element.text
......@@ -81,13 +82,19 @@ def text_or_subtexts(element: etree._Element):
a = 'http://doi.org/' + a
return a
else:
#for el in element.xpath('./descendant-or-self::*/text()'):
for el in element.xpath('./*[not(descendant::affiliation) and not(ancestor-or-self::affiliation)]/text()'):
if not block:
for el in element.xpath('./descendant-or-self::*/text()'):
out.append(el)
else:
for el in element.xpath(
'./*[not(descendant::{block}) and not(ancestor-or-self::{block})]/text()'.format(
block=block)
):
out.append(el)
return ', '.join(out)
def get_xpath(root: etree._ElementTree, xpath: str) -> str:
def get_xpath(root: etree._ElementTree, xpath: str, block: str) -> str:
"""Returns text attribute(s) of XML Elements returned by xpath query.
:param root: etree._ElementTree xml root
......@@ -96,7 +103,7 @@ def get_xpath(root: etree._ElementTree, xpath: str) -> str:
"""
namespaces = {'x':'http://www.ilsp.gr/META-XMLSchema'}
elements = root.xpath(xpath, namespaces=namespaces)
texts = [text_or_subtexts(x) for x in elements]
texts = [text_or_subtexts(x, block) for x in elements]
#return texts
return ', '.join(texts)
......@@ -129,7 +136,7 @@ def featured_rows(list_of_roots: list, labels: list) -> list:
for i in list_of_roots:
rida = {}
for t in labels:
rida[t['label_et']] = get_xpath(i, t['xpath'])
rida[t['label_et']] = get_xpath(i, t['xpath'], t['block'])
out.append(rida)
rida = {}
return out
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment