Commit 348aae6d authored by Neeme Kahusk

blokiga v.0.4.0

parent c017ecfd
0.3.4 0.4.0
\ No newline at end of file \ No newline at end of file
SIMPLE = ['ressursi nimed','doi'] SIMPLE = ['ressursi nimed','doi']
NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisiku asutus','doi'] #NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisiku asutus','doi']
NAMES_ATA = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisik asutuseta','doi'] NAMES_ATA = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisik asutuseta','doi']
NAMES = ['eestikeelne ressursi nimi','ingliskeelne ressursi nimi','kontaktisik asutuseta','kontaktisiku asutus','doi']
\ No newline at end of file
...@@ -15,7 +15,8 @@ from lxml import etree ...@@ -15,7 +15,8 @@ from lxml import etree
from prepare import read_xml from prepare import read_xml
from configure_columns import SIMPLE, NAMES, NAMES_ATA from configure_columns import SIMPLE, NAMES, NAMES_ATA
XPATHLIST = NAMES_ATA #XPATHLIST = NAMES_ATA
XPATHLIST = NAMES
class Name(object): class Name(object):
def __init__(self): def __init__(self):
...@@ -73,7 +74,7 @@ def make_filter(tabelifail: str = 'tulbad.csv') -> list: ...@@ -73,7 +74,7 @@ def make_filter(tabelifail: str = 'tulbad.csv') -> list:
return labels_et return labels_et
def text_or_subtexts(element: etree._Element): def text_or_subtexts(element: etree._Element, block: str = None) -> str:
out = [] out = []
if element.text: if element.text:
a = element.text a = element.text
...@@ -81,13 +82,19 @@ def text_or_subtexts(element: etree._Element): ...@@ -81,13 +82,19 @@ def text_or_subtexts(element: etree._Element):
a = 'http://doi.org/' + a a = 'http://doi.org/' + a
return a return a
else: else:
#for el in element.xpath('./descendant-or-self::*/text()'): if not block:
for el in element.xpath('./*[not(descendant::affiliation) and not(ancestor-or-self::affiliation)]/text()'): for el in element.xpath('./descendant-or-self::*/text()'):
out.append(el) out.append(el)
else:
for el in element.xpath(
'./*[not(descendant::{block}) and not(ancestor-or-self::{block})]/text()'.format(
block=block)
):
out.append(el)
return ', '.join(out) return ', '.join(out)
def get_xpath(root: etree._ElementTree, xpath: str) -> str: def get_xpath(root: etree._ElementTree, xpath: str, block: str) -> str:
"""Returns text attribute(s) of XML Elements returned by xpath query. """Returns text attribute(s) of XML Elements returned by xpath query.
:param root: etree._ElementTree xml root :param root: etree._ElementTree xml root
...@@ -96,7 +103,7 @@ def get_xpath(root: etree._ElementTree, xpath: str) -> str: ...@@ -96,7 +103,7 @@ def get_xpath(root: etree._ElementTree, xpath: str) -> str:
""" """
namespaces = {'x':'http://www.ilsp.gr/META-XMLSchema'} namespaces = {'x':'http://www.ilsp.gr/META-XMLSchema'}
elements = root.xpath(xpath, namespaces=namespaces) elements = root.xpath(xpath, namespaces=namespaces)
texts = [text_or_subtexts(x) for x in elements] texts = [text_or_subtexts(x, block) for x in elements]
#return texts #return texts
return ', '.join(texts) return ', '.join(texts)
...@@ -129,7 +136,7 @@ def featured_rows(list_of_roots: list, labels: list) -> list: ...@@ -129,7 +136,7 @@ def featured_rows(list_of_roots: list, labels: list) -> list:
for i in list_of_roots: for i in list_of_roots:
rida = {} rida = {}
for t in labels: for t in labels:
rida[t['label_et']] = get_xpath(i, t['xpath']) rida[t['label_et']] = get_xpath(i, t['xpath'], t['block'])
out.append(rida) out.append(rida)
rida = {} rida = {}
return out return out
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment