estnltk.database.database module
.. automodule:: estnltk.database.database
estnltk.grammar module
estnltk.grammar.grammar module
.. automodule:: estnltk.grammar.grammar
estnltk.grammar.match module
.. automodule:: estnltk.grammar.match
estnltk.prettyprinter.prettyprinter module
.. automodule:: estnltk.prettyprinter.prettyprinter
......@@ -143,8 +143,11 @@ API reference
......@@ -49,6 +49,10 @@ def prepare_text(text):
class Database(object):
"""Database class represents a single index in Elastic
and helps with inserting and querying Estnltk documents.
def __init__(self, index, doc_type='document', **kwargs):
self.__es = Elasticsearch(maxKeepAliveTime=0, timeout=30, **kwargs)
self.__index = index
......@@ -19,6 +19,7 @@ class Symbol(object):
def name(self):
    """Return the name of the symbol, or None if the symbol has no name.

    The value is read from the private ``__name`` attribute of the instance.
    """
    # NOTE(review): the assignment of ``__name`` is not visible in this
    # excerpt; presumably it happens in ``Symbol.__init__`` -- confirm.
    return self.__name
def annotate(self, text, conflict_resolver=resolve_using_maximal_coverage):
......@@ -82,6 +83,7 @@ class IRegex(Regex):
class Lemmas(Symbol):
"""Symbol that matches a list of lemmas."""
def __init__(self, *lemmas, **kwargs):
super(Lemmas, self).__init__(kwargs.get('name'))
......@@ -108,6 +110,7 @@ class Lemmas(Symbol):
class Postags(Symbol):
"""Symbol that matches a list of part-of-speech tags."""
def __init__(self, *postags, **kwargs):
super(Postags, self).__init__(kwargs.get('name'))
......@@ -134,6 +137,7 @@ class Postags(Symbol):
class Suffix(Symbol):
"""Symbol that matches word suffixes."""
def __init__(self, suffix, **kwargs):
super(Suffix, self).__init__(kwargs.get('name'))
......@@ -155,6 +159,7 @@ class Suffix(Symbol):
class Layer(Symbol):
"""Symbol that matches elements of given layer."""
def __init__(self, layer_name, **kwargs):
super(Layer, self).__init__(kwargs.get('name'))
......@@ -169,6 +174,7 @@ class Layer(Symbol):
class LayerRegex(Symbol):
"""Symbol that matches regular expressions on texts of the given layer."""
def __init__(self, layer_name, regex, **kwargs):
super(LayerRegex, self).__init__(kwargs.get('name'))
......@@ -195,6 +201,7 @@ class LayerRegex(Symbol):
class Union(Symbol):
"""Symbol that unions two other symbols."""
def __init__(self, *symbols, **kwargs):
super(Union, self).__init__(kwargs.get('name'))
......@@ -214,6 +221,7 @@ class Union(Symbol):
class Intersection(Symbol):
"""Symbol that intersects two different symbols."""
def __init__(self, *symbols, **kwargs):
super(Intersection, self).__init__(kwargs.get('name'))
......@@ -248,6 +256,7 @@ def concat(matches_a, matches_b, text, name=None):
class Concatenation(Symbol):
"""Concatenate symbols."""
def __init__(self, *symbols, **kwargs):
......@@ -289,6 +298,7 @@ def allgaps(matches_a, matches_b, text, name=None):
class AllGaps(Symbol):
"""Concatenate symbols, but allow gaps of any size between the symbols."""
def __init__(self, *symbols, **kwargs):
super(AllGaps, self).__init__(kwargs.get('name'))
......@@ -12,6 +12,7 @@ TEXT = 'text'
class Match(dict):
"""Match of a grammar symbol."""
def __init__(self, start, end, text, name=None):
super(Match, self).__init__()
......@@ -26,26 +27,32 @@ class Match(dict):
def name(self):
    """Return the name of the match.

    The value is looked up under the ``NAME`` key; ``None`` is returned
    when that key is not present.
    """
    return self.get(NAME, None)
def start(self):
    """Return the start position of the match.

    The value is read from the ``START`` key of this mapping.
    """
    # Indexed directly, so no default is supplied for a missing key.
    return self[START]
def end(self):
    """Return the end position of the match.

    The value is read from the ``END`` key of this mapping.
    """
    # Indexed directly, so no default is supplied for a missing key.
    return self[END]
def text(self):
    """Return the matched text.

    The value is read from the ``TEXT`` key of this mapping.
    """
    # Indexed directly, so no default is supplied for a missing key.
    return self[TEXT]
def matches(self):
    """Return the matches of child symbols.

    The value is read from the ``MATCHES`` key of this mapping.
    """
    # NOTE(review): ``dict()`` in this class tests ``MATCHES in res`` before
    # deleting the key, which suggests the key can be absent; direct indexing
    # here would presumably raise ``KeyError`` in that case -- confirm.
    return self[MATCHES]
def dict(self):
"""Dictionary representing this match and all child symbol matches."""
res = copy(self)
if MATCHES in res:
del res[MATCHES]
......@@ -107,7 +107,11 @@ class PrettyPrinter(object):
def css(self):
"""Get the CSS of the PrettyPrinter."""
The CSS.
css_list = []
for aes in self.aesthetics:
css_list.extend(get_mark_css(aes, self.values[aes]))
......@@ -115,6 +119,19 @@ class PrettyPrinter(object):
return '\n'.join(css_list)
def render(self, text, add_header=False):
"""Render the HTML.
add_header: boolean (default: False)
If True, add HTML5 header and footer.
The rendered HTML.
html = mark_text(text, self.aesthetics, self.rules)
html = html.replace('\n', '<br/>')
if add_header:
