Commit 2382bf8d authored by Timo P's avatar Timo P
Browse files

added wheel files

parent 70bfff3c
......@@ -50,13 +50,40 @@ We assume you have Python3.4 installed and run a 64-bit operating system.
Head over to Christoph Gohlke's website at http://www.lfd.uci.edu/~gohlke/pythonlibs/ and download the following files:
numpy1.10.0b1+mklcp34nonewin_amd64.whl
numpy-1.10.0b1+mkl-cp34-none-win_amd64.whl
Also download Pycrfsuite from https://github.com/estnltk/estnltk/blob/version1.3/dist/python-crfsuite/python_crfsuite-0.8.3-cp34-none-win_amd64.whl .
Install the dependencies::
python.exe -m pip install "numpy-1.10.0b1+mkl-cp34-none-win_amd64.whl"
python.exe -m pip install python_crfsuite-0.8.3-cp34-none-win_amd64.whl
Now, download the Estnltk Wheel file:
As a first test, try to run this line of code in your terminal::
python.exe -c "import estnltk; print( estnltk.Text('Tere estnltk').lemmas )"
It should print::
[nltk_data] Downloading package punkt to /home/user/nltk_data...
[nltk_data] Unzipping tokenizers/punkt.zip.
['tere', 'estnltk']
As you can see, the NLTK data is downloaded on first use of the library.
Then, run the unittest suite::
python3 -m estnltk.run_tests
This should report the number of tests run and the status. If it is "OK", then you are good to go::
Ran 157 tests in 35.207s
OK
Full list of dependencies
-------------------------
=========================
**Python with development headers.** https://www.python.org/ .
The most obvious dependency of course is Python itself.
......@@ -119,6 +146,7 @@ that contains an marvellous list of pre-built binaries, including the ones requi
* **cached-property (version 1.2.0)**
* **beautifulsoup4 (version 4.4.0)**
* **elasticsearch (1.6.0)**
* **html5lib (0.9999999)**
Building
......@@ -126,7 +154,7 @@ Building
Open Visual Studio SDK terminal and type::
python3.4 -m pip install estnltk
python3.4 -m pip build estnltk
Running the tests
......@@ -134,7 +162,7 @@ Running the tests
After you have installed the library, you should run the unit tests::
python -m unittest discover estnltk.tests
python -m estnltk.run_tests
Note that if you built directly from a cloned Estnltk repository, you should navigate away from it first,
as running the command in the same directory can cause problems.
......@@ -161,23 +189,6 @@ Note that the same commands work when building in Windows, but you need to execu
If you want to set up estnltk for development, see :ref:`developer_guide`.
Windows installers
==================
You can use pre-built windows installers for Estnltk.
Note that you still need to install the dependencies separately.
32-bit:
* https://github.com/estnltk/estnltk/blob/master/dist/estnltk-1.2.win32-py2.7.msi
* https://github.com/estnltk/estnltk/blob/master/dist/estnltk-1.2.win32-py3.4.msi
64-bit:
* https://github.com/estnltk/estnltk/blob/master/dist/estnltk-1.2.win-amd64-py2.7.msi
* https://github.com/estnltk/estnltk/blob/master/dist/estnltk-1.2.win-amd64-py3.4.msi
Post-installation steps
=======================
......
# -*- coding: utf-8 -*-
"""Functionality for using Java-based components.
Attributes
----------
JAVARES_PATH: str
The root path for Java components of Estnltk library.
"""
from __future__ import unicode_literals, print_function
from estnltk.core import PACKAGE_PATH, as_unicode, as_binary
import subprocess
import os
# Root path for the Java components: the 'java-res' entry joined onto PACKAGE_PATH.
JAVARES_PATH = os.path.join(PACKAGE_PATH, 'java-res')
class JavaProcess(object):
    """Base class for Java-based components.

    It opens a pipe to a Java VM running the component and interacts with
    it using standard input and standard output.

    The data is encoded as a single line and then flushed down the pipe.
    The Java component receives the input, processes it and writes the
    output also encoded on a single line and flushes it.
    This line-based approach is easy to implement and debug.

    To implement a Java component, inherit from this class and use
    `process_line` method to interact with the process.
    It deals with input/output and errors.
    """

    def __init__(self, runnable_jar, args=None):
        """Initialize a Java VM.

        Parameters
        ----------
        runnable_jar: str
            Path of the JAR file to be run. The java program is expected
            to reside in `java-res` folder of the estnltk project.
        args: list of str, optional
            The list of arguments given to the Java program.
            When omitted, the program is started without extra arguments.
        """
        # A ``None`` sentinel replaces the former ``args=[]`` default so that
        # no mutable list object is shared between calls.
        if args is None:
            args = []
        command = ['java', '-jar', os.path.join(JAVARES_PATH, runnable_jar)]
        # All three standard streams are piped: stdin/stdout carry the
        # line-based protocol, stderr is read only for error reporting.
        self._process = subprocess.Popen(command + args,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)

    def process_line(self, line):
        """Process a line of data.

        Sends the data through the pipe to the process and flushes it.
        Reads a resulting line and returns it.

        Parameters
        ----------
        line: str
            The data sent to process. Make sure it does not contain any
            newline characters.

        Returns
        -------
        str: The line returned by the Java process

        Raises
        ------
        AssertionError
            In case `line` is not a `str` instance (only checked when
            assertions are enabled).
        Exception
            In case EOF is encountered while reading the reply. The message
            includes whatever the process wrote to its standard error.
        IOError
            In case it was impossible to read or write from the subprocess
            standard input / output.
        """
        assert isinstance(line, str)
        try:
            # One request == one line: payload, newline terminator, flush.
            self._process.stdin.write(as_binary(line))
            self._process.stdin.write(as_binary('\n'))
            self._process.stdin.flush()
            result = as_unicode(self._process.stdout.readline())
            # An empty read means the process closed its stdout (EOF).
            if result == '':
                stderr = as_unicode(self._process.stderr.read())
                raise Exception('EOF encountered while reading stream. Stderr is {0}.'.format(stderr))
            return result
        except Exception:
            # Any failure leaves the pipe in an unknown state, so the
            # process is terminated before the error is propagated.
            self._process.terminate()
            raise
# -*- coding: utf-8 -*-
"""Functionality for using Java-based components.
Attributes
----------
JAVARES_PATH: str
The root path for Java components of Estnltk library.
"""
from __future__ import unicode_literals, print_function
from estnltk.core import PACKAGE_PATH, as_unicode, as_binary
import subprocess
import os
# Root path for the Java components: the 'java-res' entry joined onto PACKAGE_PATH.
JAVARES_PATH = os.path.join(PACKAGE_PATH, 'java-res')
class JavaProcess(object):
    """Base class for Java-based components.

    It opens a pipe to a Java VM running the component and interacts with
    it using standard input and standard output.

    The data is encoded as a single line and then flushed down the pipe.
    The Java component receives the input, processes it and writes the
    output also encoded on a single line and flushes it.
    This line-based approach is easy to implement and debug.

    To implement a Java component, inherit from this class and use
    `process_line` method to interact with the process.
    It deals with input/output and errors.
    """

    def __init__(self, runnable_jar, args=None):
        """Initialize a Java VM.

        Parameters
        ----------
        runnable_jar: str
            Path of the JAR file to be run. The java program is expected
            to reside in `java-res` folder of the estnltk project.
        args: list of str, optional
            The list of arguments given to the Java program.
            When omitted, the program is started without extra arguments.
        """
        # A ``None`` sentinel replaces the former ``args=[]`` default so that
        # no mutable list object is shared between calls.
        if args is None:
            args = []
        command = ['java', '-jar', os.path.join(JAVARES_PATH, runnable_jar)]
        # All three standard streams are piped: stdin/stdout carry the
        # line-based protocol, stderr is read only for error reporting.
        self._process = subprocess.Popen(command + args,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)

    def process_line(self, line):
        """Process a line of data.

        Sends the data through the pipe to the process and flushes it.
        Reads a resulting line and returns it.

        Parameters
        ----------
        line: str
            The data sent to process. Make sure it does not contain any
            newline characters.

        Returns
        -------
        str: The line returned by the Java process

        Raises
        ------
        AssertionError
            In case `line` is not a `str` instance (only checked when
            assertions are enabled).
        Exception
            In case EOF is encountered while reading the reply. The message
            includes whatever the process wrote to its standard error.
        IOError
            In case it was impossible to read or write from the subprocess
            standard input / output.
        """
        assert isinstance(line, str)
        try:
            # One request == one line: payload, newline terminator, flush.
            self._process.stdin.write(as_binary(line))
            self._process.stdin.write(as_binary('\n'))
            self._process.stdin.flush()
            result = as_unicode(self._process.stdout.readline())
            # An empty read means the process closed its stdout (EOF).
            if result == '':
                stderr = as_unicode(self._process.stderr.read())
                raise Exception('EOF encountered while reading stream. Stderr is {0}.'.format(stderr))
            return result
        except Exception:
            # Any failure leaves the pipe in an unknown state, so the
            # process is terminated before the error is propagated.
            self._process.terminate()
            raise
......@@ -73,6 +73,7 @@ setup(
'python-crfsuite==0.8.3', # Conditional random fields library
'cached-property==1.2.0', # Simple property for caching results
'beautifulsoup4==4.4.0', # HTML parsing library
'html5lib==0.9999999', # HTML parsing library
'elasticsearch==1.6.0' # database support
],
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment