Commit d67b988d authored by Indrek Jentson's avatar Indrek Jentson
Browse files

Muudetud konfiguratsiooni vastavaks CI vajadustele.

parent 5712abc6
Pipeline #305 passed with stage
in 2 minutes and 13 seconds
# GitLab CI: build the project's Docker image with Kaniko (no Docker daemon
# needed inside the runner) and push it to the GitLab container registry.
# NOTE(review): the indentation below was reconstructed — the scraped page had
# stripped all leading whitespace, which makes YAML invalid.
image:
  name: gcr.io/kaniko-project/executor:debug
  # The kaniko debug image ships a busybox entrypoint; clear it so GitLab CI
  # can run the job's script lines.
  entrypoint: [""]

stages:
  - build
  # NOTE(review): no job uses the 'deploy' stage — either add a deploy job or
  # drop the stage.
  - deploy

build:
  stage: build
  only:
    - master
  script:
    # Write registry credentials for kaniko from GitLab's predefined CI variables.
    - mkdir -p /kaniko/.docker
    - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json
    - echo CI_REGISTRY_IMAGE $CI_REGISTRY_IMAGE
    # Build and push, tagged with both the commit ref and 'latest'.
    # NOTE(review): $CI_BUILD_REF is deprecated — $CI_COMMIT_SHA is the modern
    # equivalent; confirm the GitLab version before switching.
    - /kaniko/executor --context $CI_PROJECT_DIR --cache --dockerfile $CI_PROJECT_DIR/Dockerfile --destination $CI_REGISTRY_IMAGE:$CI_BUILD_REF --destination $CI_REGISTRY_IMAGE:latest
# Docker build script for the Tokenizer service.
# NOTE(review): this page is a commit diff; this debian:buster recipe appears to
# be the pre-change version, replaced by the prebuilt base image FROM'd further
# down — confirm against the repository before reusing it.
FROM debian:buster
# Debian does not ship all libraries GCC needs by default, so install them too.
# NOTE(review): piping the NodeSource setup script straight into root's bash is
# unverified remote code execution at build time — pin and checksum it instead.
RUN apt-get update && \
apt-get -y install curl sudo gnupg apt-utils && \
curl -sL https://deb.nodesource.com/setup_10.x | sudo bash - && \
apt-get -y install build-essential nodejs wget && \
apt-get -y install git && echo "Installed 1"
# Install the 'forever' Node.js process manager globally.
RUN npm install -g forever
# Compilers, Python 2/3 dev headers, swig, numpy — presumably needed to build
# estnltk's native extensions; TODO confirm.
RUN apt-get -y install g++ python3-dev python-dev libpython3-dev libpython-dev python3-pip python3-wheel python3-numpy swig && \
apt-get -y install git && echo "Installed 2"
# Ensure the newest pip is available.
# NOTE(review): 'python' on buster is Python 2, and bootstrap.pypa.io's
# get-pip.py no longer supports Python 2 — this step likely fails on a rebuild.
RUN wget https://bootstrap.pypa.io/get-pip.py && \
python get-pip.py && \
pip install -U pip
# Prepare the code used inside the container; plan which files belong in which
# directory. NOTE(review): prefer WORKDIR over 'cd wrapper && …'.
RUN git clone 'https://gitlab.keeleressursid.ee/keeleliin/keeleliin-wrapper.git' --single-branch wrapper && \
mkdir -p /config && mkdir -p /wrapper/files && mkdir -p /wrapper/tmp && mkdir -p /wrapper/logs && \
cd wrapper && npm install && \
echo "NPM is installed"
FROM registry.gitlab.keeleressursid.ee/keeleliin/keeleliin-service-base:estnltk
# Määratleme aktiivse töökataloogi
WORKDIR /wrapper
# Add below all installation commands required to set up the task-performing
# software X.
# Software X installation begins
COPY config_dist.js /wrapper/config.js
# NOTE(review): the COPY above and below differ only in destination — old/new
# diff residue; only one of them should survive. Confirm against the repo.
COPY config_dist.js /wrapper/config_dist.js
COPY tokenizer.py /wrapper
# tokenizer.py requires the estnltk library.
# NOTE(review): 1.4.1.1 is pinned here, but the README advertises ESTNLTK 1.6
# and tokenizer.py uses the 1.6 API (Text.tag_layer, token.start/.end) —
# verify the pinned version.
RUN pip install estnltk==1.4.1.1
# Software X installation ends
# Expose port (documentation only — EXPOSE does not publish the port).
EXPOSE 3003
# NOTE(review): the four single-path VOLUME lines and the combined one below
# are old/new diff residue — keep only one form.
VOLUME ["/config"]
VOLUME ["/wrapper/logs"]
VOLUME ["/wrapper/files"]
VOLUME ["/wrapper/tmp"]
VOLUME ["/config", "/wrapper/logs", "/wrapper/files", "/wrapper/tmp"]
# NOTE(review): shell-form CMD runs via '/bin/sh -c', so the script is not
# PID 1 and will not receive SIGTERM from 'docker stop' — consider the exec
# form: CMD ["/wrapper/docker_start.sh"].
CMD /wrapper/docker_start.sh
# Teenus KL-PY-Tokenizer
Käesolev teenus kasutab teeki ESTNLTK ja tagastab etteantava tekstifaili kohta
faili, milles ...
Käesolev teenus kasutab teeki ESTNLTK 1.6 ja tagastab etteantava tekstifaili kohta
json-faili, milles on nimistu tokenite kohta käivatest objektidest.
## Komponendi docker-compose'ga kasutamise ettevalmistamine (Linuxis)
* Eeldame, et serveris on juba olemas järgmised projektid:
......
#!/bin/sh
# Build the service image, tagged with the service id read from config_dist.js.
set -e

# Service id: the value inside " id: '...'" in config_dist.js.
NAME=$(grep -Po " id: '\K.*?(?=')" config_dist.js)
echo "Building image for service $NAME"
# Quote the tag so an unexpected id with whitespace cannot split the argument.
docker build --no-cache -t "$NAME:latest" .
\ No newline at end of file
#!/bin/sh
# Start the service container (plus a shared Redis) with host-side volume
# directories under /srv/kl/<service-id>/.
set -e

# Service port and id, read from config_dist.js.
PORT=$(grep -Po 'port: \K.*?(?=,)' config_dist.js)
NAME=$(grep -Po " id: '\K.*?(?=')" config_dist.js)
echo "Running service $NAME on port $PORT"
# Start the shared Redis container; ignore the error if it is already running.
docker run --name kl_redis -d redis || true
# Host directories backing the container's volumes.
mkdir -p "/srv/kl/$NAME/config" "/srv/kl/$NAME/logs" \
         "/srv/kl/$NAME/files" "/srv/kl/$NAME/tmp"
# All expansions quoted so ids/ports with unexpected characters cannot split.
docker run --name "$NAME" -it -d --link kl_redis:redis \
    -p "$PORT:$PORT" \
    -v "/srv/kl/$NAME/config:/config" \
    -v "/srv/kl/$NAME/logs:/wrapper/logs" \
    -v "/srv/kl/$NAME/files:/wrapper/files" \
    -v "/srv/kl/$NAME/tmp:/wrapper/tmp" \
    "$NAME"
#!/bin/sh
# Stop and remove the running service container.
# Deliberately no 'set -e': 'docker rm' should still run even if 'docker stop'
# fails (e.g. the container already exited).

NAME=$(grep -Po " id: '\K.*?(?=')" config_dist.js)
echo "Stopping service $NAME"
docker stop "$NAME"
docker rm "$NAME"
#!/bin/sh
# Smoke-test the running service: probe its HTTP endpoints for a status line,
# then run one functional tokenization request and report the session result.

PORT=$(grep -Po 'port: \K.*?(?=,)' config_dist.js)
NAME=$(grep -Po " id: '\K.*?(?=')" config_dist.js)
echo "Testing service $NAME on port $PORT"

# Probe one endpoint: $1 = label printed first, $2 = URL path ('' for root).
# Each probe overwrites the same scratch file; only the HTTP status line shows.
check_endpoint() {
    echo "$1"
    curl -I "http://127.0.0.1:$PORT$2" > "$NAME-test.txt"
    grep "HTTP/" "$NAME-test.txt"
}

check_endpoint "Service description" ""
check_endpoint "Service check" "/api/v1/check"
check_endpoint "Service statistics" "/api/v1/statistics"
check_endpoint "Service config" "/api/v1/service/config"

echo "Functional test"
# Use the README as an arbitrary text input for the tokenizer.
cp README.MD "$NAME-test-input.txt"
curl --form "content=@$NAME-test-input.txt" "http://127.0.0.1:$PORT/api/v1/service" > "$NAME-test.txt"
echo ""
# Pull the session id and status message out of the JSON response.
SID=$(grep -Po '"sessionId":"\K.*?(?=")' "$NAME-test.txt")
status=$(grep -Po '"message":"\K.*?(?=")' "$NAME-test.txt")
echo "Session $SID - $status"
rm "$NAME-test.txt"
rm "$NAME-test-input.txt"
#!/usr/bin/python
#!/usr/bin/python3
#coding: utf-8
import sys, getopt
......@@ -15,24 +15,29 @@ def json_repr(obj):
Return:
String that represent JSON-encoded object.
"""
def serialize(obj, l):
    """Recursively convert obj into JSON-encodable primitives.

    Parameters:
        obj: value of any type; scalars pass through, containers are
             walked recursively, other objects fall back to repr().
        l:   remaining recursion depth. Container branches only recurse
             while l > 0, so deeply nested (or cyclic) structures degrade
             to their repr() instead of recursing forever.

    Returns:
        A structure built only of bool/int/float/str, dict, list and
        tuple (plus repr() strings for anything else).
    """
    # Note: the previous interleaved Python-2 variant (long/basestring,
    # one-argument recursive calls) was removed in this revision.
    if isinstance(obj, (bool, int, float)):
        return obj
    elif isinstance(obj, str):
        return obj
    elif isinstance(obj, bytes):
        # Byte strings are assumed to hold UTF-8 encoded text.
        return obj.decode("utf-8")
    elif isinstance(obj, dict) and l > 0:
        # Work on a copy so the caller's dict is not mutated.
        obj = obj.copy()
        for key in obj:
            obj[key] = serialize(obj[key], l - 1)
        return obj
    elif isinstance(obj, list) and l > 0:
        return [serialize(item, l - 1) for item in obj]
    elif isinstance(obj, tuple) and l > 0:
        # Serialize the items directly; routing through a temporary list
        # (as before) could tuple-ize a repr() string into characters
        # when the depth limit was hit mid-tuple.
        return tuple(serialize(item, l - 1) for item in obj)
    elif hasattr(obj, '__dict__') and l > 0:
        # Generic objects are serialized via their attribute dict.
        return serialize(obj.__dict__, l - 1)
    else:
        return repr(obj)  # Don't know how to handle, convert to string
return json.dumps(serialize(obj), ensure_ascii=False)
return json.dumps(serialize(obj, 20))
def write_header(outfile, inputfile):
header = {'content':inputfile, 'coding':'utf-8', 'layer':'words',
......@@ -57,18 +62,18 @@ def write_mapping(outfile):
def main(argv):
inputfile = ''
outputfile = ''
helptext = 'Usage: python tokenizer.py -i <inputfile> -o <outputfile>'
helptext = 'Usage: python3 tokenizer.py -i <inputfile> -o <outputfile>'
if len(argv) == 0:
print helptext
print (helptext)
sys.exit()
try:
opts, args = getopt.getopt(argv,"hi:o:",["ifile=","ofile="])
except getopt.GetoptError:
print helptext
print (helptext)
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print helptext
print (helptext)
sys.exit()
elif opt in ("-i", "--ifile"):
inputfile = arg
......@@ -87,13 +92,13 @@ def main(argv):
outfile.write(', "data":[\n')
for line in infile:
doc = Text(line)
doc.tokenize_words()
doc.tag_layer()
tokens = doc['words']
last_end = 0
for t in tokens:
outfile.write(sep+'{"i":'+str(idx)+', "t":"'+t['text']+'", "s":'+str(t['start']+offset)+', "e":'+str(t['end']+offset)+'}\n')
outfile.write(sep+'{"i":'+str(idx)+', "t":"'+t.text+'", "s":'+str(t.start+offset)+', "e":'+str(t.end+offset)+'}\n')
idx += 1
last_end = t['end']
last_end = t.end
sep = ','
offset += last_end + 1
outfile.write(']\n}\n')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment