Commit 33a0dd59 authored by Indrek Jentson

Lisatud EstCG Perli skriptide parandused

parent ad82ab1c
......@@ -43,11 +43,12 @@ RUN cd /wrapper/vabamorf && \
JUUR=$(pwd) && \
cd ${JUUR}/dct/sh && \
chmod +x *.sh && \
./nullist-uus-sonastik.sh
RUN cd /wrapper && \
git clone https://github.com/EstSyntax/EstCG.git && \
cp EstCG/tmorftrtabel.txt .
./nullist-uus-sonastik.sh && \
cd /wrapper && \
git clone 'https://github.com/EstSyntax/EstCG.git' && \
cp EstCGFix/json2mrf.pl EstCG/json2mrf.pl && \
cp EstCGFix/rtolkija.pl EstCG/rtolkija.pl && \
apt-get -y install nano
# Tarkvara X paigaldamine lõppeb
......
#!/usr/bin/perl
# json2mrf.pl -- line-oriented converter from tab-indented JSON morphological
# analysis (read from STDIN or the files named in @ARGV) to a plain-text
# format written to STDOUT:
#
#   <s>                      at a line starting with 4 tabs + "{"
#   WORD                     one line per token
#    LEMMA+ENDING[CLITIC] //_POS_ FORM //      one line per reading
#   </s>                     at a line starting with 4 tabs + "}"
#
# The script does NOT parse JSON. It recognises lines purely by their tab
# depth and key name, so the input must be pretty-printed exactly as the
# patterns below expect. A token's readings are collected first and flushed
# when that token's "text" line is seen.
use strict;
use warnings;
use utf8;
use open qw(:std :utf8);

my $in_sentence = 0;     # 1 while between the sentence-open and sentence-close lines
my $reads       = "";    # readings collected for the token currently being read

# Fields of the reading currently being assembled.
my $gi     = "";         # "clitic" value
my $ending = "";
my $form   = "";
my $pos    = "";
my $lemma  = "";         # "root" value

while (my $line = <>) {
    # Disabled workaround for a vabamorf bug, kept for reference:
    #   s/[\\]n/\n/g;
    chomp $line;

    if ($line =~ /^\t\t\t\t\{/) {    # sentence starts
        $in_sentence = 1;
        print "<s>\n";
    }
    if ($line =~ /^\t\t\t\t\}/) {    # sentence ends
        $in_sentence = 0;
        print "</s>\n";
    }
    next unless $in_sentence == 1;

    if ($line =~ /^\t\t\t\t\t\t\t"text":/) {    # depth 7: "text": "Kes"
        # If the substitution does not match, the whole line is used as the
        # word, exactly as the original implicit-$_ code did.
        (my $word = $line) =~ s/^\t\t\t\t\t\t\t"text": "(.*)"$/$1/g;
        if ($reads eq "") {
            # No readings were collected: emit the token as punctuation (_Z_).
            # Escaped quote / backslash tokens get a fixed quote-mark entry.
            if ($word eq '\"' or $word eq '\\"' or $word eq '\\' or $word eq '\\\\"') {
                print "\"\n \"\"\" //_Z_ //\n";
            }
            else {
                print $word."\n ".$word." //_Z_ //\n";
            }
        }
        else {
            print $word."\n".$reads;    # word followed by its readings
        }
        $reads = "";
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\{/) {    # depth 8: a reading starts
        $gi     = "";
        $ending = "";
        $form   = "";
        $pos    = "";
        $lemma  = "";
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\}/) {    # depth 8: a reading ends
        my $read;
        if ($pos eq "Z") {
            # Punctuation readings carry only the lemma.
            $read = " ".$lemma." //_".$pos."_ //\n";
        }
        else {
            $read = " ".$lemma."+".$ending.$gi." //_".$pos."_ ".$form." //\n";
        }
        $reads = $reads.$read;
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\t"clitic":/) {    # depth 9: "clitic": "",
        ($gi = $line) =~ s/^\t\t\t\t\t\t\t\t\t"clitic": "(.*)",/$1/g;
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\t"ending":/) {    # depth 9: "ending": "",
        ($ending = $line) =~ s/^\t\t\t\t\t\t\t\t\t"ending": "(.*)",/$1/g;
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\t"form":/) {    # depth 9: "form": "",
        ($form = $line) =~ s/^\t\t\t\t\t\t\t\t\t"form": "(.*)",/$1/g;
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\t\"partofspeech\":/) {    # depth 9: "partofspeech": "",
        ($pos = $line) =~ s/^\t\t\t\t\t\t\t\t\t\"partofspeech\": "(.*)",/$1/g;
        next;
    }

    if ($line =~ /^\t\t\t\t\t\t\t\t\t"root":/) {    # depth 9: "root": ""
        # NOTE(review): unlike the other keys this pattern has no trailing
        # comma, so "root" is presumably the last key of a reading -- confirm
        # against the producer's output.
        ($lemma = $line) =~ s/^\t\t\t\t\t\t\t\t\t"root": "(.*)"/$1/g;
    }
}
#!/usr/bin/perl
# rtolkija.pl -- rewrites the morphological tags of an analysis stream
# (STDIN or files in @ARGV -> STDOUT) using a translation table.
#
# Input lines that do not start with a space are word-form lines and are
# echoed (with a leading \" turned into "). Lines that start with a space are
# readings of the form " ROOT //_P_ FORM, //":
#   * punctuation readings (_Z_) get a punctuation-type tag appended;
#   * other readings have "_P_ FORM" looked up in column 1 of the table and
#     replaced by column 3; every matching table row yields one output line;
#   * " cap" is appended when the preceding word-form line started with an
#     upper-case letter.
#
# The table is read from /wrapper/EstCG/tmorftrtabel.txt: one row per line,
# fields separated by "@", lines starting with "¤" are skipped.
use strict;
use warnings;
use utf8;
use open qw(:std :utf8);

my $table_path = '/wrapper/EstCG/tmorftrtabel.txt';
open(my $table_fh, '<', $table_path)
    or die "rtolkija viga, näita tmorftrtabel rada: $!\n";

my @tabel;    # list of array refs, one per table row
while (my $row = <$table_fh>) {
    next unless $row =~ /^[^¤]+/;
    chomp $row;
    my @fields = split(/\@/, $row);
    # A row without a source tag (e.g. a blank line) can never match a
    # reading, and interpolating its undefined field into a regex below would
    # produce an empty pattern, which Perl silently replaces with the last
    # successfully matched pattern.
    next unless defined $fields[1];
    push(@tabel, [@fields]);
}
close($table_fh);

my $cap = 0;    # 1 if the current word form starts with an upper-case letter

while (my $line = <>) {
    chomp $line;

    if ($line =~ /^[^ ]+/) {    # word-form line
        $line =~ s/^\\"/"/g;
        print $line;
        print "\n";
        if ($line =~ /^[A-ZÕÄÖÜŽŠ]+/) {
            $cap = 1;
        } else {
            $cap = 0;
        }
        next;
    }

    # Reading line. The punctuation rules are order-sensitive: longer marks
    # ("...", "..", "--") must be tried before their one-character prefixes.
    # Once a rule has inserted its tag, the later rules no longer match.
    my $tolgendus = $line;
    $tolgendus=~s/…([\+0]*) \/\/_Z_ \/\//…$1 \/\/_Z_ Ell \/\//;
    $tolgendus=~s/\.\.\.([\+0]*) \/\/_Z_ \/\//\.\.\.$1 \/\/_Z_ Ell \/\//;
    $tolgendus=~s/\.\.([\+0]*) \/\/_Z_ \/\//\.\.$1 \/\/_Z_ Els \/\//;
    $tolgendus=~s/\.([\+0]*) \/\/_Z_ \/\//\.$1 \/\/_Z_ Fst \/\//;
    $tolgendus=~s/,([\+0]*) \/\/_Z_ \/\//,$1 \/\/_Z_ Com \/\//;
    $tolgendus=~s/:([\+0]*) \/\/_Z_ \/\//:$1 \/\/_Z_ Col \/\//;
    $tolgendus=~s/;([\+0]*) \/\/_Z_ \/\//;$1 \/\/_Z_ Scl \/\//;
    $tolgendus=~s/\?([\+0]*) \/\/_Z_ \/\//\?$1 \/\/_Z_ Int \/\//;
    $tolgendus=~s/\!([\+0]*) \/\/_Z_ \/\//\!$1 \/\/_Z_ Exc \/\//;
    $tolgendus=~s/--([\+0]*) \/\/_Z_ \/\//--$1 \/\/_Z_ Dsd \/\//;
    $tolgendus=~s/-([\+0]*) \/\/_Z_ \/\//-$1 \/\/_Z_ Dsh \/\//;
    $tolgendus=~s/\(([\+0]*) \/\/_Z_ \/\//\($1 \/\/_Z_ Opr \/\//;
    $tolgendus=~s/\)([\+0]*) \/\/_Z_ \/\//\)$1 \/\/_Z_ Cpr \/\//;
    $tolgendus=~s:\\"\s+//_Z_ //:" //_Z_ Quo //:g;
    $tolgendus=~s/«([\+0]*) \/\/_Z_ \/\//«$1 \/\/_Z_ Oqu \/\//;
    $tolgendus=~s/»([\+0]*) \/\/_Z_ \/\//»$1 \/\/_Z_ Cqu \/\//;
    $tolgendus=~s/“([\+0]*) \/\/_Z_ \/\//“$1 \/\/_Z_ Oqu \/\//; # E2 80 9C
    $tolgendus=~s/”([\+0]*) \/\/_Z_ \/\//”$1 \/\/_Z_ Cqu \/\//; # E2 80 9D
    $tolgendus=~s/<([\+0]*) \/\/_Z_ \/\//<$1 \/\/_Z_ Grt \/\//;
    $tolgendus=~s/>([\+0]*) \/\/_Z_ \/\//>$1 \/\/_Z_ Sml \/\//;
    $tolgendus=~s/\[([\+0]*) \/\/_Z_ \/\//\[$1 \/\/_Z_ Osq \/\//;
    $tolgendus=~s/\]([\+0]*) \/\/_Z_ \/\//\]$1 \/\/_Z_ Csq \/\//;
    $tolgendus=~s/\/([\+0]*) \/\/_Z_ \/\//\/$1 \/\/_Z_ Sla \/\//;
    # "=" and "+" get no type tag; any trailing +/0 suffix is dropped.
    $tolgendus=~s/\=([\+0]*) \/\/_Z_ \/\//\= \/\/_Z_ \/\//;
    $tolgendus=~s/\+([\+0]*) \/\/_Z_ \/\//\+ \/\/_Z_ \/\//;
    # "&" and "%" are re-tagged as _Y_ whatever their original tag was.
    $tolgendus=~s/\&[\+0]* \/\/_\S_ .*$/& \/\/_Y_ \/\//;
    $tolgendus=~s/\%[\+0]* \/\/_\S_ .*/% \/\/_Y_ \/\//;

    # Punctuation readings are finished at this point.
    if ($tolgendus =~ /_Z_/) { print $tolgendus,"\n"; next; }

    if ($tolgendus =~ /(.*)\s+\/\/(_._) (.*)\/\//) {
        # Copy the captures at once: the regex operations below would
        # otherwise make later uses of $1..$3 depend on dynamic scoping.
        my ($root, $pos, $forms) = ($1, $2, $3);

        # One reading may list several comma-separated form alternatives.
        foreach my $m (split(/,/, $forms)) {
            $m =~ s/\s+/ /g;
            $m =~ s/^\s+//g;
            next if ($m =~ /^\s*$/);

            my $morf = $pos." ".$m;
            $morf =~ s/(.*)\s+$/$1/g;

            my $lipp = 0;    # number of table rows that matched this form
            foreach my $rida (@tabel) {
                next unless ($morf eq $rida->[1]);
                # The source tag equals column 1 exactly, so the translation
                # is simply column 3. The earlier code substituted with
                # column 1 interpolated as a regex, which mis-handled tags
                # containing metacharacters (e.g. "_N_ ?").
                my $tag = defined $rida->[3] ? $rida->[3] : "";
                $tag =~ s/ \?/ \#?/;
                if ($cap) {
                    $tag .= " cap";
                }
                print $root." //".$tag." //\n";
                $lipp++;
            }

            if ($lipp == 0) {
                # No table row: emit the tag untranslated.
                if ($cap) {
                    $morf .= " cap";
                }
                print $root." //".$morf." //\n";
            }
        }

        # Reading with a bare part-of-speech tag and no form information.
        # NOTE(review): column 1 is deliberately still used as an unescaped,
        # unanchored regex here, as in the earlier code; a row such as
        # "_N_ ?" therefore matches the bare "_N_". A reading that matches
        # no row produces no output at all -- confirm both are intended.
        if ($forms =~ /^\s*$/) {
            foreach my $rida (@tabel) {
                my $morf = $pos;
                if ($morf =~ /$rida->[1]/) {
                    my $tag = defined $rida->[3] ? $rida->[3] : "";
                    $morf =~ s/$rida->[1]/$tag/;
                    $morf =~ s/ \?/ \#?/;
                    if ($cap) {
                        $morf .= " cap";
                    }
                    print $root." //".$morf." //\n";
                }
            }
        }
    }
    else { print $tolgendus,"\n"; }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment