Commit 232348c1 authored by osboxes.org's avatar osboxes.org
Browse files

korpuse teisendused

parent 5f43d5b4
python3 wrapSH.py -s ./sh/cg.sh -r ./korpus.cg
#!/bin/sh
# Positional arguments:
#   $1  identifier; stored under both ELEMENT_ID and ID
#   $2  sentence text
# NOTE(review): ELEMENT_ID and ID receive the same value, and only ID is
# referenced further down in the visible script - confirm ELEMENT_ID is
# still needed.
ELEMENT_ID=$1
ID=$1
SENTENCE=$2
# Writes the sentence, as received, to stdout.
# NOTE(review): this output precedes whatever the rest of the script
# prints - confirm that is intended.
echo "$SENTENCE"
\ No newline at end of file
# Installation locations: RADA holds the EstCG scripts and rule files,
# RADAMRF the directory containing the vabamorf `etana` binary, RADADCT the
# directory containing its et.dct lexicon.
RADA=/home/osboxes/bin/EstCG-master
RADAMRF=~/bin/vabamorf-master/apps/cmdline/project/unix
RADADCT=~/bin/vabamorf-master/dct/binary/

# Send the sentence through the EstCG perl scripts, `etana analyze`, a chain
# of vislcg3 rule files, and finally ./util/cgAddId.pl, which receives $ID.
#
# printf with a quoted expansion is used instead of an unquoted `echo` so the
# sentence is passed on verbatim: no word splitting, no pathname expansion of
# characters such as ? or *, and no implementation-defined handling of
# backslashes or a leading -n.
printf '%s\n' "$SENTENCE" \
| "$RADA/rlausestaja.pl" \
| "$RADA/wr2json.pl" \
| "$RADAMRF/etana" analyze -lex "$RADADCT/et.dct" -guess \
| sed -e "s/[\\]n/\n/g" | "$RADA/json2mrf.pl" \
| "$RADA/rtolkija.pl" \
| "$RADA/pron17.pl" \
| "$RADA/tcopyrem_kaili.pl" \
| "$RADA/tkms2cg3.pl" \
| vislcg3 -g "$RADA/preprocess.rul" \
| vislcg3 -o -g "$RADA/clo.rul" \
| vislcg3 -o -g "$RADA/morfyhe17.rul" \
| vislcg3 -o -g "$RADA/PhVerbs17.rul" \
| vislcg3 -o -g "$RADA/pindsyn17.rul" \
| vislcg3 -o -g "$RADA/strukt.rul" \
| perl ./util/cgAddId.pl "$ID"
#!/bin/sh
# Pass-through wrapper: writes the sentence argument to stdout, followed by
# a newline.
#   $1  element ID (stored but not used by this script)
#   $2  sentence text
ELEMENT_ID=$1
SENTENCE=$2
# printf rather than echo: echo's treatment of backslashes and of a leading
# -n differs between shells, which would corrupt such sentences.
printf '%s\n' "$SENTENCE"
\ No newline at end of file
#!/bin/perl
# Adds an id attribute to every bare <s> tag in the text read from STDIN and
# prints the result to STDOUT.
#
# Usage: perl <this script> ID < input > output
#
# Argument [0] is the ID.
# The first <s> becomes <s id="ID">; every following <s> becomes
# <s id="ID_1">, <s id="ID_2">, and so on.
use utf8;
use strict;
use warnings;

binmode(STDOUT, ":utf8");
binmode(STDIN, ":utf8");

# Returns a copy of $text in which the n-th occurrence of the literal tag
# "<s>" (counting from 0) is replaced by <s id="$id"> for n == 0 and by
# <s id="${id}_n"> otherwise. Text without "<s>" is returned unchanged.
#
# The replacement is done in a single left-to-right pass, so text inserted
# by an earlier replacement is never scanned again.
sub add_sentence_ids {
    my ($text, $id) = @_;
    my $index = 0;
    $text =~ s{<s>}{
        my $suffix = $index == 0 ? '' : '_' . $index;
        $index++;
        '<s id="' . $id . $suffix . '">';
    }ge;
    return $text;
}

# Reads all of STDIN, tags the sentences with the ID from the command line
# and writes the result to STDOUT.
sub main {
    # A missing ID is treated as the empty string.
    my $id = defined $ARGV[0] ? $ARGV[0] : '';
    my $text = do { local $/; <STDIN> };    # slurp the whole input
    $text = '' unless defined $text;
    print STDOUT add_sentence_ids($text, $id);
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without consuming STDIN.
main() unless caller;
......@@ -85,7 +85,7 @@ for row in tsv_file.read().split('\n'):
arr = row.split('\t')
if len(arr)>1:
if len(arr[0]) and len(arr[1]):
print ([options['sh_script'], '"%s"'%arr[0] , '"%s"'%arr[1]] )
subprocess.run([options['sh_script'], '"%s"'%arr[0] , '"%s"'%arr[1]], stdout=outfile)
print ([options['sh_script'], '%s'%arr[0] , '%s'%arr[1]] )
subprocess.run([options['sh_script'], '%s'%arr[0] , '%s'%arr[1]], stdout=outfile)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment