Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Krista Liin
TÜveakorpus
Commits
2a4609bc
Commit
2a4609bc
authored
Sep 13, 2019
by
rabauti
Browse files
nimi muutunud
parent
63cee48b
Changes
1
Hide whitespace changes
Inline
Side-by-side
vealiigitus/morfDiff.py
deleted
100644 → 0
View file @
63cee48b
#!/usr/bin/python
# -*- coding: utf-8 -*-
import
os
import
sys
import
re
import
difflib
import
copy
from
difflib
import
Differ
from
estnltk
import
Text
# Original texts keyed by base uid (the id without its last '_'-separated part).
originals = {}
# Lists of corrected texts keyed by the same base uid.
corrections = {}

script_name = os.path.realpath(__file__)
script_dir = os.path.dirname(script_name)
filename = script_dir + '/../korpus_tsv/korpus.tsv'
outdir = script_dir + '/morf'

# Shared HTML diff renderer used for every generated page.
d = difflib.HtmlDiff(tabsize=4, wrapcolumn=40)


class CorpusFormatError(ValueError):
    """Raised when a corpus line is not exactly ``<id><TAB><text>``.

    The offending line (already right-stripped) is available as ``line``.
    """

    def __init__(self, line):
        super().__init__(line)
        self.line = line


def parse_corpus_line(line):
    """Split one corpus line into ``(base_uid, el_type, entry)``.

    ``line`` is right-stripped and split on tabs; it must yield exactly two
    fields, otherwise ``CorpusFormatError`` is raised.  The id is cut at its
    last underscore: the part before it is ``base_uid`` and the first
    character of the part after it is ``el_type`` (an empty string when there
    is nothing after the underscore).  ``entry`` is the dict
    ``{'id': <full id>, 'text': <text>}``.
    """
    line = line.rstrip()
    fields = line.split('\t')
    if len(fields) != 2:
        raise CorpusFormatError(line)
    uid, text = fields
    base_uid, _, uid_ending = uid.rpartition('_')
    # Slicing instead of indexing: an id that ends in '_' has an empty ending
    # and must not raise IndexError.
    return base_uid, uid_ending[:1], {'id': uid, 'text': text}


def collect_entries(lines, originals_by_uid, corrections_by_uid):
    """Sort corpus lines into originals and corrections, in place.

    Entries whose type letter is ``'a'`` are stored in ``originals_by_uid``
    (a later line with the same base uid replaces an earlier one); entries
    whose type letter is ``'p'`` are appended to the list kept under their
    base uid in ``corrections_by_uid``.  Any other type letter is ignored.

    Raises ``CorpusFormatError`` on the first malformed line.
    """
    for line in lines:
        base_uid, el_type, entry = parse_corpus_line(line)
        if el_type == 'a':
            originals_by_uid[base_uid] = entry
        elif el_type == 'p':
            corrections_by_uid.setdefault(base_uid, []).append(entry)


def morph_rows(analysed):
    """Return one tab-joined ``word, lemma, postag, form`` row per word.

    ``analysed`` is any object exposing the parallel sequences
    ``word_texts``, ``lemmas``, ``postags`` and ``forms``.
    NOTE(review): the four sequences are assumed to have equal length, as
    the original index-based loop also required -- confirm against estnltk.
    """
    return [
        "\t".join(columns)
        for columns in zip(
            analysed.word_texts,
            analysed.lemmas,
            analysed.postags,
            analysed.forms,
        )
    ]


def render_diff(analysed_original, analysed_correction, differ=d):
    """Build the HTML written for one original/correction pair.

    The result is a word-level diff table (``make_table``) immediately
    followed by a complete diff page (``make_file``) comparing the
    morphological rows produced by ``morph_rows``.  This concatenation is
    what the script has always written, so it is kept unchanged.
    """
    words_table = differ.make_table(
        "\n".join(analysed_original.word_texts).splitlines(True),
        "\n".join(analysed_correction.word_texts).splitlines(True),
    )
    morph_page = differ.make_file(
        "\n".join(morph_rows(analysed_original)).splitlines(True),
        "\n".join(morph_rows(analysed_correction)).splitlines(True),
    )
    return words_table + morph_page


def main():
    """Read the corpus and write one ``<outdir>/<correction id>.html`` per correction.

    Exits with status 1 after printing ``ERROR : <line>`` when the corpus
    contains a malformed line.  Corrections without a matching original are
    reported on stderr and skipped.
    """
    try:
        # Explicit UTF-8 so reading does not depend on the platform locale.
        with open(filename, 'r', encoding='utf-8') as file_input:
            collect_entries(file_input, originals, corrections)
    except CorpusFormatError as err:
        print('ERROR :', err.line)
        sys.exit(1)

    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    for uid, uid_corrections in corrections.items():
        original = originals.get(uid)
        if original is None:
            sys.stderr.write('WARNING: no original text for %s, skipped\n' % uid)
            continue
        for correction in uid_corrections:
            html_diff_result = render_diff(
                Text(original['text']),
                Text(correction['text']),
            )
            filename2 = '%s/%s.html' % (outdir, correction['id'])
            # make_file() declares charset=utf-8 in the page by default, so
            # the bytes on disk have to be UTF-8 as well.
            with open(filename2, 'w', encoding='utf-8') as file_out:
                file_out.write(html_diff_result)


if __name__ == '__main__':
    main()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment