Commit c54b3555 authored by Neeme Kahusk
Browse files

unique synset numbers

parent 3d582add
......@@ -25,7 +25,7 @@ if [ "$NO_OF_ORIGSYNSETS" -eq "$NO_OF_UNIQESYNSETS" ] ; then
else
echo "Synset numbers ARE NOT unique"
cat $1 |grep '^0'|sed -E 's/(0 @)([0-9]+)(@.+$)/\2/g'|sort|uniq > all.numbers
cat $1 |grep '^0'|sed -E 's/(0 @)([0-9]+)(@.+$)/\2/g'|sort|uniq -c|sort -nr|egrep '^ {3}2'|sed -E 's/^ {3}2 //g' > doubled.numbers
cat $1 |grep '^0'|sed -E 's/(0 @)([0-9]+)(@.+$)/\2/g'|sort|uniq -c|sort -nr|egrep '^ {3}[2-9]'|sed -E 's/^ {3}[2-9] //g' > doubled.numbers
# rm all.numbers
# rm doubled.numbers
......
......@@ -34,14 +34,27 @@ def read_raw(filename):
# NOTE(review): this fragment appears to come from a diff view in which
# indentation and +/- markers were lost: several statements show up both in
# active and in commented-out form, and the nesting of the loop body below
# cannot be confirmed from here -- verify against the real file.
# Python 2 syntax (print statements).

# Read the contents of the two number files with read_raw(), which is defined
# outside this fragment.
al = read_raw(ALL_NUMBERS_FN)
# Largest value in `al`; the loop below derives new numbers from it.
maximal = max(al)
doub = read_raw(DOUBLED_NUMBERS_FN)
# Collects synset numbers appended during the loop below.
taken = []
print al
print doub
# print al
# print doub
print len(al)
print len(doub)
# print len(al)
# print len(doub)
# Parser for the wordnet file; eurown is a module imported outside this fragment.
p = eurown.Parser(WN_FN)
s = p.parse_synset()
print s.polarisText
# s = p.parse_synset()
# print s.polarisText
# parse_wordnet() returns an iterable of objects that expose `number` and
# `polarisText` (both are accessed below).
sl = p.parse_wordnet()
for i in sl:
# A synset whose number is listed in `doub` and is already present in `taken`
# gets the next number after the current maximum, and `maximal` is advanced.
# NOTE(review): presumably the append belongs to the `in doub` branch and the
# print runs for every synset -- TODO confirm the original indentation.
if i.number in doub:
if i.number in taken:
i.number = maximal + 1
maximal = i.number
taken.append(i.number)
print i.polarisText
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment