Commit 3d582add authored by Neeme Kahusk
Browse files

unique synset numbers

parent b1121a1d
eq_be_in_state
eq_causes
eq_generalization
eq_has_holonym
eq_has_hyperonym
eq_has_hyponym
eq_has_meronym
eq_has_subevent
eq_involved
eq_is_caused_by
eq_is_state_of
eq_is_subevent_of
eq_metonym
eq_near_synonym
eq_role
eq_synonym
This diff is collapsed.
......@@ -21,6 +21,9 @@ EX_EXT=`echo $EXPORTFILE |sed -r "s/^[.]*[^.]+[.]([^.]+)$/\1/1"`
echo 'Do we have exportfile extension?'
echo $EX_EXT
# Do we have unique Synset numbers?
# Count the lines of the input file ($1) that start with '0'.
# Command substitution needs "$( ... )"; a bare "=( ... )" is array-assignment
# syntax and bash rejects the pipes inside it.
NO_OF_ORIGSYNSETS=$( cat "$1" | grep '^0' | wc -l )
# all operations are done on NORMFILE!
# extensions of index files
NORMEXT='norm' # normfile
......
......@@ -26,6 +26,9 @@ else
echo "Synset numbers ARE NOT unique"
cat $1 |grep '^0'|sed -E 's/(0 @)([0-9]+)(@.+$)/\2/g'|sort|uniq > all.numbers
cat $1 |grep '^0'|sed -E 's/(0 @)([0-9]+)(@.+$)/\2/g'|sort|uniq -c|sort -nr|egrep '^ {3}2'|sed -E 's/^ {3}2 //g' > doubled.numbers
# rm all.numbers
# rm doubled.numbers
fi
echo "$NO_OF_ORIGSYNSETS"
echo "$NO_OF_UNIQESYNSETS"
......
antonym
be_in_state
belongs_to_class
causes
fuzzynym
has_holo_location
has_holo_madeof
has_holo_member
has_holo_part
has_holo_portion
has_holonym
has_hyperonym
has_hyponym
has_instance
has_mero_location
has_mero_madeof
has_mero_member
has_mero_part
has_mero_portion
has_meronym
has_subevent
has_xpos_hyperonym
has_xpos_hyponym
involved
involved_agent
involved_instrument
involved_location
involved_patient
involved_target_direction
is_caused_by
is_subevent_of
near_antonym
near_synonym
role
role_agent
role_instrument
role_location
role_patient
role_target_direction
state_of
xpos_fuzzynym
xpos_near_antonym
xpos_near_synonym
......@@ -3,8 +3,9 @@
__author__ = 'Neeme Kahusk <neeme.kahusk@ut.ee>'
__version__ = '1.0'
__date__ = 'T nov 20 15:24:18 EET 2012'
__date__ = 'Thu Jun 25 22:59:28 EEST 2015'
import eurown
from optparse import OptionParser
......@@ -18,53 +19,29 @@ parser = OptionParser(usage="%prog [options]",
(options, args) = parser.parse_args()
if len(args) != 2:
if len(args) != 3:
parser.error("incorrect number of arguments")
RAW_INDEX_FN = args[0]
LITERAL_INDEX_FN = args[-1]
ALL_NUMBERS_FN = args[0]
DOUBLED_NUMBERS_FN = args[1]
WN_FN = args[2]
def read_raw_index(filename):
def read_raw(filename):
f = open(filename)
lines = map(lambda x: x.strip().split(':'),f.readlines())
lines = map(lambda x: int(x.strip()), f.readlines())
f.close()
return lines
def make_literal_index(iList):
    """Group the second member of each entry under its first member.

    iList is a sequence of indexable entries with at least two members.
    Returns a dict mapping each distinct ``entry[0]`` to the list of
    ``entry[1]`` values, in the order they were encountered.

    Entries with fewer than two members are reported on stdout and
    skipped instead of aborting the whole run.
    """
    oDict = dict()
    for i in iList:
        try:
            key, value = i[0], i[1]
        except IndexError:
            # A short entry fails on indexing, not on the dict lookup, so
            # IndexError (not KeyError) is the exception to handle here.
            print('võtme viga %s' % (i,))
            continue
        oDict.setdefault(key, []).append(value)
    return oDict
ri = read_raw_index(RAW_INDEX_FN)
def write_index(iDict,filename):
    """Append an index (dict) to a file, one ``key:value value ...`` line per entry.

    Each key of iDict is decoded from UTF-8, followed by a colon and the
    entry's values joined by single spaces; the resulting line is written
    UTF-8 encoded. The file is opened in append mode, so existing content
    is kept.

    An entry that raises UnicodeDecodeError while its line is being built
    is reported by printing its values and is skipped.
    """
    # The with-block closes the file even if an entry raises.
    with open(filename,'a') as f:
        for i in iDict:
            try:
                oStr = u'%s:%s\n' % (i.decode('utf8'),
                                     u' '.join(iDict[i])
                                     )
            except UnicodeDecodeError:
                print(iDict[i])
                # No line was built for this entry. Falling through would
                # write the previous entry's line again, or hit an
                # undefined oStr on the first entry.
                continue
            f.write(oStr.encode('utf-8'))
litindex = make_literal_index(ri)
al = read_raw(ALL_NUMBERS_FN)
doub = read_raw(DOUBLED_NUMBERS_FN)
print al
print doub
print len(al)
print len(doub)
write_index(litindex,LITERAL_INDEX_FN)
p = eurown.Parser(WN_FN)
s = p.parse_synset()
print s.polarisText
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment