Commit e134b523 authored by Neeme Kahusk
Browse files

making indexes

parent c54b3555
......@@ -21,8 +21,8 @@ EX_EXT=`echo $EXPORTFILE |sed -r "s/^[.]*[^.]+[.]([^.]+)$/\1/1"`
echo 'Do we have exportfile extension?'
# Quoted so the extension is printed exactly as stored (no word splitting
# or glob expansion).
echo "$EX_EXT"
# Do we have unique Synset numbers?
# Count the lines of the input file ($1) that start with '0'.
# Command substitution $( ... ) is required here: NAME=( ... ) is an array
# assignment and bash rejects a pipeline inside it with a syntax error.
# tr strips any padding that some wc implementations put around the number.
NO_OF_ORIGSYNSETS=$( grep '^0' -- "$1" | wc -l | tr -d '[:space:]' )
# all operations are done on NORMFILE!
# extensions of index files
......@@ -35,6 +35,35 @@ RELEXT='rix' # relations index file
ILIEXT='iix' # ili relations index file
ILAEXT='iax' # ili relations add on index file
# Do we have unique Synset numbers?
#
# Lines of the input file ($1) that start with '0' are counted as synsets.
# The sed expression below reduces a line of the form "0 @<digits>@<rest>"
# to <digits>; lines that do not match are passed through unchanged.
#
# NO_OF_ORIGSYNSETS  - number of lines starting with '0'
# NO_OF_UNIQESYNSETS - number of distinct synset numbers among those lines
#
# Uniqueness is decided on the extracted numbers, not on whole lines, so a
# number reused on lines whose remaining text differs is still detected.
# tr strips any padding that some wc implementations put around the count.
NO_OF_ORIGSYNSETS=$( grep '^0' -- "$1" | wc -l | tr -d '[:space:]' )
echo "$NO_OF_ORIGSYNSETS"
NO_OF_UNIQESYNSETS=$( grep '^0' -- "$1" \
  | sed -r 's/(0 @)([0-9]+)(@.+$)/\2/g' \
  | sort -u | wc -l | tr -d '[:space:]' )
echo "$NO_OF_UNIQESYNSETS"
if [ "$NO_OF_ORIGSYNSETS" -eq "$NO_OF_UNIQESYNSETS" ] ; then
  echo "Synset numbers are unique"
else
  echo "Synset numbers ARE NOT unique"
  # all.numbers: every synset number once, sorted.
  grep '^0' -- "$1" \
    | sed -r 's/(0 @)([0-9]+)(@.+$)/\2/g' \
    | sort -u > all.numbers
  # doubled.numbers: numbers that occur more than once, most frequent first.
  # awk compares the uniq -c count numerically, so counts of any width
  # (10, 25, 100, ...) are handled, and then removes the count column.
  grep '^0' -- "$1" \
    | sed -r 's/(0 @)([0-9]+)(@.+$)/\2/g' \
    | sort | uniq -c | sort -nr \
    | awk '$1 > 1 { sub(/^ *[0-9]+ /, ""); print }' > doubled.numbers
  # Work on a copy of the input while the result is written to $EXPORTFILE.
  cp -- "$1" tempfile
  python unique_synsets.py all.numbers doubled.numbers tempfile > "$EXPORTFILE"
  # rm all.numbers
  # rm doubled.numbers
fi
# echo "$NO_OF_ORIGSYNSETS"
# echo "$NO_OF_UNIQESYNSETS"
# exit
# Name of the normalised file: EXPORTFILE with its extension EX_EXT replaced
# by NORMEXT. The pattern only matches names made of optional leading dots,
# a dot-free stem, one dot and EX_EXT; any other name is left unchanged.
NORMFILE=$( echo $EXPORTFILE | sed -r "s/(^[.]*[^.]+[.])($EX_EXT)$/\1$NORMEXT/1" )
# echo $NORMFILE
......
......@@ -14,3 +14,4 @@ import os
# Earlier location of the WordNet-3.0 dictionary, kept for reference:
# PWN_PATH = '{}/WordNet-3.0/dict'.format(os.environ['HOME'])
# Path of the WordNet-3.0 'dict' directory inside $HOME/Wordnet.
# os.environ['HOME'] raises KeyError when HOME is not set.
PWN_PATH = '%s/Wordnet/WordNet-3.0/dict' % (os.environ['HOME'])
# Path of the FinWN fiwn-2.0 'dict' directory inside the same $HOME/Wordnet tree.
FIN_PATH = '%s/Wordnet/FinWN/fiwn-2.0/dict' % (os.environ['HOME'])
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment