Commit d09b174a authored by Neeme Kahusk
Browse files

read all synsets

parent 9505b464
......@@ -168,13 +168,13 @@ class Synset(object):
if lst[start]['level'] > 0:
start = find_parent(start, 0, lst)
iterator = iter(lst[start:])
a = iterator.__next__()
a = next(iterator, None)
if 'drn' in a:
self.number = a['drn']
self.pos = iterator.__next__()['value']
a = iterator.__next__()
self.pos = next(iterator, None)['value']
a = next(iterator, None)
if a['field'] == 'VARIANTS':
a = iterator.__next__()
a = next(iterator, None)
while a['level'] > 1:
if (a['level'],a['field']) == (2,'LITERAL'):
literal = a['value']
......@@ -184,12 +184,12 @@ class Synset(object):
elif (a['level'],a['field']) == (3,'DEFINITION'):
self.variants[-1].gloss = a['value']
elif (a['level'],a['field']) == (3,'EXAMPLES'):
a = iterator.__next__()
a = next(iterator, None)
while a['level'] > 3:
self.variants[-1].add_example(Example(a['value']))
a = iterator.__next__()
a = next(iterator, None)
if (a['level'],a['field']) == (3,'EXTERNAL_INFO'):
a = iterator.__next__()
a = next(iterator, None)
while a['level'] > 3:
if (a['level'],a['field']) == (4,'SOURCE_ID'):
source_id = a['value']
......@@ -199,10 +199,10 @@ class Synset(object):
tk = a['value']
self.variants[
-1].external_info[-1].text_key = tk
a = iterator.__next__()
a = iterator.__next__()
a = next(iterator, None)
a = next(iterator, None)
if a['field'] == 'INTERNAL_LINKS':
a = iterator.__next__()
a = next(iterator, None)
while a['level'] > 1:
if (a['level'],a['field']) == (2,'RELATION'):
name = a['value']
......@@ -216,12 +216,10 @@ class Synset(object):
sense = a['value']
il.target_concept.add_variant(Variant(literal, sense))
self.add_internal_link(il)
a = iterator.__next__()
a = next(iterator, None)
if a['field'] == 'EQ_LINKS':
print(a)
a = iterator.__next__()
while a['level'] > 1:
print(a)
a = next(iterator, None)
while a and a['level'] > 1:
if (a['level'],a['field']) == (2,'EQ_RELATION'):
name = a['value']
elif (a['level'],a['field']) == (3,'TARGET_ILI'):
......@@ -234,7 +232,9 @@ class Synset(object):
sense = a['value']
il.target_concept.wordnet_offset = sense
self.add_eq_link(il)
a = iterator.__next__()
a = next(iterator, None)
return self
def __str__(self):
if self.number:
out = format_polaris(0, self.fieldname, None, self.number)
......@@ -418,7 +418,18 @@ if __name__ == '__main__':
'est', '01')
lex.read_file()
a = Synset()
a.lexicon = lex
a.read(2)
print(a)
# a = Synset()
# a.lexicon = lex
# a.read(2)
# print(a)
snset_indexes = [[j,i] for j,i in enumerate(lex.data) if 'level' in i and i['level'] == 0]
print(len(snset_indexes))
print(snset_indexes[:5])
snsets = [Synset(lexicon=lex).read(0) for i in snset_indexes]
# print(snsets)
for i in snsets:
print(i)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment