1 from nltk.corpus
import wordnet
as wn
def match_syn(word, basis_dat, pos_type=None, deep_search=False):
    """Match *word* against the basis tokens via WordNet synonym sets.

    Parameters
    ----------
    word : str
        Corpus token to match against the basis.
    basis_dat : dict
        Maps basis token -> sequence whose first element is the basis ID
        (only element 0 is read here).
    pos_type : str, optional
        WordNet part-of-speech filter (e.g. 'n' or 'v'); ``None`` matches
        every part of speech.
    deep_search : bool, optional
        When no direct synonym-name match is found, fall back to ranking
        the basis tokens by Wu-Palmer similarity and keep the closest one.

    Returns
    -------
    set
        Basis IDs matched for *word*; may be empty.
    """
    basis_set = set()
    syn = wn.synsets(word, pos=pos_type)

    # Direct match: a synset name looks like "dog.n.01"; compare its token
    # part with each basis token, case-insensitively via casefold().
    for s in syn:
        spl = s.name().rsplit(".")
        for bn in basis_dat.keys():
            if spl[0].casefold() == bn.casefold():
                basis_set.add(basis_dat[bn][0])

    # Fallback: no direct hit -- score every (word synset, basis synset)
    # pair with Wu-Palmer similarity and take the best-scoring basis token.
    if not basis_set and deep_search and syn:
        sim = []
        for b in basis_dat.keys():
            b_syn = wn.synsets(b, pos=pos_type)
            for s1, s2 in itertools.product(syn, b_syn):
                score = wn.wup_similarity(s1, s2)
                # wup_similarity returns None when the synsets share no
                # common ancestor; drop those entries, otherwise the sort
                # below raises TypeError comparing None with a float.
                if score is not None:
                    sim.append((b, score))
        if sim:
            sim.sort(reverse=True, key=lambda x: x[1])
            basis_set.add(basis_dat[sim[0][0]][0])

    return basis_set
33 if db_file_name !=
None:
34 db = qnlp_db.qnlp_db( os.path.basename(db_file_name), os.path.dirname(db_file_name) )
36 db = qnlp_db.qnlp_db(
"qnlp_tagged_corpus",
".")
38 basis_nouns = db.db_load(
"noun",
"basis")
39 basis_verbs = db.db_load(
"verb",
"basis")
41 basis_nouns_rev = db.db_load(
"noun",
"basis",
"reverse")
42 basis_verbs_rev = db.db_load(
"verb",
"basis",
"reverse")
44 corpus_nouns = db.db_load(
"noun",
"corpus")
45 corpus_verbs = db.db_load(
"verb",
"corpus")
58 print(
"###############################################################################")
63 if len(sys.argv) > 1
and sys.argv[1] ==
"t":
69 for ci
in corpus_nouns.keys():
75 elif (ci.casefold()
in [b.casefold()
for b
in basis_nouns.keys()]):
76 basis_set.add( basis_nouns[ci][0])
78 basis_set.update(
match_syn(ci, basis_nouns, pos_type=
'n', deep_search=deep_search) )
79 corpus_nouns[ci].append(list(basis_set))
83 for ci
in corpus_verbs.keys():
84 if (ci.casefold()
in [b.casefold()
for b
in basis_verbs.keys()]):
85 basis_set.add(basis_verbs[ci][0])
87 basis_set.update(
match_syn(ci, basis_verbs, pos_type=
'v', deep_search=deep_search) )
88 corpus_verbs[ci].append(list(basis_set))
92 from tabulate
import tabulate
93 print(
"###############################################################################")
94 print(
"############################## Noun encodings #################################")
95 print(
"###############################################################################")
96 d = [(k,v[0],[bin(i)
for i
in v[1]])
for k,v
in corpus_nouns.items()]
97 print(tabulate(d,[
"Token",
"ID",
"Encoding"]))
100 for k,v
in corpus_nouns.items():
103 print(k,
"->", basis_nouns_rev[j])
105 print(
"###############################################################################")
106 print(
"############################## Verb encodings #################################")
107 print(
"###############################################################################")
108 d = [(k,v[0],[bin(i)
for i
in v[1]])
for k,v
in corpus_verbs.items()]
109 print(tabulate(d,[
"Token",
"ID",
"Encoding"]))
112 for k,v
in corpus_verbs.items():
115 print(k,
"->", basis_verbs_rev[j])
def match_syn(word, basis_dat, pos_type=None, deep_search=False)
def basis_check(db_file_name=None)