QNLP  v1.0
QNLP.proc.basis_check Namespace Reference

Functions

def match_syn (word, basis_dat, pos_type=None, deep_search=False)
 
def basis_check (db_file_name=None)
 

Function Documentation

◆ basis_check()

def QNLP.proc.basis_check.basis_check (   db_file_name = None)

Definition at line 32 of file basis_check.py.

def _match_corpus_to_basis(corpus, basis, pos_type, deep_search):
    """Append to every corpus record the list of basis values matched for its token.

    A token matches a basis key directly when the two are equal, exactly or
    ignoring case; otherwise the synonym search in ``match_syn`` is used.
    The record (a list) is modified in place: one new element, a list of the
    matched ``basis[key][0]`` values (possibly empty), is appended.
    """
    # Build the case-insensitive index once instead of once per token.
    # If several basis keys fold to the same string, the first one wins.
    folded_keys = {}
    for key in basis:
        folded_keys.setdefault(key.casefold(), key)

    for token, record in corpus.items():
        # Prefer an exact key; fall back to the case-insensitive one so that
        # a token differing only in case no longer raises KeyError.
        key = token if token in basis else folded_keys.get(token.casefold())

        matches = set()
        if key is not None:
            matches.add(basis[key][0])
        else:
            matches.update(
                match_syn(token, basis, pos_type=pos_type, deep_search=deep_search)
            )
        record.append(list(matches))


def _print_encodings(title, corpus, basis_rev):
    """Print one banner-framed table of tokens and their matched basis values.

    Assumes each record's element 0 is the token's ID and element 1 is the
    list appended by ``_match_corpus_to_basis`` -- TODO confirm that the
    database loader returns single-element records.
    """
    # Imported lazily, as in the original, so a missing optional dependency
    # only fails at the point where the table is rendered.
    from tabulate import tabulate

    banner = "###############################################################################"
    print(banner)
    print(title)
    print(banner)

    rows = [(tok, rec[0], [bin(i) for i in rec[1]]) for tok, rec in corpus.items()]
    print(tabulate(rows, ["Token", "ID", "Encoding"]))
    print("")

    for tok, rec in corpus.items():
        for encoding in rec[1]:
            print(tok, "->", basis_rev[encoding])
    # NOTE(review): the flattened listing does not show how deeply this blank
    # line was nested; it is emitted once per section here -- confirm against
    # basis_check.py.
    print("")


def basis_check(db_file_name=None):
    """Match the corpus nouns and verbs against the basis tokens and print the result.

    Args:
        db_file_name: Path of the database to open. When None, the database
            "qnlp_tagged_corpus" in the current directory is used.

    The expensive similarity fallback of ``match_syn`` is enabled when the
    first command-line argument is "t". Nothing is returned; the encodings
    are written to standard output.
    """
    if db_file_name is not None:
        db = qnlp_db.qnlp_db(
            os.path.basename(db_file_name), os.path.dirname(db_file_name)
        )
    else:
        db = qnlp_db.qnlp_db("qnlp_tagged_corpus", ".")

    # Close the database even when one of the loads fails.
    try:
        basis_nouns = db.db_load("noun", "basis")
        basis_verbs = db.db_load("verb", "basis")

        basis_nouns_rev = db.db_load("noun", "basis", "reverse")
        basis_verbs_rev = db.db_load("verb", "basis", "reverse")

        corpus_nouns = db.db_load("noun", "corpus")
        corpus_verbs = db.db_load("verb", "corpus")
    finally:
        db.close_db()

    # TODO: names aren't currently recognised. The intended approach is to read
    # a list of known names (../corpus/names.dat, skipping its 17-line header)
    # and map every match to the basis noun "person".

    print("###############################################################################")

    deep_search = len(sys.argv) > 1 and sys.argv[1] == "t"

    _match_corpus_to_basis(corpus_nouns, basis_nouns, 'n', deep_search)
    _match_corpus_to_basis(corpus_verbs, basis_verbs, 'v', deep_search)

    _print_encodings(
        "############################## Noun encodings #################################",
        corpus_nouns,
        basis_nouns_rev,
    )
    _print_encodings(
        "############################## Verb encodings #################################",
        corpus_verbs,
        basis_verbs_rev,
    )
def match_syn(word, basis_dat, pos_type=None, deep_search=False)
Definition: basis_check.py:11
def basis_check(db_file_name=None)
Definition: basis_check.py:32

References QNLP.proc.basis_check.match_syn().

Here is the call graph for this function:

◆ match_syn()

def QNLP.proc.basis_check.match_syn (   word,
  basis_dat,
  pos_type = None,
  deep_search = False 
)
Calculates the synonym set of a given word, and attempts to match the meanings.

Definition at line 11 of file basis_check.py.

def match_syn(word, basis_dat, pos_type=None, deep_search=False):
    """Calculates the synonym set of a given word, and attempts to match the meanings.

    Args:
        word: Token to look up in WordNet.
        basis_dat: Mapping from basis token to a sequence whose first element
            is the value collected in the result (presumably the token's
            encoding -- verify against the database loader).
        pos_type: Part-of-speech filter forwarded to ``wn.synsets``.
        deep_search: When true and no synset name matches a basis token
            directly, fall back to the basis token with the highest Wu-Palmer
            similarity to any synset of ``word``. Very expensive.

    Returns:
        A set of ``basis_dat[token][0]`` values; empty when nothing matched.
    """
    basis_set = set()

    # Nothing can match an empty basis, so skip the WordNet lookup entirely.
    if not basis_dat:
        return basis_set

    syn = wn.synsets(word, pos=pos_type)

    # Group the basis tokens by case-folded form once, rather than folding
    # every token again for every synset.
    folded = {}
    for bn in basis_dat:
        folded.setdefault(bn.casefold(), []).append(bn)

    for s in syn:
        # Synset names look like "<lemma>.<pos>.<nn>"; compare on the lemma.
        lemma = s.name().split(".")[0].casefold()
        for bn in folded.get(lemma, ()):
            basis_set.add(basis_dat[bn][0])

    # If nothing found, perform next closest match using similarity measures
    # between all basis terms and current synonyms. Very expensive!
    if not basis_set and deep_search and syn:
        best_term = None
        best_score = None
        for b in basis_dat:
            b_syn = wn.synsets(b, pos=pos_type)
            for s1, s2 in itertools.product(syn, b_syn):
                score = wn.wup_similarity(s1, s2)
                # The similarity may be None when the synsets share no
                # ancestor; None cannot be ordered against floats.
                if score is None:
                    continue
                # Strict comparison keeps the first of equally scored terms,
                # which is what a stable descending sort would select.
                if best_score is None or score > best_score:
                    best_term, best_score = b, score
        # No comparable pair at all: leave the result empty instead of failing.
        if best_term is not None:
            basis_set.add(basis_dat[best_term][0])
    return basis_set
31 
def match_syn(word, basis_dat, pos_type=None, deep_search=False)
Definition: basis_check.py:11

Referenced by QNLP.proc.basis_check.basis_check().

Here is the caller graph for this function: