# Reconstructed from a garbled, line-wrapped listing (original lines 25-38).
# NOTE(review): original lines 27-37 (further imports, e.g. os/sys/numpy/QNLP
# aliases used below) are not visible in this chunk — restore from full source.
from mpi4py import MPI
from PyQNLPSimulator import PyQNLPSimulator as p
from QNLP import DisCoCat
from itertools import product
# MPI rank of this process (comm is created in a non-visible line).
rank = comm.Get_rank()

# Basis sizes and distance cutoffs are taken from the environment.
NUM_BASIS_NOUN = int(os.environ['NUM_BASIS_NOUN'])
NUM_BASIS_VERB = int(os.environ['NUM_BASIS_VERB'])

BASIS_NOUN_DIST_CUTOFF = int(os.environ['BASIS_NOUN_DIST_CUTOFF'])
BASIS_VERB_DIST_CUTOFF = int(os.environ['BASIS_VERB_DIST_CUTOFF'])

VERB_NOUN_DIST_CUTOFF = int(os.environ['VERB_NOUN_DIST_CUTOFF'])

# NOTE(review): original lines 51-55 are not visible in this chunk. These
# hard-coded defaults immediately clobber the env-derived values above, so
# they were presumably the fallback branch of a try/except around the
# os.environ reads — TODO confirm against the full source.
BASIS_NOUN_DIST_CUTOFF = 2
BASIS_VERB_DIST_CUTOFF = 2
VERB_NOUN_DIST_CUTOFF = 2
# Simple encoder sized to the chosen noun/verb basis.
s_encoder = simple.SimpleEncoder(num_nouns=NUM_BASIS_NOUN, num_verbs=NUM_BASIS_VERB)

# A corpus file path is required as the first CLI argument.
# NOTE(review): assert is stripped under `python -O`; an explicit check with
# sys.exit would be more robust, but the original behavior is preserved here.
assert len(sys.argv) > 1
corpus_file = sys.argv[1]
if not os.path.isfile(corpus_file):
    print("Error: Inputted file does not exist")
    # NOTE(review): original line 74 (not visible) presumably aborts here
    # (e.g. sys.exit or an MPI abort) — TODO confirm against the full source.
# Build the vector space model over the corpus.
# NOTE(review): original lines 77-85 (additional constructor kwargs) are not
# visible in this chunk — restore them from the full source.
vsm = q.VectorSpaceModel.VectorSpaceModel(
    corpus_path=corpus_file,
)

# Choose the token basis: the NUM_BASIS_* most significant verbs and nouns.
basis = vsm.define_basis({'verbs': NUM_BASIS_VERB, 'nouns': NUM_BASIS_NOUN})
# Rank basis tokens of each part of speech by their distance measure.
verb_dist = vsm.sort_basis_tokens_by_dist("verbs", num_basis=NUM_BASIS_VERB)
noun_dist = vsm.sort_basis_tokens_by_dist("nouns", num_basis=NUM_BASIS_NOUN)

# Assign binary indices to the ordered basis tokens (semicolons removed —
# they were redundant statement terminators).
vsm.assign_indexing("nouns")
vsm.assign_indexing("verbs")
# Map every corpus token onto nearby basis tokens (DisCoCat model),
# subject to the respective distance cutoffs.
dcc = DisCoCat.DisCoCat()
mapping_verbs = dcc.map_to_basis(
    vsm.tokens['verbs'], verb_dist['verbs'],
    basis_dist_cutoff=BASIS_VERB_DIST_CUTOFF,
)
mapping_nouns = dcc.map_to_basis(
    vsm.tokens['nouns'], noun_dist['nouns'],
    basis_dist_cutoff=BASIS_NOUN_DIST_CUTOFF,
)
# Bit-pattern encodings for subject nouns ("ns"), verbs ("v") and object
# nouns ("no"); subjects and objects share the noun encoding.
# NOTE(review): original lines 179-180 are not visible; presumed to be only
# the dict's closing brace — TODO confirm against the full source.
encoding_dict = {
    "ns": vsm.encoded_tokens["nouns"],
    "v": vsm.encoded_tokens["verbs"],
    "no": vsm.encoded_tokens["nouns"],
}

# Inverse mappings: bit pattern -> token, per grammatical slot.
decoding_dict = {
    "ns": {v: k for k, v in encoding_dict["ns"].items()},
    "v": {v: k for k, v in encoding_dict["v"].items()},
    "no": {v: k for k, v in encoding_dict["no"].items()},
}

# Memory register width: bits required for the largest pattern in each slot,
# summed over the three slots.
len_reg_memory = (
    q.encoding.utils.pow2bits(int(np.max(list(encoding_dict['v'].values()))))[1]
    + q.encoding.utils.pow2bits(int(np.max(list(encoding_dict['no'].values()))))[1]
    + q.encoding.utils.pow2bits(int(np.max(list(encoding_dict['ns'].values()))))[1]
)
# Auxiliary register mirrors the memory register plus two work qubits.
len_reg_aux = len_reg_memory + 2
num_qubits = len_reg_memory + len_reg_aux

# Resource-requirement banner. Reconstructed from a garbled listing
# (original lines ~196-202); visible wording and the six format arguments
# are preserved — TODO confirm exact line breaks against the full source.
print("""{}
Requires {} qubits to encode data using {}
noun and {} verb basis elements, allowing a
maximum of {} unique patterns.
{}""".format(
    "#" * 48, num_qubits, NUM_BASIS_NOUN, NUM_BASIS_VERB,
    (NUM_BASIS_NOUN ** 2) * NUM_BASIS_VERB,
    "#" * 48,
))
corpus_list_n = vsm.tokens['nouns']
corpus_list_v = vsm.tokens['verbs']
dist_cutoff = BASIS_VERB_DIST_CUTOFF  # NOTE(review): assigned but not used in visible lines

# Pair each verb with the nouns appearing within VERB_NOUN_DIST_CUTOFF of it.
v_list = vg.calc_verb_noun_pairings(corpus_list_v, corpus_list_n, VERB_NOUN_DIST_CUTOFF)
# Capture noun-verb-noun (subject, verb, object) triples whose members all
# map onto basis tokens, recording the candidate bit-patterns per slot.
# NOTE(review): the enclosing loop over `v` (original ~line 216, presumably
# `for v in v_list:`) and the statement consuming each triple (original line
# 221, presumably `sentences.append(...)`/`.extend(...)`) are not visible in
# this chunk — TODO restore from the full source.
for i in v.lr_nouns.keys():  # i is a (left_noun, right_noun) pair
    # `is not None` replaces the original `!= None` (identity test for None).
    if (mapping_nouns[i[0]] is not None
            and mapping_verbs[v.verb] is not None
            and mapping_nouns[i[1]] is not None):
        sentences.append(
            [{i[0]: [encoding_dict['ns'][k] for k in mapping_nouns[i[0]].keys()]},
             {v.verb: [encoding_dict['v'][k] for k in mapping_verbs[v.verb].keys()]},
             {i[1]: [encoding_dict['no'][k] for k in mapping_nouns[i[1]].keys()]}]
        )
print("Sentences matching noun-verb-noun structure captured as:", sentences)
# Memory register qubit indices sit above the auxiliary register
# (semicolon after the list literal removed).
reg_memory = [0] * len_reg_memory
for i in range(len_reg_memory):
    reg_memory[i] = i + len_reg_aux

reg_aux = [0] * len_reg_aux
for i in range(len_reg_aux - 2):
    # NOTE(review): the loop body (original lines 239-241) is not visible in
    # this chunk; it presumably assigns reg_aux[i] — restore from full source.
    pass

print("REG_MEM=", reg_memory)
print("REG_AUX=", reg_aux)
# Expand every captured sentence into all (ns, v, no) pattern combinations
# and encode each combination as a single binary integer.
# NOTE(review): the initialisation of vec_to_encode (original ~line 249) is
# not visible in this chunk; `idx` is unused in the visible lines but kept
# in case the missing line 252 used it.
for idx, sentence in enumerate(sentences):
    superpos_patterns = list(product(
        list(sentence[0].values())[0],
        list(sentence[1].values())[0],
        list(sentence[2].values())[0],
    ))
    for patt in superpos_patterns:
        num = q.utils.encode_binary_pattern_direct(patt, encoding_dict)
        vec_to_encode.append(num)  # was .extend([num]) — same effect, clearer

# De-duplicate the patterns before encoding.
vec_to_encode = list(set(vec_to_encode))
# Run metadata summary.
# NOTE(review): original lines 268-269 are not visible; presumed to be only
# the closing brace (and possibly a consumer of `d`, e.g. a JSON dump) —
# TODO confirm against the full source.
d = {
    "sentences": len(sentences),
    "patterns": len(vec_to_encode),
    "NUM_BASIS_NOUN": NUM_BASIS_NOUN,
    "NUM_BASIS_VERB": NUM_BASIS_VERB,
    "BASIS_NOUN_DIST_CUTOFF": BASIS_NOUN_DIST_CUTOFF,
    "BASIS_VERB_DIST_CUTOFF": BASIS_VERB_DIST_CUTOFF,
    "VERB_NOUN_DIST_CUTOFF": VERB_NOUN_DIST_CUTOFF,
}

print("Encoding data:", vec_to_encode)
# Zero-initialise the per-pattern shot counter (kept as .update in case
# shot_counter is a collections.Counter — semantics differ from item
# assignment when a key already exists).
for i in vec_to_encode:
    shot_counter.update({i: 0})

# NOTE(review): original lines 277-288 are not visible. `len_reg_memory =
# None` was presumably part of the non-root branch of an `if rank == 0:`
# guard that clears rank-0-only state before broadcast — TODO confirm.
len_reg_memory = None

# Share the rank-0 results with every MPI rank.
reg_memory = comm.bcast(reg_memory, root=0)
reg_aux = comm.bcast(reg_aux, root=0)
vec_to_encode = comm.bcast(vec_to_encode, root=0)
shot_counter = comm.bcast(shot_counter, root=0)
encoding_dict = comm.bcast(encoding_dict, root=0)

num_qubits = len(reg_memory) + len(reg_aux)
# Optional resource-estimation mode: any set RESOURCE_EST env var (even
# empty) triggers the override.
if os.environ.get('RESOURCE_EST') is not None:
    print("Overriding default qubit count")
    # NOTE(review): the override itself (original lines 302-303) is not
    # visible in this chunk — restore from the full source.

# Two simulator instances: one per test pattern in the overlap comparison.
sim1 = p(num_qubits, use_fusion)
sim2 = p(num_qubits, use_fusion)
# Number of experiments from the second CLI argument.
# NOTE(review): the `try:` opening this handler and any surrounding guard
# (original lines ~306-309), plus the except body (312-316), are not visible
# in this chunk — reconstructed minimally; restore from the full source.
try:
    num_exps = int(sys.argv[2])
except Exception as e:
    pass  # fallback (original lines 312-316) presumably sets a default

num_exps = comm.bcast(num_exps, root=0)

# Every (ns, v, no) combination of encoded basis values to test against.
test_strings = list(product(
    encoding_dict["ns"].values(),
    encoding_dict["v"].values(),
    encoding_dict["no"].values(),
))
# Pairwise comparison of all test patterns: encode the corpus superposition
# into both simulators, rotate amplitudes by Hamming distance to each test
# pattern, post-select, and report the state overlap.
# NOTE(review): many original lines (328-345, 348-359) are not visible in
# this chunk; each `continue` after a "Skipping" message is a reconstruction
# implied by the message text — confirm against the full source.
for i_ts1, ts1 in enumerate(test_strings[:-1]):
    test_pattern1 = q.utils.encode_binary_pattern_direct(ts1, encoding_dict)
    if test_pattern1 in vec_to_encode:
        print("Pattern {} already exists. Skipping".format(test_pattern1))
        continue
    for i_ts2, ts2 in enumerate(test_strings[i_ts1 + 1:]):
        test_pattern2 = q.utils.encode_binary_pattern_direct(ts2, encoding_dict)
        if test_pattern2 in vec_to_encode:
            print("Pattern {} already exists. Skipping encoding of {} and {}".format(test_pattern2, test_pattern1, test_pattern2))
            continue

        # Encode the unique corpus patterns into both simulators.
        sim1.encodeBinToSuperpos_unique(reg_memory, reg_aux, vec_to_encode, len(reg_memory))
        sim2.encodeBinToSuperpos_unique(reg_memory, reg_aux, vec_to_encode, len(reg_memory))

        # Rotate amplitudes by Hamming distance to the respective test pattern.
        sim1.applyHammingDistanceRotY(test_pattern1, reg_memory, reg_aux, len(reg_memory))
        sim2.applyHammingDistanceRotY(test_pattern2, reg_memory, reg_aux, len(reg_memory))

        # Post-select the second-to-last auxiliary qubit in |1>.
        sim1.collapseToBasisZ(reg_aux[len(reg_aux) - 2], 1)
        sim2.collapseToBasisZ(reg_aux[len(reg_aux) - 2], 1)

        val = sim1.overlap(sim2)
        # BUG(review): test_string1 / test_string2 are undefined in the
        # visible lines — presumably decoded token strings built in the
        # missing lines 339-345; as written this print raises NameError.
        print("|<{}|{}>|^2,|<{}|{}>|^2,{}".format(test_pattern1, test_pattern2, test_string1, test_string2, np.abs(val) ** 2))