"""Scratch experiments for tensor ops, multiprocessing, and MD discovery timing."""
import multiprocessing
import time

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from md_discovery.multi_process_infer_by_pairs import table_encode, inference_from_record_pairs
from md_discovery import tmp_discover
from settings import er_output_dir, similarity_threshold, target_attr, embedding_dict


def fuck(i):
    """Dummy workload used by ``test2``.

    Computes ``i * i + 1`` into the local name only; nothing is returned
    (the result is ``None``) and the caller's value is not modified.
    """
    i = i * i + 1


def test1():
    """Print pairwise similarities of row vectors inside a small 3x4x3 tensor."""
    li = [
        [[6, 6, 2], [2, 4, 6], [2, 4, 7], [3, 6, 4]],
        [[6, 2, 7], [3, 2, 4], [5, 3, 5], [6, 2, 4]],
        [[7, 2, 2], [6, 3, 2], [6, 4, 3], [6, 5, 6]],
    ]
    tensor = torch.Tensor(li)
    # Normalize every length-3 vector along the last dimension.
    norm_tensor = torch.nn.functional.normalize(tensor, dim=2)
    print(norm_tensor, '\n')
    # Batched (4x3) @ (3x4): dot products between all normalized rows
    # within each of the 3 batches.
    sim_ten = torch.matmul(norm_tensor, norm_tensor.transpose(1, 2))
    # Affine rescale of the similarities: x -> x / 2 + 0.5.
    print(sim_ten / 2 + 0.5, '\n')
    print(sim_ten.size())


def test2():
    """Submit 1000 ``fuck`` tasks to a 16-worker spawn pool and print the last handle.

    Uses a local "spawn" context instead of ``set_start_method`` so the
    function can be called more than once without raising ``RuntimeError``,
    and closes/joins the pool so worker processes are not leaked.
    """
    ctx = multiprocessing.get_context("spawn")
    with ctx.Manager() as manager, ctx.Pool(16) as pool:
        # NOTE(review): the lock is created but never handed to the workers.
        lock = manager.Lock()
        result = None
        for i in tqdm(range(0, 1000)):
            result = pool.apply_async(fuck, args=(i,))
        # Pool.__exit__ calls terminate(); close + join first so every
        # submitted task is allowed to finish.
        pool.close()
        pool.join()
        # Prints the AsyncResult handle of the final submission (not its value).
        print(result)


def test3():
    """Tile a 5-element tensor to shape (5, 742, 742) and count its non-zero entries."""
    dic = {
        'description': 0,
        'id': 1,
        'manufacturer': 0,
        'name': 0.9309734582901001,
        'price': 0.912541675567627,
    }
    ll = list(dic.values())
    ten = torch.Tensor(ll)
    # (5,) -> (5, 1) -> (5, 1, 1) so the values can be repeated over two new axes.
    t = ten.unsqueeze(1)
    t = t.unsqueeze(2)
    y = t.repeat(1, 742, 742)
    print(ten)
    print(y)
    print(torch.isfinite(ten))
    print(torch.count_nonzero(y).item())


def test4():
    """Print the number of non-zero entries in an all-True 3x3x3 boolean tensor."""
    one_bool_tensor = torch.ones((3, 3, 3), dtype=torch.bool)
    print(torch.count_nonzero(one_bool_tensor).item())


if __name__ == '__main__':
    # Time a single run of the tmp_discover inference over the record-pair CSV.
    start = time.time()
    tp_single_tuple_path = er_output_dir + "tp_single_tuple.csv"
    # Alternative implementation kept for comparison:
    # tp_mds, tp_vio = inference_from_record_pairs(tp_single_tuple_path, similarity_threshold, target_attr)
    tp_mds, tp_vio = tmp_discover.inference_from_record_pairs(
        tp_single_tuple_path, similarity_threshold, target_attr
    )
    print(time.time() - start)