"""Scratch experiments for the matching-dependency project.

Each ``testN`` function is an independent, manually-invoked experiment
(tensor arithmetic, multiprocessing, file round-trips); none of them is
called at import time.
"""
import json
import multiprocessing
import os
import re
import time

import ConfigSpace
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from ConfigSpace.read_and_write import json as csj

from md_discovery import discovery_executor
from settings import er_output_dir, hpo_output_dir


def fuck(i):
    """Dummy worker payload for ``test2``: return ``i * i + 1``.

    Must stay a top-level function so that it can be pickled and sent to
    worker processes.
    """
    return i * i + 1


def test1():
    """Print an L2-normalised 3x4x3 tensor and its rescaled self-similarity.

    Rows are normalised along the last axis, so the batched product with the
    transpose holds cosine similarities; ``/ 2 + 0.5`` maps them from
    [-1, 1] onto [0, 1].
    """
    li = [
        [[6, 6, 2], [2, 4, 6], [2, 4, 7], [3, 6, 4]],
        [[6, 2, 7], [3, 2, 4], [5, 3, 5], [6, 2, 4]],
        [[7, 2, 2], [6, 3, 2], [6, 4, 3], [6, 5, 6]],
    ]
    tensor = torch.Tensor(li)
    norm_tensor = torch.nn.functional.normalize(tensor, dim=2)
    print(norm_tensor, '\n')
    sim_ten = torch.matmul(norm_tensor, norm_tensor.transpose(1, 2))
    print(sim_ten / 2 + 0.5, '\n')
    print(sim_ten.size())


def test2():
    """Submit 1000 ``fuck`` tasks to a 16-process spawn pool and wait for them.

    Prints the ``AsyncResult`` handle of the last submitted task.

    Raises:
        Exception: whatever a worker task raised, re-raised by
            ``AsyncResult.get``.
    """
    try:
        multiprocessing.set_start_method("spawn")
    except RuntimeError:
        # The global start method can only be fixed once per process; a
        # repeated call must not abort the experiment.
        pass
    # Explicit context so the pool and manager use "spawn" even when the
    # global start method had already been fixed to something else.
    ctx = multiprocessing.get_context("spawn")

    with ctx.Manager() as manager, ctx.Pool(16) as pool:
        # Manager-backed lock is created as in the original experiment but
        # is not handed to the workers.
        _lock = manager.Lock()
        pending = [
            pool.apply_async(fuck, args=(n,)) for n in tqdm(range(0, 1000))
        ]
        # Wait for every task; get() re-raises worker exceptions that
        # would otherwise be lost.
        for handle in pending:
            handle.get()
        # Pool.__exit__ calls terminate(), so shut down gracefully first.
        pool.close()
        pool.join()
    print(pending[-1])


def test3():
    """Broadcast five per-attribute scores to a (5, 742, 742) tensor and print it.

    Also prints the 1-D score tensor, its finiteness mask and the number of
    non-zero entries of the repeated tensor.
    """
    dic = {
        'description': 0,
        'id': 1,
        'manufacturer': 0,
        'name': 0.9309734582901001,
        'price': 0.912541675567627,
    }
    ten = torch.Tensor(list(dic.values()))
    # (5,) -> (5, 1, 1) so that repeat() tiles each score over a 742x742 plane.
    t = ten.unsqueeze(1).unsqueeze(2)
    y = t.repeat(1, 742, 742)
    print(ten)
    print(y)
    print(torch.isfinite(ten))
    print(torch.count_nonzero(y).item())


def test4():
    """Print the non-zero count of an all-True 3x3x3 boolean tensor."""
    one_bool_tensor = torch.ones((3, 3, 3), dtype=torch.bool)
    print(torch.count_nonzero(one_bool_tensor).item())


def test5():
    """Print the element-wise product of two 2x3 tensors and its row sums."""
    ten1 = torch.tensor([[1, 2, 3], [7, 8, 9]])
    ten2 = torch.tensor([[4, 5, 6], [11, 12, 15]])
    result = ten1 * ten2
    r = torch.sum(result, 1)
    print('\n')
    print(result)
    print(r)


def test6():
    """Print a 2x3x3 tensor normalised along dim 1 and along dim 2.

    Also prints the shape of an unrelated 2x3 tensor.
    """
    table_tensor = torch.tensor([
        [[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]],
        [[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]],
    ])
    t = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
    norm1 = torch.nn.functional.normalize(table_tensor, dim=1)
    norm2 = torch.nn.functional.normalize(table_tensor, dim=2)
    print('\n')
    print(norm1)
    print(norm2)
    print(t.shape)


def test7():
    """Print the next iteration number derived from files in ``er_output_dir``.

    Prints 1 when no ``eval_result*`` file carries a number, otherwise the
    largest number found plus one.
    """
    iterations = 1
    for filename in os.listdir(er_output_dir):
        if not filename.startswith('eval_result'):
            continue
        # The number starts at index 12, i.e. one character after the
        # 11-character prefix (presumably a separator such as '_' --
        # TODO confirm the naming scheme). Read all digits, not just one,
        # so that iteration 10 and above is parsed correctly.
        number = re.match(r'[0-9]+', filename[12:])
        if number is None:
            continue
        # os.listdir() returns names in arbitrary order, so take the
        # maximum instead of relying on the last match.
        iterations = max(iterations, int(number.group()) + 1)
    print(iterations)


def test8():
    """Load ``configspace.json`` from ``hpo_output_dir`` and parse it with ``csj.read``."""
    with open(hpo_output_dir + "configspace.json", 'r') as load_f:
        dict_configspace = json.load(load_f)
    # csj.read() is given a JSON string, so the parsed dict is serialised again.
    str_configspace = json.dumps(dict_configspace)
    csj.read(str_configspace)


def test9():
    """Convert ``./datasets/t.json`` (JSON lines) to CSV and read the CSV back."""
    df = pd.read_json(r'./datasets/t.json', encoding='ISO-8859-1', lines=True)
    df.to_csv(r'./datasets/s.csv')
    # Read back only to check that the written file can be parsed.
    pd.read_csv(r'./datasets/s.csv', encoding='ISO-8859-1')
    print(1)


def test10():
    """Rename the columns of ``predictions.csv`` and save it as ``tableB.csv``."""
    rtable = pd.read_csv(
        r'E:\Data\Research\Projects\matching_dependency\ml_er\output\predictions.csv',
        encoding='ISO-8859-1',
    )
    print(1)
    rtable.columns = ["id", "title", "authors", "venue", "year"]
    rtable.to_csv(
        r'E:\Data\Research\Projects\matching_dependency\datasets\DBLP-GoogleScholar\tableB.csv',
        sep=',',
        index=False,
        header=True,
        quoting=1,  # numeric value of csv.QUOTE_ALL
    )


def test11():
    """Write a fixed hyper-parameter dict to ``incumbent.json`` in ``hpo_output_dir``."""
    values = {
        'block_attr': 'class',
        'confidence_thresh': 0.2717823249253852,
        'ml_blocker': 'attr_equiv',
        'ml_matcher': 'ln',
        'similarity_thresh': 0.20681820299103484,
        'support_thresh': 129,
    }
    with open(hpo_output_dir + "incumbent.json", "w") as f:
        json.dump(values, f, indent=4)


def test12():
    """Print key, value and value type for every entry of ``incumbent.json``."""
    with open(hpo_output_dir + "incumbent.json", 'r') as f:
        dic = json.load(f)
    for key, value in dic.items():
        print(f'Key:{key}\tValue:{value}\tType:{type(value)}')


def test13():
    """Print one result-directory path per entry found in the outcome directory."""
    outcome_dir = r'E:\Data\Research\Outcome'
    configs_dir = r'\Magellan+Smac+roberta-large-nli-stsb-mean-tokens'
    # Paths are assembled with literal backslashes, exactly as before.
    f = [
        outcome_dir + rf'\{dataset}' + configs_dir
        for dataset in os.listdir(outcome_dir)
    ]
    print(f)