"""Gather the rows labelled 1 from the train/valid/test CSVs into matches.csv.

Run as a script, this reads ``train.csv``, ``valid.csv`` and ``test.csv`` from
the dataset directory, keeps the rows whose ``label`` column equals 1, drops
the ``label`` column, sorts by ``ltable_id`` and writes the result to
``matches.csv`` in the same directory.
"""
import csv
import json
from pathlib import Path

import pandas as pd
import sentence_transformers.util
import torch
from sentence_transformers import SentenceTransformer
from torch import nn

# Input files, in the order their rows are concatenated.
SPLIT_FILES = ('train.csv', 'valid.csv', 'test.csv')


def _load_positive_rows(path):
    """Read one split CSV and return only the rows whose ``label`` is 1."""
    frame = pd.read_csv(path, encoding='ISO-8859-1')
    return frame[frame['label'] == 1]


def build_matches(directory):
    """Write ``matches.csv`` into *directory* and return the written frame.

    Args:
        directory: Folder (``str`` or ``Path``) containing the files named in
            ``SPLIT_FILES``; the output file is created in the same folder.

    Returns:
        The concatenated label-1 rows of all splits, without the ``label``
        column, sorted by ``ltable_id``.

    Raises:
        FileNotFoundError: If one of the split files is missing.
        KeyError: If a split lacks the ``label`` column, or the combined frame
            lacks ``ltable_id``.
    """
    # Path joins keep the separator platform-correct instead of hard-coding
    # a backslash into every file name.
    directory = Path(directory)
    positives = [_load_positive_rows(directory / name) for name in SPLIT_FILES]

    matches = (
        pd.concat(positives)
        .drop(columns=['label'])
        .sort_values(by='ltable_id')
    )
    # NOTE: the inputs are decoded as ISO-8859-1, but the output is written
    # with pandas' default encoding because none is passed here.
    matches.to_csv(directory / 'matches.csv', sep=',', index=False, header=True)
    return matches


if __name__ == '__main__':
    directory = r'E:\Data\Research\Projects\matching_dependency\datasets\DBLP-ACM_dirty'
    build_matches(directory)