You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
matching_dependency/generate_matches.py

21 lines
981 B

import csv
import pandas as pd
import json
import sentence_transformers.util
import torch
from sentence_transformers import SentenceTransformer
from torch import nn
def collect_matches(splits):
    """Merge the positively-labelled rows of several data splits.

    Each frame in *splits* must have a ``label`` column and an
    ``ltable_id`` column.  Rows whose ``label`` equals 1 are kept from
    every split, the kept rows are concatenated in the order the splits
    were given, the ``label`` column is dropped, and the result is sorted
    by ``ltable_id``.  The input frames are not modified.

    Args:
        splits: Iterable of ``pandas.DataFrame`` objects (for example the
            train / valid / test splits of one dataset).

    Returns:
        A new ``pandas.DataFrame`` holding only the label-1 rows, without
        the ``label`` column, ordered by ``ltable_id``.
    """
    positives = [frame[frame['label'] == 1] for frame in splits]
    merged = pd.concat(positives)
    # Every remaining row has label 1, so the column carries no information.
    merged = merged.drop(columns=['label'])
    return merged.sort_values(by='ltable_id')


def main():
    """Read the Fodors-Zagats splits and write their label-1 rows to matches.csv."""
    # Single place to edit when the dataset lives somewhere else.
    dataset_dir = r'E:\Data\Research\Projects\matching_dependency\datasets\Fodors-Zagats'
    split_files = ('train.csv', 'valid.csv', 'test.csv')

    # Paths are joined with an explicit backslash so they stay byte-identical
    # to the original hard-coded Windows paths.
    splits = [
        pd.read_csv(dataset_dir + '\\' + file_name, encoding='ISO-8859-1')
        for file_name in split_files
    ]

    matches = collect_matches(splits)
    matches.to_csv(dataset_dir + '\\' + 'matches.csv', sep=',', index=False, header=True)


if __name__ == '__main__':
    main()