You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
22 lines
845 B
22 lines
845 B
import csv
|
|
|
|
import pandas as pd
|
|
import json
|
|
import sentence_transformers.util
|
|
import torch
|
|
from sentence_transformers import SentenceTransformer
|
|
from torch import nn
|
|
|
|
def collect_matches(frames) -> pd.DataFrame:
    """Merge the rows labelled 1 from several tables into one sorted table.

    Args:
        frames: Iterable of DataFrames. Each must contain a ``label`` column
            and an ``ltable_id`` column. The inputs are not modified.

    Returns:
        A new DataFrame holding every row whose ``label`` equals 1, with the
        ``label`` column removed and rows ordered by ascending ``ltable_id``.
        The original row index values are carried over (not reset).
    """
    # Filter each split before concatenating so rejected rows are never copied.
    positives = [frame[frame['label'] == 1] for frame in frames]
    merged = pd.concat(positives)
    # After filtering, 'label' is constant (always 1), so it is dropped.
    merged = merged.drop(columns=['label'])
    return merged.sort_values(by='ltable_id')


if __name__ == '__main__':
    # NOTE(review): machine-specific absolute Windows path; the script only
    # runs where this directory exists.
    directory = r'E:\Data\Research\Projects\matching_dependency\datasets\iTunes-Amazon'

    # Load the three splits using the same encoding for each file.
    train = pd.read_csv(directory + r'\train.csv', encoding='ISO-8859-1')
    valid = pd.read_csv(directory + r'\valid.csv', encoding='ISO-8859-1')
    test = pd.read_csv(directory + r'\test.csv', encoding='ISO-8859-1')

    matches = collect_matches([train, valid, test])

    # index=False: the leftover per-split row index is not written out.
    matches.to_csv(directory + r'\matches.csv', sep=',', index=False, header=True)