ADD file via upload

main
hnu202409060624 8 months ago
parent 92297d8ea2
commit b16547b186

@ -0,0 +1,337 @@
import json,config
import torch
from torch.utils.data import Dataset,DataLoader
import numpy as np
class DateBuild:
    """Writes the JSON fixtures (group vocabulary and training records) to disk.

    The module rebinds the name ``DateBuild`` to a single instance right after
    the class body, so callers use ``DateBuild.voc_build()`` /
    ``DateBuild.train_date()`` directly on that instance.
    """

    @staticmethod
    def _dump_json(path, payload):
        """Write *payload* to *path* as indented JSON, creating the parent directory if needed."""
        # Local import so this block does not depend on a file-level ``import os``.
        import os

        parent = os.path.dirname(path)
        if parent:
            # Without this, a fresh checkout lacking the output directory
            # fails with FileNotFoundError when the file is opened for writing.
            os.makedirs(parent, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=4)

    def voc_build(self):
        """Write the group-token vocabulary to ``vocab/vocabulary.json``.

        NOTE(review): only ``CH4`` .. ``CH0`` are defined here, while the
        ``features`` strings written by ``train_date`` also contain tokens such
        as ``CH``, ``C``, ``OH`` and ``O``. ``Chemical_Trans.chemical_trans``
        looks tokens up with ``self.vocab[token]``, so a vocabulary produced
        solely by this method would raise ``KeyError`` on those records.
        Also, ``CH4`` maps to 0, the same value the padding helpers append.
        Confirm whether the file on disk is maintained separately.
        """
        voc = {'CH4': 0, 'CH3': 1, 'CH2': 2, 'CH1': 3, 'CH0': 4}
        self._dump_json('vocab/vocabulary.json', voc)

    def train_date(self):
        """Write the hard-coded compound records to ``train_date/train_date.json``.

        Each record is keyed by the compound's (Chinese) name and holds:

        * ``features`` - dash-separated group tokens, one per heavy atom.
        * ``bonds``    - list of ``[u, v]`` pairs; indices are 1-based
          (``generate_adjacency_matrix`` subtracts 1 from each).
        * ``nature``   - two numeric target values (their physical meaning is
          not stated in this file).
        * ``smiles``   - SMILES string of the compound.

        NOTE(review): the literal is reproduced unchanged. Several records look
        internally inconsistent, e.g. "正辛烷" lists 8 groups but its bonds end
        with ``[2, 6]`` and never reference atoms 7-8. These were left as-is;
        verify against the data source before correcting them.
        """
        data = {
            "乙烷": {"features": "CH3-CH3", "bonds": [[1, 2]],'nature':[0.241674468,0.209750435],'smiles':'CC'},
            "丙烷": {"features": "CH3-CH2-CH3", "bonds": [[1, 2], [2, 3]],'nature':[0.228835094,0.198607081],'smiles':'CCC'},
            "丁烷": {"features": "CH3-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4]],'nature':[0.360973653, 0.313290772],'smiles':'CCCC'},
            "异丁烷": {"features": "CH3-CH-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 4]],'nature':[0.360706166,0.313058619],'smiles':'CC(C)C'},
            "正戊烷": {"features": "CH3-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],'nature':[0.383710044,0.333023796],'smiles':'CCCCC'},
            "新戊烷": {"features": "CH3-C-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [2, 4], [2, 5]],'nature':[0.68677277,0.596053395],'smiles':'CC(C)(C)C'},
            "2-甲基丁烷": {"features": "CH3-CH-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5]],'nature':[0.68677277 ,0.596053395],'smiles':'CCC(C)C'},
            "己烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],'nature':[0.475725558,0.412884504],'smiles':'CCCCCC'},
            "2,3-二甲基丁烷": {"features": "CH3-CH-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [3, 6]],'nature':[0.386117427,0.335113175],'smiles':'CC(C)C(C)C'},
            "2-甲基戊烷": {"features": "CH3-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6]],
                'nature': [0.319780661, 0.277539176],'smiles':'CCCC(C)C'},
            "2,2-二甲基丁烷": {"features": "CH3-C-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],'nature':[0.465828541,0.404294835],'smiles':'CCC(C)(C)C'},
            "3,3-二甲基戊烷": {"features": "CH3-CH2-C-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6],[3,7]],
                'nature': [0.369800722, 0.320951828],'smiles':'CCC(C)(C)CC'},
            "2-甲基己烷": {"features": "CH3-CH-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[2, 7],],'nature':[0.414471045,0.359721416],'smiles':'CCCCC(C)C'},
            "2,4-二甲基戊烷": {"features": "CH3-CH-CH2-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6],[4, 7]],'nature':[0.409923766,0.355774811],'smiles':'CC(C)CC(C)C'},
            "3-甲基已烷": {"features": "CH3-CH2-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[3, 6]],'nature':[0.412331149,0.35786419],'smiles':'CCCC(C)CC'},
            "2,2,3-三甲基丁烷": {"features": "CH3-CH-CH-CH-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [3, 6], [3, 7]],'nature':[0.663768891,0.576088218],'smiles':'CC(C)C(C)(C)C'},
            "3-乙基戊烷": {"features": "CH3-CH2-CH-CH2-CH3-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [6, 7]], 'nature': [0.413401097, 0.358792803],'smiles':'CCC(CC)CC'},
            "正庚烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]], 'nature': [0.488297445, 0.423795705],'smiles':'CCCCCCC'},
            "2,2-二甲基戊烷": {"features": "CH3-C-CH2-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[2, 7],[2,8]],'nature':[0.399491775,0.346720836],'smiles':'CCCC(C)(C)C'},
            "2-甲基庚烷": {"features": "CH3-CH-CH2-CH2-CH2-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8]],
                'nature': [0.439347332, 0.381311666],'smiles':'CCCCCC(C)C'},
            "2,2-二甲基己烷": {"features": "CH3-C-CH2-CH2-CH2-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [2, 8]],
                'nature': [0.406713923, 0.352988973],'smiles':'CCCCC(C)(C)C'},
            "六甲基乙烷": {"features": "CH3-C-C-CH3-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [3, 7], [3, 8]],
                'nature': [1, 0.867904817],'smiles':'CC(C)(C)C(C)(C)C'},
            "2,5-二甲基己烷": {"features": "CH3-CH-CH2-CH2-CH-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7],[5,8]],
                'nature': [0.487227498, 0.422867092],'smiles':'CC(C)CCC(C)C'},
            "2,3,4-三甲基戊烷": {"features": "CH3-CH-CH-CH-CH2-CH3-CH3-CH3-CH3", "bonds":[[1, 2], [2, 3], [3, 4], [2, 5], [2, 6],[3, 7],[4, 8]],'nature':[0.438544871,0.380615206],'smiles':'CC(C)C(C)C(C)C'},
            "4-甲基庚烷": {"features": "CH3-CH2-CH2-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [4, 8]],'nature':[0.40698141,0.353221126],'smiles':'CCCC(C)CCC'},
            "正辛烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[2, 6]],'nature':[0.578708038,0.502263494],'smiles':'CCCCCCCC'},
            "3-乙基-3-甲基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH3-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6],[3, 7],[7, 8]],'nature':[0.487494985,0.423099246],'smiles':'CCC(C)(CC)CC'},
            "2,3,3-三甲基戊烷": {"features": "CH3-CH-C-CH2-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7],[3,8]],
                'nature': [0.460746289, 0.399883923],'smiles':'CCC(C)(C)C(C)C'},
            "异辛烷": {"features": "CH3-CH-CH2-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8]],'nature':[0.443627123,0.385026117],'smiles':'CCCCC(C)C'},
            "3,3-二甲基已烷": {"features": "CH3-CH2-C-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [3, 7], [3, 8]],'nature':[0.393339575,0.341381312],'smiles':'CCCC(C)(C)CC'},
            "2,4-二甲基-3-乙基戊烷": {"features": "CH3-CH-CH-CH-CH3-CH3-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7],[7,8],[4,9]],
                'nature': [0.403236592, 0.349970981],'smiles':'CCC(C(C)C)C(C)C'},
            "2,2-二甲基-3-乙基戊烷": {"features": "CH3-C-CH-CH2-CH3-CH3-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7],[3,8],[8,9]],
                'nature': [0.46502608, 0.403598375],'smiles':'CCC(CC)C(C)(C)C'},
            "2,2,5-三甲基己烷": {"features": "CH3-C-CH2-CH2-CH-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7],[2,8],[5,9]],
                'nature': [0.447906915, 0.388740569],'smiles':'CC(C)CCC(C)(C)C'},
            "2,2,3,3-四甲基戊烷": {"features": "CH3-C-C-CH2-CH3-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7],[3,8],[3,9]],
                'nature': [0.704426909, 0.611375508],'smiles':'CCC(C)(C)C(C)(C)C'},
            "正壬烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[6,7],[7, 8],[8,9]],'nature':[0.8479,0.549125],'smiles':'CCCCCCCCC'},
            "2,3,3-三甲基己烷": {"features": "CH3-CH-C-CH2-CH2-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [3, 8], [3, 9]],
                'nature': [0.587535108, 0.362971561],'smiles':'CCCC(C)(C)C(C)C'},
            "2,6-二甲基庚烷": {"features": "CH3-CH-CH2-CH2-CH2-CH-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [3, 8], [6, 9]],
                'nature': [0.455396549, 0.395240859],'smiles':'CC(C)CCCC(C)C'},
            "3,3-二乙基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH2-CH3-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [6, 7], [3, 8], [8, 9]],
                'nature': [0.642102448, 0.557283807],'smiles':'CCC(CC)(CC)CC'},
            "2,4,4-三甲基己烷": {"features": "CH3-CH-CH2-C-CH2-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [4, 8], [4, 9]],
                'nature': [0.427310419, 0.370864771],'smiles':'CCC(C)(C)CC(C)C'},
            "2,2,4,4-四甲基戊烷": {"features": "CH3-C-CH2-C-CH3-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7], [4, 8], [4, 9]],
                'nature': [0.552761803, 0.479744631],'smiles':'CC(C)(C)CC(C)(C)C'},
            "2,2-二甲基庚烷": {"features": "CH3-C-CH2-CH2-CH2-CH2-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8], [2, 9]],
                'nature': [0.428380366, 0.371793384],'smiles':'CCCCCC(C)(C)C'},
            "2,3,3,4-四甲基戊烷": {"features": "CH3-CH-C-CH-CH3-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7], [3, 8], [4, 9]],
                'nature': [0.457536445, 0.397098085],'smiles':'CC(C)C(C)(C)C(C)C'},
            "甲醇": {"features": "OH-CH3",
                "bonds": [[1, 2]],
                'nature': [0.469573358, 0.40754498],'smiles':'CO'},
            "乙醇": {"features": "OH-CH2-CH3",
                "bonds": [[1, 2], [2, 3]],
                'nature': [0.42543801, 0.369239698],'smiles':'CCO'},
            "丙醇": {"features": "OH-CH2-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4]],
                'nature': [0.393339575, 0.341381312],'smiles':'CCCO'},
            "异丙醇": {"features": "OH-CH-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [2, 4]],
                'nature': [0.491239802, 0.426349391],'smiles':'CC(C)O'},
            "仲丁醇": {"features": "OH-CH-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5]],
                'nature': [0.423833088, 0.367846779],'smiles':'CCC(C)O'},
            "异丁醇": {"features": "OH-CH2-CH-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [3, 5]],
                'nature': [0.441754714, 0.383401045],'smiles':'CC(C)CO'},
            "正丁醇": {"features": "OH-CH2-CH2-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],
                'nature': [0.490437341, 0.425652931],'smiles':'CCCCO'},
            "3-戊醇": {"features": "OH-C-CH2-CH3-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [5, 6]],
                'nature': [0.709241674, 0.615554266],'smiles':'CCC(CC)O'},
            "正戊醇": {"features": "OH-CH2-CH2-CH2-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                'nature': [0.51959342, 0.450957632],'smiles':'CCCCCO'},
            "2-甲基丁醇": {"features": "OH-CH-CH2-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6]],
                'nature': [0.543399759, 0.471619269],'smiles':'CCC(C)CO'},
            "2-戊醇": {"features": "OH-CH2-CH2-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                'nature': [0.53537515, 0.464654672],'smiles':'CCCC(C)O'},
            "甲基叔丁基醚": {"features": "CH3-C-O-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],
                'nature': [0.440149793, 0.382008125],'smiles':'CC(C)(C)OC'},
            "新戊醇": {"features": "OH-CH2-C-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [3, 5], [3, 6]],
                'nature': [0.871071285, 0.756006965],'smiles':'CC(C)(C)CO'},
            # NOTE(review): the original author tagged the next record with "###";
            # presumably it still needs checking.
            "异戊醇": {"features": "OH-CH2-CH2-C-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [4, 6], [4, 7]],
                'nature': [0.417145914, 0.362042948],'smiles':'CC(C)CCO'},
            "2-己醇": {"features": "OH-CH-CH2-CH2-CH2-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7]],
                'nature': [0.840310285, 0.729309344],'smiles':'CCCCC(C)O'},
            "3-甲基-3-戊醇": {"features": "OH-C-CH2-CH3-CH3-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [6, 7]],
                'nature': [0.667513709, 0.579338363],'smiles':'CCC(C)(CC)O'},
            "2,3-二甲基-2-丁醇": {"features": "OH-C-CH2-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],
                'nature': [0.693192457, 0.601625073],'smiles':'CC(C)C(C)(C)O'},
            "2-甲基-2-戊醇": {"features": "OH-C-CH2-CH2-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]],
                'nature': [0.455129062, 0.395008706],'smiles':'CCCC(C)(C)O'},
            "正己醇": {"features": "OH-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],'nature':[0.611341447,0.530586187],'smiles':'CCCCCCO'},
            "甲醚": {"features": "CH3-O-CH3", "bonds": [[1, 2], [2, 3]], 'nature': [0.352146583, 0.305629716],'smiles':'COC'},
            "甲乙醚": {"features": "CH3-CH2-O-CH3", "bonds": [[1, 2], [2, 3], [3, 4]],'nature':[0.428380366,0.371793384],'smiles':'CCOC'},
            "乙醚": {"features": "CH3-CH2-O-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],'nature':[0.419553297,0.364132327],'smiles':'CCOCC'},
            "甲丁醚": {"features": "CH3-CH2-CH2-CH2-O-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                'nature': [0.421693192, 0.365989553],'smiles':'CCCCOC'},
            "2-甲基-2-丁醇": {"features": "OH-C-CH2-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],
                'nature': [0.706299318, 0.61300058],'smiles':'CCC(C)(C)O'},
            "乙丙醚": {"features": "CH3-CH2-CH2-O-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                'nature': [0.389594757, 0.338131167],'smiles':'CCCOCC'},
            "叔丁基乙醚": {"features": "CH3-C-O-CH2-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]],
                'nature': [0.479202889, 0.415902496],'smiles':'CCOC(C)(C)C'},
            "异丙醚": {"features": "CH3-CH-O-CH-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [4, 7]],
                'nature': [0.49846195, 0.432617528],'smiles':'CC(C)OC(C)C'},
            "正丙醚": {"features": "CH3-CH2-CH2-O-CH2-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],
                'nature': [0.40430654, 0.350899594],'smiles':'CCCOCCC'},
            "甲基叔戊醚": {"features": "CH3-CH2-C-O-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [3, 7]],
                'nature': [0.516651063, 0.448403947],'smiles':'CCC(C)(C)OC'},
            "3,3-二甲基-2-丁醇": {"features": "OH-C-CH-CH3-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [3, 7]],
                'nature': [0.745619901, 0.647127104],'smiles':'CC(C(C)(C)C)O'},
            "乙基丁基醚": {"features": "CH3-CH2-CH2-CH2-O-CH2-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],
                'nature': [0.398956801, 0.346256529],'smiles':'CCCCOCC'},
            "3,3-二甲基-1-丁基酸酯": {"features": "OH-CH2-CH2-C-CH3-CH3-CH3",
                "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [4, 6], [4, 7]],
                'nature': [0.570148455, 0.494834591],'smiles':'CC(C)(C)CCO'},
        }
        self._dump_json('train_date/train_date.json', data)


# The class name is deliberately rebound to a shared instance (module-level singleton).
DateBuild = DateBuild()
def padding(chemic, lens=config.seq_len):
    """Right-pad *chemic* with zeros, in place, up to *lens* items.

    The list passed in is mutated and also returned. A sequence that already
    has *lens* or more items is returned unchanged (nothing is truncated).
    """
    missing = lens - len(chemic)
    if missing > 0:
        chemic.extend([0] * missing)
    return chemic
def dec_input_padding(chemical, lens=config.seq_len):
    """Wrap *chemical* as ``[2, ..., 1]`` and zero-pad it, in place, to *lens* items.

    2 is inserted at the front and 1 appended at the end - presumably
    start/end-of-sequence ids (TODO confirm against the vocabulary). The same
    list object is returned; sequences longer than *lens* are not truncated.
    """
    chemical.insert(0, 2)
    chemical.append(1)
    # A non-positive repeat count yields an empty list, so nothing is added
    # when the sequence is already long enough.
    chemical.extend([0] * (lens - len(chemical)))
    return chemical
def target_padding(chemical, lens=config.seq_len):
    """Append 1 to *chemical* and zero-pad it, in place, to *lens* items.

    The trailing 1 is presumably an end-of-sequence id (TODO confirm). The
    same list object is returned; sequences longer than *lens* are not
    truncated.
    """
    chemical.append(1)
    # A non-positive repeat count yields an empty list, so nothing is added
    # when the sequence is already long enough.
    chemical.extend([0] * (lens - len(chemical)))
    return chemical
class Chemical_Trans:
    """Conversion helpers: group/SMILES strings to ids, bond lists to graph matrices.

    Two JSON vocabularies are read at construction time. The module rebinds
    the name ``Chemical_Trans`` to a single instance right after the class
    body, so callers use the methods directly on that instance.
    """

    def __init__(self, len=config.seq_len):
        """Load ``vocab/vocabulary.json`` and ``vocab/sm_voc.json``.

        ``len`` is stored as ``self.len``; no method of this class reads it.
        (The parameter shadows the builtin of the same name; the name is kept
        because it is part of the call signature.)
        """
        with open('vocab/vocabulary.json', 'r', encoding='utf-8') as group_file:
            self.vocab = json.load(group_file)
        self.len = len
        with open('vocab/sm_voc.json', 'r', encoding='utf-8') as smiles_file:
            self.sm_voc = json.load(smiles_file)

    def CCO(self, adj_matrix):
        """Return ``[sources, targets]`` for every non-zero entry above the diagonal.

        Only pairs with ``i < j`` are visited, so each undirected edge of a
        symmetric adjacency matrix is listed once.
        """
        upper_edges = [
            (row_idx, col_idx)
            for row_idx, row in enumerate(adj_matrix)
            for col_idx in range(row_idx + 1, len(row))
            if row[col_idx] != 0
        ]
        return [[src for src, _ in upper_edges], [dst for _, dst in upper_edges]]

    def chemical_trans(self, chemical):
        """Map a dash-separated group string to a list of ``self.vocab`` values.

        Raises ``KeyError`` for a token missing from the vocabulary.
        """
        return [self.vocab[token] for token in chemical.split('-')]

    def chemical_trans1(self, chemical):
        """Map each character of *chemical* to its ``self.sm_voc`` value.

        Raises ``KeyError`` for a character missing from the vocabulary.
        """
        return [self.sm_voc[char] for char in chemical]

    def generate_adjacency_matrix(self, num_vertices, edges, directed=False):
        """Build a ``num_vertices`` x ``num_vertices`` 0/1 matrix (list of lists).

        *edges* holds 1-based ``(u, v)`` pairs. Pairs with an endpoint outside
        ``1..num_vertices`` are silently skipped. Unless *directed* is true the
        mirrored entry is set as well.
        """
        matrix = [[0 for _ in range(num_vertices)] for _ in range(num_vertices)]
        for u, v in edges:
            row, col = u - 1, v - 1
            if not (0 <= row < num_vertices and 0 <= col < num_vertices):
                continue
            matrix[row][col] = 1
            if not directed:
                matrix[col][row] = 1
        return matrix

    def adjacency_to_degree_matrix(self, adjacency_matrix):
        """Return a NumPy matrix whose diagonal holds the row sums of *adjacency_matrix*."""
        adjacency = np.array(adjacency_matrix)
        # zeros_like keeps the adjacency dtype; the row sums are cast into it.
        degree_matrix = np.zeros_like(adjacency)
        np.fill_diagonal(degree_matrix, adjacency.sum(axis=1))
        return degree_matrix

    def __len__(self):
        """Number of entries in the group vocabulary."""
        return len(self.vocab)

    def __getitem__(self, key):
        """Return the key view of the group vocabulary.

        NOTE(review): *key* is ignored, so every subscript returns the same
        view and no lookup is performed. Possibly ``self.vocab[key]`` was
        intended - confirm with callers before changing.
        """
        return self.vocab.keys()


# The class name is deliberately rebound to a shared instance (module-level singleton).
Chemical_Trans = Chemical_Trans()
class Chem_Dataset(Dataset):
    """Map-style dataset over selected compounds from the training JSON file.

    For every selected compound the constructor precomputes and stores:

    * ``input_list``  - padded group-token ids (``chemical_trans`` + ``padding``)
    * ``bonds``       - adjacency matrix built from the 1-based bond list
    * ``vectors``     - the raw bond list as stored in the JSON file
    * ``target_list`` - the record's ``nature`` values
    * ``dujuzheng``   - degree matrix derived from the adjacency matrix
    * ``CCO``         - ``[sources, targets]`` edge lists from ``Chemical_Trans.CCO``
    * ``enc``         - padded per-character SMILES ids

    Args:
        chemic_list: compound names (keys of the JSON file) to load. Defaults
            to the six compounds that were previously hard-coded.
        data_path: path of the JSON file written by ``DateBuild.train_date``.

    Raises:
        KeyError: if a requested name is missing from the file, or a token is
            missing from the vocabularies.
    """

    def __init__(self, chemic_list=None, data_path='train_date/train_date.json'):
        # ``super(Dataset).__init__()`` only created an unbound super object and
        # never ran the base-class initialiser; the zero-argument form does.
        super().__init__()
        if chemic_list is None:
            chemic_list = ['3-戊醇', '2-甲基庚烷', '乙醚', '2,3,3-三甲基戊烷', '异丁烷', '丙醇']
        self.chemic_list = list(chemic_list)
        self.input_list = []
        self.bonds = []
        self.vectors = []
        self.target_list = []
        self.dujuzheng = []
        self.CCO = []
        self.enc = []
        with open(data_path, 'r', encoding='utf-8') as f:
            records = json.load(f)
        # Resolve every name up front so a missing compound fails before any
        # per-record processing starts.
        selected = [records[name] for name in self.chemic_list]
        for record in selected:
            groups = record['features']
            adjacency = Chemical_Trans.generate_adjacency_matrix(
                len(groups.split('-')), edges=record['bonds'])
            self.input_list.append(padding(Chemical_Trans.chemical_trans(groups)))
            self.bonds.append(adjacency)
            self.vectors.append(record['bonds'])
            self.target_list.append(record['nature'])
            self.dujuzheng.append(Chemical_Trans.adjacency_to_degree_matrix(adjacency))
            self.CCO.append(Chemical_Trans.CCO(adjacency))
            self.enc.append(padding(Chemical_Trans.chemical_trans1(record['smiles'])))

    def __len__(self):
        """Number of loaded compounds."""
        return len(self.input_list)

    def __getitem__(self, index):
        """Return ``(input_ids, adjacency, target, degree_matrix, edge_lists, smiles_ids)``.

        The raw bond list in ``self.vectors`` is not part of the returned tuple.
        """
        return (
            self.input_list[index],
            self.bonds[index],
            self.target_list[index],
            self.dujuzheng[index],
            self.CCO[index],
            self.enc[index],
        )
def chem_collate_fn(batch):
    """Collate dataset samples into ``(input_tensor, edge_index, target_tensor, enc)``.

    Each sample is the 6-tuple produced by ``Chem_Dataset.__getitem__``. The
    adjacency and degree matrices are dropped; the other four fields are
    stacked with ``torch.tensor``, so every sample in the batch must have
    equally sized fields.
    """
    # Transpose the list of samples into one tuple per field.
    input_batch, _adjacency, target_batch, _degree, edge_batch, enc_batch = zip(*batch)
    enc = torch.tensor(list(enc_batch))
    # The per-sample CCO lists are effectively the edge_index.
    edge_index = torch.tensor(list(edge_batch))
    target_tensor = torch.tensor(list(target_batch))
    input_tensor = torch.tensor(list(input_batch))
    return input_tensor, edge_index, target_tensor, enc
# Module-level rebinding: from here on ``Chem_Dataset`` names the DataLoader
# (batch size 1, shuffled), not the dataset class.
Chem_Dataset = DataLoader(
    dataset=Chem_Dataset(),
    collate_fn=chem_collate_fn,
    shuffle=True,
    batch_size=1,
)

if __name__ == '__main__':
    # Smoke check: iterate once and print the shape of each encoded SMILES batch.
    for _inputs, _edge_index, _targets, enc in Chem_Dataset:
        print(enc.shape)
Loading…
Cancel
Save