You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

338 lines
22 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json,config
import torch
from torch.utils.data import Dataset,DataLoader
import numpy as np
class DateBuild:
    """Write the vocabulary and training-set JSON files read by the dataset code.

    Every training record has four fields:

    * ``features`` - dash-separated group tokens, one per heavy atom
      (``CH3``, ``CH2``, ``CH``, ``C``, ``OH``, ``O``);
    * ``bonds`` - pairs of 1-based positions into ``features``;
    * ``nature`` - two numeric target values (their meaning is not visible here);
    * ``smiles`` - SMILES string of the molecule.

    Invariant kept by the records below: the bonds of a molecule with ``n`` tokens
    form a tree (``n - 1`` bonds, every index in ``1..n``) and the number of bonds
    touching a token equals its free valence (CH3/OH: 1, CH2/O: 2, CH: 3, C: 4).
    """

    def voc_build(self):
        """Write the group-token vocabulary to ``vocab/vocabulary.json``."""
        import os

        # NOTE(review): the molecules written by train_date() use the tokens
        # 'CH', 'C', 'OH' and 'O', none of which is a key here - confirm that the
        # vocabulary file used for training really comes from this method.
        voc = {'CH4': 0, 'CH3': 1, 'CH2': 2, 'CH1': 3, 'CH0': 4}
        os.makedirs('vocab', exist_ok=True)
        with open('vocab/vocabulary.json', 'w', encoding='utf-8') as f:
            json.dump(voc, f, indent=4)

    def train_date(self):
        """Write the molecule records to ``train_date/train_date.json``."""
        import os

        data = {
            # ---- alkanes ----
            "乙烷": {"features": "CH3-CH3",
                   "bonds": [[1, 2]],
                   'nature': [0.241674468, 0.209750435], 'smiles': 'CC'},
            "丙烷": {"features": "CH3-CH2-CH3",
                   "bonds": [[1, 2], [2, 3]],
                   'nature': [0.228835094, 0.198607081], 'smiles': 'CCC'},
            "丁烷": {"features": "CH3-CH2-CH2-CH3",
                   "bonds": [[1, 2], [2, 3], [3, 4]],
                   'nature': [0.360973653, 0.313290772], 'smiles': 'CCCC'},
            "异丁烷": {"features": "CH3-CH-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [2, 4]],
                    'nature': [0.360706166, 0.313058619], 'smiles': 'CC(C)C'},
            "正戊烷": {"features": "CH3-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],
                    'nature': [0.383710044, 0.333023796], 'smiles': 'CCCCC'},
            "新戊烷": {"features": "CH3-C-CH3-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [2, 4], [2, 5]],
                    'nature': [0.68677277, 0.596053395], 'smiles': 'CC(C)(C)C'},
            "2-甲基丁烷": {"features": "CH3-CH-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [2, 5]],
                       'nature': [0.68677277, 0.596053395], 'smiles': 'CCC(C)C'},
            "己烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH3",
                   "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                   'nature': [0.475725558, 0.412884504], 'smiles': 'CCCCCC'},
            "2,3-二甲基丁烷": {"features": "CH3-CH-CH-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [3, 6]],
                          'nature': [0.386117427, 0.335113175], 'smiles': 'CC(C)C(C)C'},
            "2-甲基戊烷": {"features": "CH3-CH-CH2-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6]],
                       'nature': [0.319780661, 0.277539176], 'smiles': 'CCCC(C)C'},
            "2,2-二甲基丁烷": {"features": "CH3-C-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],
                          'nature': [0.465828541, 0.404294835], 'smiles': 'CCC(C)(C)C'},
            "3,3-二甲基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [3, 7]],
                          'nature': [0.369800722, 0.320951828], 'smiles': 'CCC(C)(C)CC'},
            "2-甲基己烷": {"features": "CH3-CH-CH2-CH2-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7]],
                       'nature': [0.414471045, 0.359721416], 'smiles': 'CCCCC(C)C'},
            "2,4-二甲基戊烷": {"features": "CH3-CH-CH2-CH-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [4, 7]],
                          'nature': [0.409923766, 0.355774811], 'smiles': 'CC(C)CC(C)C'},
            "3-甲基已烷": {"features": "CH3-CH2-CH-CH2-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [3, 7]],
                       'nature': [0.412331149, 0.35786419], 'smiles': 'CCCC(C)CC'},
            "2,2,3-三甲基丁烷": {"features": "CH3-CH-C-CH3-CH3-CH3-CH3",
                            "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [3, 6], [3, 7]],
                            'nature': [0.663768891, 0.576088218], 'smiles': 'CC(C)C(C)(C)C'},
            "3-乙基戊烷": {"features": "CH3-CH2-CH-CH2-CH3-CH2-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [6, 7]],
                       'nature': [0.413401097, 0.358792803], 'smiles': 'CCC(CC)CC'},
            "正庚烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],
                    'nature': [0.488297445, 0.423795705], 'smiles': 'CCCCCCC'},
            "2,2-二甲基戊烷": {"features": "CH3-C-CH2-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]],
                          'nature': [0.399491775, 0.346720836], 'smiles': 'CCCC(C)(C)C'},
            "2-甲基庚烷": {"features": "CH3-CH-CH2-CH2-CH2-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8]],
                       'nature': [0.439347332, 0.381311666], 'smiles': 'CCCCCC(C)C'},
            "2,2-二甲基己烷": {"features": "CH3-C-CH2-CH2-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [2, 8]],
                          'nature': [0.406713923, 0.352988973], 'smiles': 'CCCCC(C)(C)C'},
            "六甲基乙烷": {"features": "CH3-C-C-CH3-CH3-CH3-CH3-CH3",
                      "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [3, 7], [3, 8]],
                      'nature': [1, 0.867904817], 'smiles': 'CC(C)(C)C(C)(C)C'},
            "2,5-二甲基己烷": {"features": "CH3-CH-CH2-CH2-CH-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [5, 8]],
                          'nature': [0.487227498, 0.422867092], 'smiles': 'CC(C)CCC(C)C'},
            "2,3,4-三甲基戊烷": {"features": "CH3-CH-CH-CH-CH3-CH3-CH3-CH3",
                            "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7], [4, 8]],
                            'nature': [0.438544871, 0.380615206], 'smiles': 'CC(C)C(C)C(C)C'},
            "4-甲基庚烷": {"features": "CH3-CH2-CH2-CH-CH2-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [4, 8]],
                       'nature': [0.40698141, 0.353221126], 'smiles': 'CCCC(C)CCC'},
            "正辛烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8]],
                    'nature': [0.578708038, 0.502263494], 'smiles': 'CCCCCCCC'},
            "3-乙基-3-甲基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH3-CH2-CH3",
                             "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [3, 7], [7, 8]],
                             'nature': [0.487494985, 0.423099246], 'smiles': 'CCC(C)(CC)CC'},
            "2,3,3-三甲基戊烷": {"features": "CH3-CH-C-CH2-CH3-CH3-CH3-CH3",
                            "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7], [3, 8]],
                            'nature': [0.460746289, 0.399883923], 'smiles': 'CCC(C)(C)C(C)C'},
            # NOTE(review): for this record the name (isooctane), the features
            # (an 8-carbon 2-methylheptane skeleton) and the SMILES (7 carbons)
            # describe three different compounds - confirm which one the
            # 'nature' values were measured for before relying on it.
            "异辛烷": {"features": "CH3-CH-CH2-CH2-CH2-CH2-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8]],
                    'nature': [0.443627123, 0.385026117], 'smiles': 'CCCCC(C)C'},
            "3,3-二甲基已烷": {"features": "CH3-CH2-C-CH2-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [3, 7], [3, 8]],
                          'nature': [0.393339575, 0.341381312], 'smiles': 'CCCC(C)(C)CC'},
            "2,4-二甲基-3-乙基戊烷": {"features": "CH3-CH-CH-CH-CH3-CH3-CH2-CH3-CH3",
                               "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7], [7, 8], [4, 9]],
                               'nature': [0.403236592, 0.349970981], 'smiles': 'CCC(C(C)C)C(C)C'},
            "2,2-二甲基-3-乙基戊烷": {"features": "CH3-C-CH-CH2-CH3-CH3-CH3-CH2-CH3",
                               "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7], [3, 8], [8, 9]],
                               'nature': [0.46502608, 0.403598375], 'smiles': 'CCC(CC)C(C)(C)C'},
            "2,2,5-三甲基己烷": {"features": "CH3-C-CH2-CH2-CH-CH3-CH3-CH3-CH3",
                            "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [2, 8], [5, 9]],
                            'nature': [0.447906915, 0.388740569], 'smiles': 'CC(C)CCC(C)(C)C'},
            "2,2,3,3-四甲基戊烷": {"features": "CH3-C-C-CH2-CH3-CH3-CH3-CH3-CH3",
                              "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7], [3, 8], [3, 9]],
                              'nature': [0.704426909, 0.611375508], 'smiles': 'CCC(C)(C)C(C)(C)C'},
            "正壬烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8], [8, 9]],
                    'nature': [0.8479, 0.549125], 'smiles': 'CCCCCCCCC'},
            "2,3,3-三甲基己烷": {"features": "CH3-CH-C-CH2-CH2-CH3-CH3-CH3-CH3",
                            "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [3, 8], [3, 9]],
                            'nature': [0.587535108, 0.362971561], 'smiles': 'CCCC(C)(C)C(C)C'},
            "2,6-二甲基庚烷": {"features": "CH3-CH-CH2-CH2-CH2-CH-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8], [6, 9]],
                          'nature': [0.455396549, 0.395240859], 'smiles': 'CC(C)CCCC(C)C'},
            "3,3-二乙基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH2-CH3-CH2-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [6, 7], [3, 8], [8, 9]],
                          'nature': [0.642102448, 0.557283807], 'smiles': 'CCC(CC)(CC)CC'},
            "2,4,4-三甲基己烷": {"features": "CH3-CH-CH2-C-CH2-CH3-CH3-CH3-CH3",
                            "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [4, 8], [4, 9]],
                            'nature': [0.427310419, 0.370864771], 'smiles': 'CCC(C)(C)CC(C)C'},
            "2,2,4,4-四甲基戊烷": {"features": "CH3-C-CH2-C-CH3-CH3-CH3-CH3-CH3",
                              "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7], [4, 8], [4, 9]],
                              'nature': [0.552761803, 0.479744631], 'smiles': 'CC(C)(C)CC(C)(C)C'},
            "2,2-二甲基庚烷": {"features": "CH3-C-CH2-CH2-CH2-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8], [2, 9]],
                          'nature': [0.428380366, 0.371793384], 'smiles': 'CCCCCC(C)(C)C'},
            "2,3,3,4-四甲基戊烷": {"features": "CH3-CH-C-CH-CH3-CH3-CH3-CH3-CH3",
                              "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7], [3, 8], [4, 9]],
                              'nature': [0.457536445, 0.397098085], 'smiles': 'CC(C)C(C)(C)C(C)C'},
            # ---- alcohols and ethers ----
            "甲醇": {"features": "OH-CH3",
                   "bonds": [[1, 2]],
                   'nature': [0.469573358, 0.40754498], 'smiles': 'CO'},
            "乙醇": {"features": "OH-CH2-CH3",
                   "bonds": [[1, 2], [2, 3]],
                   'nature': [0.42543801, 0.369239698], 'smiles': 'CCO'},
            "丙醇": {"features": "OH-CH2-CH2-CH3",
                   "bonds": [[1, 2], [2, 3], [3, 4]],
                   'nature': [0.393339575, 0.341381312], 'smiles': 'CCCO'},
            "异丙醇": {"features": "OH-CH-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [2, 4]],
                    'nature': [0.491239802, 0.426349391], 'smiles': 'CC(C)O'},
            "仲丁醇": {"features": "OH-CH-CH2-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [2, 5]],
                    'nature': [0.423833088, 0.367846779], 'smiles': 'CCC(C)O'},
            "异丁醇": {"features": "OH-CH2-CH-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [3, 5]],
                    'nature': [0.441754714, 0.383401045], 'smiles': 'CC(C)CO'},
            "正丁醇": {"features": "OH-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],
                    'nature': [0.490437341, 0.425652931], 'smiles': 'CCCCO'},
            "3-戊醇": {"features": "OH-CH-CH2-CH3-CH2-CH3",
                     "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [5, 6]],
                     'nature': [0.709241674, 0.615554266], 'smiles': 'CCC(CC)O'},
            "正戊醇": {"features": "OH-CH2-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                    'nature': [0.51959342, 0.450957632], 'smiles': 'CCCCCO'},
            "2-甲基丁醇": {"features": "OH-CH2-CH-CH2-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6]],
                       'nature': [0.543399759, 0.471619269], 'smiles': 'CCC(C)CO'},
            "2-戊醇": {"features": "OH-CH-CH2-CH2-CH3-CH3",
                     "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6]],
                     'nature': [0.53537515, 0.464654672], 'smiles': 'CCCC(C)O'},
            "甲基叔丁基醚": {"features": "CH3-C-O-CH3-CH3-CH3",
                       "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],
                       'nature': [0.440149793, 0.382008125], 'smiles': 'CC(C)(C)OC'},
            "新戊醇": {"features": "OH-CH2-C-CH3-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [3, 5], [3, 6]],
                    'nature': [0.871071285, 0.756006965], 'smiles': 'CC(C)(C)CO'},
            "异戊醇": {"features": "OH-CH2-CH2-CH-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [4, 6]],
                    'nature': [0.417145914, 0.362042948], 'smiles': 'CC(C)CCO'},
            "2-己醇": {"features": "OH-CH-CH2-CH2-CH2-CH3-CH3",
                     "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7]],
                     'nature': [0.840310285, 0.729309344], 'smiles': 'CCCCC(C)O'},
            "3-甲基-3-戊醇": {"features": "OH-C-CH2-CH3-CH3-CH2-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [6, 7]],
                          'nature': [0.667513709, 0.579338363], 'smiles': 'CCC(C)(CC)O'},
            "2,3-二甲基-2-丁醇": {"features": "OH-C-CH-CH3-CH3-CH3-CH3",
                             "bonds": [[1, 2], [2, 3], [3, 4], [3, 5], [2, 6], [2, 7]],
                             'nature': [0.693192457, 0.601625073], 'smiles': 'CC(C)C(C)(C)O'},
            "2-甲基-2-戊醇": {"features": "OH-C-CH2-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]],
                          'nature': [0.455129062, 0.395008706], 'smiles': 'CCCC(C)(C)O'},
            "正己醇": {"features": "OH-CH2-CH2-CH2-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],
                    'nature': [0.611341447, 0.530586187], 'smiles': 'CCCCCCO'},
            "甲醚": {"features": "CH3-O-CH3",
                   "bonds": [[1, 2], [2, 3]],
                   'nature': [0.352146583, 0.305629716], 'smiles': 'COC'},
            "甲乙醚": {"features": "CH3-CH2-O-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4]],
                    'nature': [0.428380366, 0.371793384], 'smiles': 'CCOC'},
            "乙醚": {"features": "CH3-CH2-O-CH2-CH3",
                   "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],
                   'nature': [0.419553297, 0.364132327], 'smiles': 'CCOCC'},
            "甲丁醚": {"features": "CH3-CH2-CH2-CH2-O-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                    'nature': [0.421693192, 0.365989553], 'smiles': 'CCCCOC'},
            "2-甲基-2-丁醇": {"features": "OH-C-CH2-CH3-CH3-CH3",
                          "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],
                          'nature': [0.706299318, 0.61300058], 'smiles': 'CCC(C)(C)O'},
            "乙丙醚": {"features": "CH3-CH2-CH2-O-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],
                    'nature': [0.389594757, 0.338131167], 'smiles': 'CCCOCC'},
            "叔丁基乙醚": {"features": "CH3-C-O-CH2-CH3-CH3-CH3",
                      "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]],
                      'nature': [0.479202889, 0.415902496], 'smiles': 'CCOC(C)(C)C'},
            "异丙醚": {"features": "CH3-CH-O-CH-CH3-CH3-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [4, 7]],
                    'nature': [0.49846195, 0.432617528], 'smiles': 'CC(C)OC(C)C'},
            "正丙醚": {"features": "CH3-CH2-CH2-O-CH2-CH2-CH3",
                    "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],
                    'nature': [0.40430654, 0.350899594], 'smiles': 'CCCOCCC'},
            "甲基叔戊醚": {"features": "CH3-CH2-C-O-CH3-CH3-CH3",
                      "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [3, 7]],
                      'nature': [0.516651063, 0.448403947], 'smiles': 'CCC(C)(C)OC'},
            "3,3-二甲基-2-丁醇": {"features": "OH-CH-C-CH3-CH3-CH3-CH3",
                             "bonds": [[1, 2], [2, 3], [3, 4], [3, 5], [3, 6], [2, 7]],
                             'nature': [0.745619901, 0.647127104], 'smiles': 'CC(C(C)(C)C)O'},
            "乙基丁基醚": {"features": "CH3-CH2-CH2-CH2-O-CH2-CH3",
                      "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],
                      'nature': [0.398956801, 0.346256529], 'smiles': 'CCCCOCC'},
            # NOTE(review): features and SMILES describe an alcohol
            # (3,3-dimethyl-1-butanol); the key text reads like an ester name.
            # The key is kept unchanged because other code may look it up.
            "3,3-二甲基-1-丁基酸酯": {"features": "OH-CH2-CH2-C-CH3-CH3-CH3",
                               "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [4, 6], [4, 7]],
                               'nature': [0.570148455, 0.494834591], 'smiles': 'CC(C)(C)CCO'},
        }
        os.makedirs('train_date', exist_ok=True)
        with open('train_date/train_date.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)
# Rebinds the name DateBuild from the class to one shared instance; after this
# line the class itself is no longer reachable under that name in this module.
DateBuild=DateBuild()
def padding(chemic, lens=config.seq_len):
    """Right-pad ``chemic`` with zeros, in place, up to ``lens`` items.

    A list that already holds ``lens`` items or more is left as it is (nothing is
    truncated). The list object that was passed in is also the return value.
    """
    missing = lens - len(chemic)
    if missing > 0:
        chemic.extend([0] * missing)
    return chemic
def dec_input_padding(chemical, lens=config.seq_len):
    """Wrap ``chemical`` as ``[2, *chemical, 1]`` and zero-pad it to ``lens``, in place.

    The ids 2 and 1 are presumably start/end-of-sequence markers - confirm against
    the vocabulary. Lists longer than ``lens`` after wrapping are not truncated.
    Returns the same (mutated) list object.
    """
    chemical[:] = [2, *chemical, 1]
    shortfall = lens - len(chemical)
    if shortfall > 0:
        chemical.extend([0] * shortfall)
    return chemical
def target_padding(chemical, lens=config.seq_len):
    """Append the id 1 to ``chemical`` and zero-pad it to ``lens``, in place.

    The id 1 is presumably an end-of-sequence marker - confirm against the
    vocabulary. Lists longer than ``lens`` are not truncated. Returns the same
    (mutated) list object.
    """
    chemical += [1]
    shortfall = lens - len(chemical)
    if shortfall > 0:
        chemical.extend([0] * shortfall)
    return chemical
class Chemical_Trans:
    """Token/index conversion and small graph helpers for molecule records.

    Loads two JSON vocabularies on construction: ``vocab/vocabulary.json``
    (group tokens such as the dash-separated pieces of a ``features`` string)
    and ``vocab/sm_voc.json`` (single SMILES characters).
    """

    def __init__(self, len=config.seq_len):
        """Load both vocabularies and remember the sequence length ``len``."""
        with open('vocab/vocabulary.json', 'r', encoding='utf-8') as f:
            self.vocab = json.load(f)
        self.len = len
        with open('vocab/sm_voc.json', 'r', encoding='utf-8') as f:
            self.sm_voc = json.load(f)

    def CCO(self, adj_matrix):
        """Convert an adjacency matrix into ``[source_nodes, target_nodes]``.

        Only entries above the diagonal (``i < j``) are scanned, so each
        undirected edge is listed once and self-loops are ignored.
        """
        source_nodes = []
        target_nodes = []
        for i, row in enumerate(adj_matrix):
            # Start at i + 1 so only the upper triangle is visited.
            for j in range(i + 1, len(row)):
                if row[j] != 0:  # an edge exists
                    source_nodes.append(i)
                    target_nodes.append(j)
        return [source_nodes, target_nodes]

    def chemical_trans(self, chemical):
        """Map a dash-separated group string to a list of vocabulary indices.

        Raises:
            KeyError: if a token is missing from ``self.vocab``.
        """
        return [self.vocab[token] for token in chemical.split('-')]

    def chemical_trans1(self, chemical):
        """Map a SMILES string, character by character, to ``sm_voc`` indices.

        Raises:
            KeyError: if a character is missing from ``self.sm_voc``.
        """
        return [self.sm_voc[char] for char in chemical]

    def generate_adjacency_matrix(self, num_vertices, edges, directed=False):
        """Build a ``num_vertices`` x ``num_vertices`` 0/1 adjacency matrix.

        ``edges`` holds 1-based ``(u, v)`` pairs. A pair with an endpoint
        outside ``1..num_vertices`` is skipped without an error. Unless
        ``directed`` is true, every edge is written in both directions.
        """
        # Start from an all-zero matrix.
        adjacency_matrix = [[0] * num_vertices for _ in range(num_vertices)]
        for u, v in edges:
            row, col = u - 1, v - 1  # convert to 0-based positions
            # Only accept edges whose endpoints are both valid vertices.
            if 0 <= row < num_vertices and 0 <= col < num_vertices:
                adjacency_matrix[row][col] = 1
                if not directed:
                    adjacency_matrix[col][row] = 1
        return adjacency_matrix

    def adjacency_to_degree_matrix(self, adjacency_matrix):
        """Return the diagonal degree matrix (row sums) of ``adjacency_matrix``."""
        # Make sure we are working on a NumPy array.
        adjacency_matrix = np.array(adjacency_matrix)
        # The degree of a vertex is the sum of its row.
        degrees = np.sum(adjacency_matrix, axis=1)
        # Diagonal matrix with the degrees on the diagonal.
        degree_matrix = np.zeros_like(adjacency_matrix)
        np.fill_diagonal(degree_matrix, degrees)
        return degree_matrix

    def __len__(self):
        """Number of entries in the group-token vocabulary."""
        return len(self.vocab)

    def __getitem__(self, key):
        """Return the vocabulary index stored for the token ``key``.

        Raises:
            KeyError: if ``key`` is not in the vocabulary.
        """
        # Previously this ignored ``key`` and returned ``self.vocab.keys()``.
        return self.vocab[key]
# Rebinds the name Chemical_Trans from the class to one shared instance. The
# constructor runs here, so both vocabulary JSON files are read at import time.
Chemical_Trans=Chemical_Trans()
class Chem_Dataset(Dataset):
    """In-memory dataset over a fixed list of molecules from the training JSON.

    Every molecule named in ``chemic_list`` is read from
    ``train_date/train_date.json`` and converted once, in ``__init__``, using
    the module-level ``Chemical_Trans`` instance and ``padding``.
    """

    def __init__(self):
        # ``super(Dataset).__init__()`` only built an unbound super object and
        # never ran the parent initialiser; use the zero-argument form.
        super().__init__()
        self.chemic_list = ['3-戊醇', '2-甲基庚烷', '乙醚', '2,3,3-三甲基戊烷', '异丁烷', '丙醇']
        self.input_list = []   # padded group-token indices
        self.bonds = []        # adjacency matrices (nested lists)
        self.vectors = []      # raw 1-based bond pairs as stored in the JSON
        self.target_list = []  # 'nature' values
        self.dujuzheng = []    # degree matrices
        self.CCO = []          # [source_nodes, target_nodes] edge lists
        self.enc = []          # padded SMILES character indices
        with open('train_date/train_date.json', 'r', encoding='utf-8') as f:
            records = json.load(f)
        molecules = [records[name] for name in self.chemic_list]
        for mol in molecules:
            group_count = len(mol['features'].split('-'))
            adjacency = Chemical_Trans.generate_adjacency_matrix(group_count, edges=mol['bonds'])
            self.input_list.append(padding(Chemical_Trans.chemical_trans(mol['features'])))
            self.bonds.append(adjacency)
            self.vectors.append(mol['bonds'])
            self.target_list.append(mol['nature'])
            self.dujuzheng.append(Chemical_Trans.adjacency_to_degree_matrix(adjacency))
            self.CCO.append(Chemical_Trans.CCO(adjacency))
            self.enc.append(padding(Chemical_Trans.chemical_trans1(mol['smiles'])))

    def __len__(self):
        """Number of molecules held by the dataset."""
        return len(self.input_list)

    def __getitem__(self, index):
        """Return ``(input_list, bonds, target, dujuzheng, CCO, enc)`` for one molecule."""
        return (
            self.input_list[index],
            self.bonds[index],
            self.target_list[index],
            self.dujuzheng[index],
            self.CCO[index],
            self.enc[index],
        )
def chem_collate_fn(batch):
    """Collate dataset samples into tensors.

    Args:
        batch: sequence of ``(input_list, bonds, target, dujuzheng, CCO, enc)``
            tuples as returned by ``Chem_Dataset.__getitem__``.

    Returns:
        ``(input_tensor, edge_index, target_tensor, enc)``. The adjacency and
        degree matrices of the samples are not part of the result.

    Note:
        ``torch.tensor`` needs rectangular input, so a batch with more than one
        sample only works when all samples have the same number of edges.
    """
    # Transpose the list of samples into one tuple per field.
    input_batch, _bonds, target, _dujuzheng, CCO, enc = zip(*batch)
    input_tensor = torch.tensor(list(input_batch))
    target_tensor = torch.tensor(list(target))
    # The per-sample [source_nodes, target_nodes] lists are the edge index.
    edge_index = torch.tensor(list(CCO))
    enc = torch.tensor(list(enc))
    return input_tensor, edge_index, target_tensor, enc
# Import-time side effect: builds the dataset (which reads
# train_date/train_date.json) and rebinds the name Chem_Dataset from the class
# to a shuffling DataLoader that yields one sample per batch.
Chem_Dataset = DataLoader(Chem_Dataset(), batch_size=1, shuffle=True, collate_fn=chem_collate_fn)
if __name__ == '__main__':
    # Smoke test: walk the loader once and print the shape of every encoded
    # SMILES batch; the other three batch fields are not needed here.
    for _inputs, _edge_index, _targets, enc in Chem_Dataset:
        print(enc.shape)