import json,config import torch from torch.utils.data import Dataset,DataLoader import numpy as np class DateBuild: def voc_build(self): voc={'CH4':0,'CH3':1,'CH2':2,'CH1':3,'CH0':4} with open('vocab/vocabulary.json','w',encoding='utf-8') as f: json.dump(voc, f, indent=4) def train_date(self): data={"乙烷": {"features": "CH3-CH3", "bonds": [[1, 2]],'nature':[0.241674468,0.209750435],'smiles':'CC'}, "丙烷": {"features": "CH3-CH2-CH3", "bonds": [[1, 2], [2, 3]],'nature':[0.228835094,0.198607081],'smiles':'CCC'}, "丁烷": {"features": "CH3-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4]],'nature':[0.360973653, 0.313290772],'smiles':'CCCC'}, "异丁烷": {"features": "CH3-CH-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 4]],'nature':[0.360706166,0.313058619],'smiles':'CC(C)C'}, "正戊烷": {"features": "CH3-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],'nature':[0.383710044,0.333023796],'smiles':'CCCCC'}, "新戊烷": {"features": "CH3-C-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [2, 4], [2, 5]],'nature':[0.68677277,0.596053395],'smiles':'CC(C)(C)C'}, "2-甲基丁烷": {"features": "CH3-CH-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5]],'nature':[0.68677277 ,0.596053395],'smiles':'CCC(C)C'}, "己烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]],'nature':[0.475725558,0.412884504],'smiles':'CCCCCC'}, "2,3-二甲基丁烷": {"features": "CH3-CH-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [3, 6]],'nature':[0.386117427,0.335113175],'smiles':'CC(C)C(C)C'}, "2-甲基戊烷": {"features": "CH3-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6]], 'nature': [0.319780661, 0.277539176],'smiles':'CCCC(C)C'}, "2,2-二甲基丁烷": {"features": "CH3-C-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]],'nature':[0.465828541,0.404294835],'smiles':'CCC(C)(C)C'}, "3,3-二甲基戊烷": {"features": "CH3-CH2-C-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6],[3,7]], 'nature': [0.369800722, 0.320951828],'smiles':'CCC(C)(C)CC'}, "2-甲基己烷": {"features": "CH3-CH-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[2, 7],],'nature':[0.414471045,0.359721416],'smiles':'CCCCC(C)C'}, "2,4-二甲基戊烷": {"features": "CH3-CH-CH2-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6],[4, 7]],'nature':[0.409923766,0.355774811],'smiles':'CC(C)CC(C)C'}, "3-甲基已烷": {"features": "CH3-CH2-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[3, 6]],'nature':[0.412331149,0.35786419],'smiles':'CCCC(C)CC'}, "2,2,3-三甲基丁烷": {"features": "CH3-CH-CH-CH-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [3, 6], [3, 7]],'nature':[0.663768891,0.576088218],'smiles':'CC(C)C(C)(C)C'}, "3-乙基戊烷": {"features": "CH3-CH2-CH-CH2-CH3-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [6, 7]], 'nature': [0.413401097, 0.358792803],'smiles':'CCC(CC)CC'}, "正庚烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]], 'nature': [0.488297445, 0.423795705],'smiles':'CCCCCCC'}, "2,2-二甲基戊烷": {"features": "CH3-C-CH2-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[2, 7],[2,8]],'nature':[0.399491775,0.346720836],'smiles':'CCCC(C)(C)C'}, "2-甲基庚烷": {"features": "CH3-CH-CH2-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8]], 'nature': [0.439347332, 0.381311666],'smiles':'CCCCCC(C)C'}, "2,2-二甲基己烷": {"features": "CH3-C-CH2-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [2, 8]], 'nature': [0.406713923, 0.352988973],'smiles':'CCCCC(C)(C)C'}, "六甲基乙烷": {"features": "CH3-C-C-CH3-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [3, 7], [3, 8]], 'nature': [1, 0.867904817],'smiles':'CC(C)(C)C(C)(C)C'}, "2,5-二甲基己烷": {"features": "CH3-CH-CH2-CH2-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7],[5,8]], 'nature': [0.487227498, 0.422867092],'smiles':'CC(C)CCC(C)C'}, "2,3,4-三甲基戊烷": {"features": "CH3-CH-CH-CH-CH2-CH3-CH3-CH3-CH3", "bonds":[[1, 2], [2, 3], [3, 4], [2, 5], [2, 6],[3, 7],[4, 8]],'nature':[0.438544871,0.380615206],'smiles':'CC(C)C(C)C(C)C'}, "4-甲基庚烷": {"features": "CH3-CH2-CH2-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [4, 8]],'nature':[0.40698141,0.353221126],'smiles':'CCCC(C)CCC'}, "正辛烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[2, 6]],'nature':[0.578708038,0.502263494],'smiles':'CCCCCCCC'}, "3-乙基-3-甲基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH3-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6],[3, 7],[7, 8]],'nature':[0.487494985,0.423099246],'smiles':'CCC(C)(CC)CC'}, "2,3,3-三甲基戊烷": {"features": "CH3-CH-C-CH2-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7],[3,8]], 'nature': [0.460746289, 0.399883923],'smiles':'CCC(C)(C)C(C)C'}, "异辛烷": {"features": "CH3-CH-CH2-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8]],'nature':[0.443627123,0.385026117],'smiles':'CCCCC(C)C'}, "3,3-二甲基已烷": {"features": "CH3-CH2-C-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [3, 7], [3, 8]],'nature':[0.393339575,0.341381312],'smiles':'CCCC(C)(C)CC'}, "2,4-二甲基-3-乙基戊烷": {"features": "CH3-CH-CH-CH-CH3-CH3-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7],[7,8],[4,9]], 'nature': [0.403236592, 0.349970981],'smiles':'CCC(C(C)C)C(C)C'}, "2,2-二甲基-3-乙基戊烷": {"features": "CH3-C-CH-CH2-CH3-CH3-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7],[3,8],[8,9]], 'nature': [0.46502608, 0.403598375],'smiles':'CCC(CC)C(C)(C)C'}, "2,2,5-三甲基己烷": {"features": "CH3-C-CH2-CH2-CH-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7],[2,8],[5,9]], 'nature': [0.447906915, 0.388740569],'smiles':'CC(C)CCC(C)(C)C'}, "2,2,3,3-四甲基戊烷": {"features": "CH3-C-C-CH2-CH3-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7],[3,8],[3,9]], 'nature': [0.704426909, 0.611375508],'smiles':'CCC(C)(C)C(C)(C)C'}, "正壬烷": {"features": "CH3-CH2-CH2-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],[6,7],[7, 8],[8,9]],'nature':[0.8479,0.549125],'smiles':'CCCCCCCCC'}, "2,3,3-三甲基己烷": {"features": "CH3-CH-C-CH2-CH2-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [3, 8], [3, 9]], 'nature': [0.587535108, 0.362971561],'smiles':'CCCC(C)(C)C(C)C'}, "2,6-二甲基庚烷": {"features": "CH3-CH-CH2-CH2-CH2-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [3, 8], [6, 9]], 'nature': [0.455396549, 0.395240859],'smiles':'CC(C)CCCC(C)C'}, "3,3-二乙基戊烷": {"features": "CH3-CH2-C-CH2-CH3-CH2-CH3-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [6, 7], [3, 8], [8, 9]], 'nature': [0.642102448, 0.557283807],'smiles':'CCC(CC)(CC)CC'}, "2,4,4-三甲基己烷": {"features": "CH3-CH-CH2-C-CH2-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7], [4, 8], [4, 9]], 'nature': [0.427310419, 0.370864771],'smiles':'CCC(C)(C)CC(C)C'}, "2,2,4,4-四甲基戊烷": {"features": "CH3-C-CH2-C-CH3-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7], [4, 8], [4, 9]], 'nature': [0.552761803, 0.479744631],'smiles':'CC(C)(C)CC(C)(C)C'}, "2,2-二甲基庚烷": {"features": "CH3-C-CH2-CH2-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [2, 8], [2, 9]], 'nature': [0.428380366, 0.371793384],'smiles':'CCCCCC(C)(C)C'}, "2,3,3,4-四甲基戊烷": {"features": "CH3-CH-C-CH-CH3-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [3, 7], [3, 8], [4, 9]], 'nature': [0.457536445, 0.397098085],'smiles':'CC(C)C(C)(C)C(C)C'}, "甲醇": {"features": "OH-CH3", "bonds": [[1, 2]], 'nature': [0.469573358, 0.40754498],'smiles':'CO'}, "乙醇": {"features": "OH-CH2-CH3", "bonds": [[1, 2], [2, 3]], 'nature': [0.42543801, 0.369239698],'smiles':'CCO'}, "丙醇": {"features": "OH-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4]], 'nature': [0.393339575, 0.341381312],'smiles':'CCCO'}, "异丙醇": {"features": "OH-CH-CH3-CH3", "bonds": [[1, 2], [2, 3], [2, 4]], 'nature': [0.491239802, 0.426349391],'smiles':'CC(C)O'}, "仲丁醇": {"features": "OH-CH-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5]], 'nature': [0.423833088, 0.367846779],'smiles':'CCC(C)O'}, "异丁醇": {"features": "OH-CH2-CH-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [3, 5]], 'nature': [0.441754714, 0.383401045],'smiles':'CC(C)CO'}, "正丁醇": {"features": "OH-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]], 'nature': [0.490437341, 0.425652931],'smiles':'CCCCO'}, "3-戊醇": {"features": "OH-C-CH2-CH3-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [5, 6]], 'nature': [0.709241674, 0.615554266],'smiles':'CCC(CC)O'}, "正戊醇": {"features": "OH-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]], 'nature': [0.51959342, 0.450957632],'smiles':'CCCCCO'}, "2-甲基丁醇": {"features": "OH-CH-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6]], 'nature': [0.543399759, 0.471619269],'smiles':'CCC(C)CO'}, "2-戊醇": {"features": "OH-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]], 'nature': [0.53537515, 0.464654672],'smiles':'CCCC(C)O'}, "甲基叔丁基醚": {"features": "CH3-C-O-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]], 'nature': [0.440149793, 0.382008125],'smiles':'CC(C)(C)OC'}, "新戊醇": {"features": "OH-CH2-C-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [3, 5], [3, 6]], 'nature': [0.871071285, 0.756006965],'smiles':'CC(C)(C)CO'}, "异戊醇": {"features": "OH-CH2-CH2-C-CH3-CH3-CH3",### "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [4, 6], [4, 7]], 'nature': [0.417145914, 0.362042948],'smiles':'CC(C)CCO'}, "2-己醇": {"features": "OH-CH-CH2-CH2-CH2-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [2, 7]], 'nature': [0.840310285, 0.729309344],'smiles':'CCCCC(C)O'}, "3-甲基-3-戊醇": {"features": "OH-C-CH2-CH3-CH3-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [6, 7]], 'nature': [0.667513709, 0.579338363],'smiles':'CCC(C)(CC)O'}, "2,3-二甲基-2-丁醇": {"features": "OH-C-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]], 'nature': [0.693192457, 0.601625073],'smiles':'CC(C)C(C)(C)O'}, "2-甲基-2-戊醇": {"features": "OH-C-CH2-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]], 'nature': [0.455129062, 0.395008706],'smiles':'CCCC(C)(C)O'}, "正己醇": {"features": "OH-CH2-CH2-CH2-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]],'nature':[0.611341447,0.530586187],'smiles':'CCCCCCO'}, "甲醚": {"features": "CH3-O-CH3", "bonds": [[1, 2], [2, 3]], 'nature': [0.352146583, 0.305629716],'smiles':'COC'}, "甲乙醚": {"features": "CH3-CH2-O-CH3", "bonds": [[1, 2], [2, 3], [3, 4]],'nature':[0.428380366,0.371793384],'smiles':'CCOC'}, "乙醚": {"features": "CH3-CH2-O-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5]],'nature':[0.419553297,0.364132327],'smiles':'CCOCC'}, "甲丁醚": {"features": "CH3-CH2-CH2-CH2-O-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]], 'nature': [0.421693192, 0.365989553],'smiles':'CCCCOC'}, "2-甲基-2-丁醇": {"features": "OH-C-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6]], 'nature': [0.706299318, 0.61300058],'smiles':'CCC(C)(C)O'}, "乙丙醚": {"features": "CH3-CH2-CH2-O-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]], 'nature': [0.389594757, 0.338131167],'smiles':'CCCOCC'}, "叔丁基乙醚": {"features": "CH3-C-O-CH2-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [2, 7]], 'nature': [0.479202889, 0.415902496],'smiles':'CCOC(C)(C)C'}, "异丙醚": {"features": "CH3-CH-O-CH-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [2, 6], [4, 7]], 'nature': [0.49846195, 0.432617528],'smiles':'CC(C)OC(C)C'}, "正丙醚": {"features": "CH3-CH2-CH2-O-CH2-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]], 'nature': [0.40430654, 0.350899594],'smiles':'CCCOCCC'}, "甲基叔戊醚": {"features": "CH3-CH2-C-O-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [3, 6], [3, 7]], 'nature': [0.516651063, 0.448403947],'smiles':'CCC(C)(C)OC'}, "3,3-二甲基-2-丁醇": {"features": "OH-C-CH-CH3-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [2, 5], [2, 6], [3, 7]], 'nature': [0.745619901, 0.647127104],'smiles':'CC(C(C)(C)C)O'}, "乙基丁基醚": {"features": "CH3-CH2-CH2-CH2-O-CH2-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]], 'nature': [0.398956801, 0.346256529],'smiles':'CCCCOCC'}, "3,3-二甲基-1-丁基酸酯": {"features": "OH-CH2-CH2-C-CH3-CH3-CH3", "bonds": [[1, 2], [2, 3], [3, 4], [4, 5], [4, 6], [4, 7]], 'nature': [0.570148455, 0.494834591],'smiles':'CC(C)(C)CCO'}, } with open('train_date/train_date.json','w',encoding='utf-8') as f: json.dump(data, f, indent=4) pass DateBuild=DateBuild() def padding( chemic, lens=config.seq_len): while len(chemic) < lens: chemic.append(0) return chemic def dec_input_padding( chemical, lens=config.seq_len): chemical.append(1) chemical.insert(0,2) while len(chemical)