You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1882 lines
73 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"id": "973482e4",
"metadata": {},
"source": [
"## Pandas读取数据"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "beb98a6b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "76818406",
"metadata": {},
"source": [
"## CSV文件的读取与保存"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "682bdd7c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Molecule ChEMBL ID</th>\n",
" <th>Molecule Name</th>\n",
" <th>Molecule Max Phase</th>\n",
" <th>Molecular Weight</th>\n",
" <th>#RO5 Violations</th>\n",
" <th>AlogP</th>\n",
" <th>Compound Key</th>\n",
" <th>Smiles</th>\n",
" <th>Standard Type</th>\n",
" <th>Standard Relation</th>\n",
" <th>...</th>\n",
" <th>Target ChEMBL ID</th>\n",
" <th>Target Name</th>\n",
" <th>Target Organism</th>\n",
" <th>Target Type</th>\n",
" <th>Document ChEMBL ID</th>\n",
" <th>Source ID</th>\n",
" <th>Source Description</th>\n",
" <th>Document Journal</th>\n",
" <th>Document Year</th>\n",
" <th>Cell ChEMBL ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHEMBL465295</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>371.39</td>\n",
" <td>0.0</td>\n",
" <td>1.89</td>\n",
" <td>29</td>\n",
" <td>CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1221246</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>Bioorg. Med. Chem. Lett.</td>\n",
" <td>2010.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL385800</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>216.02</td>\n",
" <td>0.0</td>\n",
" <td>2.13</td>\n",
" <td>50</td>\n",
" <td>Clc1cc(Cl)c2C(=O)C(=O)Nc2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHEMBL222190</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>43</td>\n",
" <td>Clc1cccc2C(=O)C(=O)Nc12</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHEMBL327012</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>33</td>\n",
" <td>Clc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHEMBL222660</td>\n",
" <td>5-METHOXYISATIN</td>\n",
" <td>0</td>\n",
" <td>177.16</td>\n",
" <td>0.0</td>\n",
" <td>0.83</td>\n",
" <td>31</td>\n",
" <td>COc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1753</th>\n",
" <td>CHEMBL105</td>\n",
" <td>MITOMYCIN</td>\n",
" <td>4</td>\n",
" <td>334.33</td>\n",
" <td>0.0</td>\n",
" <td>-1.65</td>\n",
" <td>Mitomycin C</td>\n",
" <td>CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1754</th>\n",
" <td>CHEMBL1064</td>\n",
" <td>SIMVASTATIN</td>\n",
" <td>4</td>\n",
" <td>418.57</td>\n",
" <td>0.0</td>\n",
" <td>4.59</td>\n",
" <td>simvastatin</td>\n",
" <td>CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1755</th>\n",
" <td>CHEMBL841</td>\n",
" <td>LOPERAMIDE</td>\n",
" <td>4</td>\n",
" <td>477.05</td>\n",
" <td>1.0</td>\n",
" <td>5.09</td>\n",
" <td>loperamide</td>\n",
" <td>CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1756</th>\n",
" <td>CHEMBL196</td>\n",
" <td>ASCORBIC ACID</td>\n",
" <td>4</td>\n",
" <td>176.12</td>\n",
" <td>0.0</td>\n",
" <td>-1.41</td>\n",
" <td>ascorbic acid</td>\n",
" <td>OC[C@H](O)[C@H]1OC(=O)C(=C1O)O</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1757</th>\n",
" <td>CHEMBL741</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>4</td>\n",
" <td>256.10</td>\n",
" <td>0.0</td>\n",
" <td>2.01</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1758 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n",
"0 CHEMBL465295 NaN 0 \n",
"1 CHEMBL385800 NaN 0 \n",
"2 CHEMBL222190 NaN 0 \n",
"3 CHEMBL327012 NaN 0 \n",
"4 CHEMBL222660 5-METHOXYISATIN 0 \n",
"... ... ... ... \n",
"1753 CHEMBL105 MITOMYCIN 4 \n",
"1754 CHEMBL1064 SIMVASTATIN 4 \n",
"1755 CHEMBL841 LOPERAMIDE 4 \n",
"1756 CHEMBL196 ASCORBIC ACID 4 \n",
"1757 CHEMBL741 LAMOTRIGINE 4 \n",
"\n",
" Molecular Weight #RO5 Violations AlogP Compound Key \\\n",
"0 371.39 0.0 1.89 29 \n",
"1 216.02 0.0 2.13 50 \n",
"2 181.58 0.0 1.47 43 \n",
"3 181.58 0.0 1.47 33 \n",
"4 177.16 0.0 0.83 31 \n",
"... ... ... ... ... \n",
"1753 334.33 0.0 -1.65 Mitomycin C \n",
"1754 418.57 0.0 4.59 simvastatin \n",
"1755 477.05 1.0 5.09 loperamide \n",
"1756 176.12 0.0 -1.41 ascorbic acid \n",
"1757 256.10 0.0 2.01 LAMOTRIGINE \n",
"\n",
" Smiles Standard Type \\\n",
"0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n",
"1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n",
"2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n",
"3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"... ... ... \n",
"1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n",
"1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n",
"1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n",
"1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n",
"1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n",
"\n",
" Standard Relation ... Target ChEMBL ID Target Name \\\n",
"0 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1 '>' ... CHEMBL220 Acetylcholinesterase \n",
"2 '>' ... CHEMBL220 Acetylcholinesterase \n",
"3 '>' ... CHEMBL220 Acetylcholinesterase \n",
"4 '>' ... CHEMBL220 Acetylcholinesterase \n",
"... ... ... ... ... \n",
"1753 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1754 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1755 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1756 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1757 NaN ... CHEMBL220 Acetylcholinesterase \n",
"\n",
" Target Organism Target Type Document ChEMBL ID Source ID \\\n",
"0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n",
"1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"... ... ... ... ... \n",
"1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"\n",
" Source Description Document Journal Document Year \\\n",
"0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n",
"1 Scientific Literature J. Med. Chem. 2007.0 \n",
"2 Scientific Literature J. Med. Chem. 2007.0 \n",
"3 Scientific Literature J. Med. Chem. 2007.0 \n",
"4 Scientific Literature J. Med. Chem. 2007.0 \n",
"... ... ... ... \n",
"1753 DrugMatrix NaN NaN \n",
"1754 DrugMatrix NaN NaN \n",
"1755 DrugMatrix NaN NaN \n",
"1756 DrugMatrix NaN NaN \n",
"1757 DrugMatrix NaN NaN \n",
"\n",
" Cell ChEMBL ID \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"1753 CHEMBL3307715 \n",
"1754 CHEMBL3307715 \n",
"1755 CHEMBL3307715 \n",
"1756 CHEMBL3307715 \n",
"1757 CHEMBL3307715 \n",
"\n",
"[1758 rows x 41 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 读取CSV文件\n",
"data = pd.read_csv('output.csv')\n",
"# 查看数据\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2c72ff5c",
"metadata": {},
"outputs": [],
"source": [
"data.to_csv('output1.csv',index=True)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "d98130c9",
"metadata": {},
"outputs": [],
"source": [
"data.to_csv('output2.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "3fd5ac0f",
"metadata": {},
"outputs": [],
"source": [
"data.to_csv('output3.csv', encoding='utf-8', index=False, sep=';')"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "3a7a6bba",
"metadata": {},
"outputs": [],
"source": [
"data.to_csv('output4.csv', encoding='GBK', index=False, sep=';')"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "f8467281",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Molecule ChEMBL ID</th>\n",
" <th>Molecule Name</th>\n",
" <th>Molecule Max Phase</th>\n",
" <th>Molecular Weight</th>\n",
" <th>#RO5 Violations</th>\n",
" <th>AlogP</th>\n",
" <th>Compound Key</th>\n",
" <th>Smiles</th>\n",
" <th>Standard Type</th>\n",
" <th>Standard Relation</th>\n",
" <th>...</th>\n",
" <th>Target ChEMBL ID</th>\n",
" <th>Target Name</th>\n",
" <th>Target Organism</th>\n",
" <th>Target Type</th>\n",
" <th>Document ChEMBL ID</th>\n",
" <th>Source ID</th>\n",
" <th>Source Description</th>\n",
" <th>Document Journal</th>\n",
" <th>Document Year</th>\n",
" <th>Cell ChEMBL ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHEMBL465295</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>371.39</td>\n",
" <td>0.0</td>\n",
" <td>1.89</td>\n",
" <td>29</td>\n",
" <td>CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1221246</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>Bioorg. Med. Chem. Lett.</td>\n",
" <td>2010.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL385800</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>216.02</td>\n",
" <td>0.0</td>\n",
" <td>2.13</td>\n",
" <td>50</td>\n",
" <td>Clc1cc(Cl)c2C(=O)C(=O)Nc2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHEMBL222190</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>43</td>\n",
" <td>Clc1cccc2C(=O)C(=O)Nc12</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHEMBL327012</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>33</td>\n",
" <td>Clc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHEMBL222660</td>\n",
" <td>5-METHOXYISATIN</td>\n",
" <td>0</td>\n",
" <td>177.16</td>\n",
" <td>0.0</td>\n",
" <td>0.83</td>\n",
" <td>31</td>\n",
" <td>COc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1753</th>\n",
" <td>CHEMBL105</td>\n",
" <td>MITOMYCIN</td>\n",
" <td>4</td>\n",
" <td>334.33</td>\n",
" <td>0.0</td>\n",
" <td>-1.65</td>\n",
" <td>Mitomycin C</td>\n",
" <td>CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1754</th>\n",
" <td>CHEMBL1064</td>\n",
" <td>SIMVASTATIN</td>\n",
" <td>4</td>\n",
" <td>418.57</td>\n",
" <td>0.0</td>\n",
" <td>4.59</td>\n",
" <td>simvastatin</td>\n",
" <td>CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1755</th>\n",
" <td>CHEMBL841</td>\n",
" <td>LOPERAMIDE</td>\n",
" <td>4</td>\n",
" <td>477.05</td>\n",
" <td>1.0</td>\n",
" <td>5.09</td>\n",
" <td>loperamide</td>\n",
" <td>CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1756</th>\n",
" <td>CHEMBL196</td>\n",
" <td>ASCORBIC ACID</td>\n",
" <td>4</td>\n",
" <td>176.12</td>\n",
" <td>0.0</td>\n",
" <td>-1.41</td>\n",
" <td>ascorbic acid</td>\n",
" <td>OC[C@H](O)[C@H]1OC(=O)C(=C1O)O</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1757</th>\n",
" <td>CHEMBL741</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>4</td>\n",
" <td>256.10</td>\n",
" <td>0.0</td>\n",
" <td>2.01</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1758 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n",
"0 CHEMBL465295 NaN 0 \n",
"1 CHEMBL385800 NaN 0 \n",
"2 CHEMBL222190 NaN 0 \n",
"3 CHEMBL327012 NaN 0 \n",
"4 CHEMBL222660 5-METHOXYISATIN 0 \n",
"... ... ... ... \n",
"1753 CHEMBL105 MITOMYCIN 4 \n",
"1754 CHEMBL1064 SIMVASTATIN 4 \n",
"1755 CHEMBL841 LOPERAMIDE 4 \n",
"1756 CHEMBL196 ASCORBIC ACID 4 \n",
"1757 CHEMBL741 LAMOTRIGINE 4 \n",
"\n",
" Molecular Weight #RO5 Violations AlogP Compound Key \\\n",
"0 371.39 0.0 1.89 29 \n",
"1 216.02 0.0 2.13 50 \n",
"2 181.58 0.0 1.47 43 \n",
"3 181.58 0.0 1.47 33 \n",
"4 177.16 0.0 0.83 31 \n",
"... ... ... ... ... \n",
"1753 334.33 0.0 -1.65 Mitomycin C \n",
"1754 418.57 0.0 4.59 simvastatin \n",
"1755 477.05 1.0 5.09 loperamide \n",
"1756 176.12 0.0 -1.41 ascorbic acid \n",
"1757 256.10 0.0 2.01 LAMOTRIGINE \n",
"\n",
" Smiles Standard Type \\\n",
"0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n",
"1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n",
"2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n",
"3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"... ... ... \n",
"1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n",
"1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n",
"1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n",
"1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n",
"1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n",
"\n",
" Standard Relation ... Target ChEMBL ID Target Name \\\n",
"0 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1 '>' ... CHEMBL220 Acetylcholinesterase \n",
"2 '>' ... CHEMBL220 Acetylcholinesterase \n",
"3 '>' ... CHEMBL220 Acetylcholinesterase \n",
"4 '>' ... CHEMBL220 Acetylcholinesterase \n",
"... ... ... ... ... \n",
"1753 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1754 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1755 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1756 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1757 NaN ... CHEMBL220 Acetylcholinesterase \n",
"\n",
" Target Organism Target Type Document ChEMBL ID Source ID \\\n",
"0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n",
"1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"... ... ... ... ... \n",
"1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"\n",
" Source Description Document Journal Document Year \\\n",
"0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n",
"1 Scientific Literature J. Med. Chem. 2007.0 \n",
"2 Scientific Literature J. Med. Chem. 2007.0 \n",
"3 Scientific Literature J. Med. Chem. 2007.0 \n",
"4 Scientific Literature J. Med. Chem. 2007.0 \n",
"... ... ... ... \n",
"1753 DrugMatrix NaN NaN \n",
"1754 DrugMatrix NaN NaN \n",
"1755 DrugMatrix NaN NaN \n",
"1756 DrugMatrix NaN NaN \n",
"1757 DrugMatrix NaN NaN \n",
"\n",
" Cell ChEMBL ID \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"1753 CHEMBL3307715 \n",
"1754 CHEMBL3307715 \n",
"1755 CHEMBL3307715 \n",
"1756 CHEMBL3307715 \n",
"1757 CHEMBL3307715 \n",
"\n",
"[1758 rows x 41 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 读取CSV文件\n",
"data1 = pd.read_csv('output3.csv',sep=';')\n",
"# 查看数据\n",
"data1"
]
},
{
"cell_type": "markdown",
"id": "ac843c56",
"metadata": {},
"source": [
"### Excel文件的读取与保存"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "cccd0009",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Molecule ChEMBL ID</th>\n",
" <th>Molecule Name</th>\n",
" <th>Molecule Max Phase</th>\n",
" <th>Molecular Weight</th>\n",
" <th>#RO5 Violations</th>\n",
" <th>AlogP</th>\n",
" <th>Compound Key</th>\n",
" <th>Smiles</th>\n",
" <th>Standard Type</th>\n",
" <th>Standard Relation</th>\n",
" <th>...</th>\n",
" <th>Target ChEMBL ID</th>\n",
" <th>Target Name</th>\n",
" <th>Target Organism</th>\n",
" <th>Target Type</th>\n",
" <th>Document ChEMBL ID</th>\n",
" <th>Source ID</th>\n",
" <th>Source Description</th>\n",
" <th>Document Journal</th>\n",
" <th>Document Year</th>\n",
" <th>Cell ChEMBL ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHEMBL465295</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>371.39</td>\n",
" <td>0.0</td>\n",
" <td>1.89</td>\n",
" <td>29</td>\n",
" <td>CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1221246</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>Bioorg. Med. Chem. Lett.</td>\n",
" <td>2010.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL385800</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>216.02</td>\n",
" <td>0.0</td>\n",
" <td>2.13</td>\n",
" <td>50</td>\n",
" <td>Clc1cc(Cl)c2C(=O)C(=O)Nc2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHEMBL222190</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>43</td>\n",
" <td>Clc1cccc2C(=O)C(=O)Nc12</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHEMBL327012</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>33</td>\n",
" <td>Clc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHEMBL222660</td>\n",
" <td>5-METHOXYISATIN</td>\n",
" <td>0</td>\n",
" <td>177.16</td>\n",
" <td>0.0</td>\n",
" <td>0.83</td>\n",
" <td>31</td>\n",
" <td>COc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1753</th>\n",
" <td>CHEMBL105</td>\n",
" <td>MITOMYCIN</td>\n",
" <td>4</td>\n",
" <td>334.33</td>\n",
" <td>0.0</td>\n",
" <td>-1.65</td>\n",
" <td>Mitomycin C</td>\n",
" <td>CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1754</th>\n",
" <td>CHEMBL1064</td>\n",
" <td>SIMVASTATIN</td>\n",
" <td>4</td>\n",
" <td>418.57</td>\n",
" <td>0.0</td>\n",
" <td>4.59</td>\n",
" <td>simvastatin</td>\n",
" <td>CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1755</th>\n",
" <td>CHEMBL841</td>\n",
" <td>LOPERAMIDE</td>\n",
" <td>4</td>\n",
" <td>477.05</td>\n",
" <td>1.0</td>\n",
" <td>5.09</td>\n",
" <td>loperamide</td>\n",
" <td>CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1756</th>\n",
" <td>CHEMBL196</td>\n",
" <td>ASCORBIC ACID</td>\n",
" <td>4</td>\n",
" <td>176.12</td>\n",
" <td>0.0</td>\n",
" <td>-1.41</td>\n",
" <td>ascorbic acid</td>\n",
" <td>OC[C@H](O)[C@H]1OC(=O)C(=C1O)O</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1757</th>\n",
" <td>CHEMBL741</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>4</td>\n",
" <td>256.10</td>\n",
" <td>0.0</td>\n",
" <td>2.01</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1758 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n",
"0 CHEMBL465295 NaN 0 \n",
"1 CHEMBL385800 NaN 0 \n",
"2 CHEMBL222190 NaN 0 \n",
"3 CHEMBL327012 NaN 0 \n",
"4 CHEMBL222660 5-METHOXYISATIN 0 \n",
"... ... ... ... \n",
"1753 CHEMBL105 MITOMYCIN 4 \n",
"1754 CHEMBL1064 SIMVASTATIN 4 \n",
"1755 CHEMBL841 LOPERAMIDE 4 \n",
"1756 CHEMBL196 ASCORBIC ACID 4 \n",
"1757 CHEMBL741 LAMOTRIGINE 4 \n",
"\n",
" Molecular Weight #RO5 Violations AlogP Compound Key \\\n",
"0 371.39 0.0 1.89 29 \n",
"1 216.02 0.0 2.13 50 \n",
"2 181.58 0.0 1.47 43 \n",
"3 181.58 0.0 1.47 33 \n",
"4 177.16 0.0 0.83 31 \n",
"... ... ... ... ... \n",
"1753 334.33 0.0 -1.65 Mitomycin C \n",
"1754 418.57 0.0 4.59 simvastatin \n",
"1755 477.05 1.0 5.09 loperamide \n",
"1756 176.12 0.0 -1.41 ascorbic acid \n",
"1757 256.10 0.0 2.01 LAMOTRIGINE \n",
"\n",
" Smiles Standard Type \\\n",
"0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n",
"1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n",
"2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n",
"3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"... ... ... \n",
"1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n",
"1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n",
"1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n",
"1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n",
"1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n",
"\n",
" Standard Relation ... Target ChEMBL ID Target Name \\\n",
"0 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1 '>' ... CHEMBL220 Acetylcholinesterase \n",
"2 '>' ... CHEMBL220 Acetylcholinesterase \n",
"3 '>' ... CHEMBL220 Acetylcholinesterase \n",
"4 '>' ... CHEMBL220 Acetylcholinesterase \n",
"... ... ... ... ... \n",
"1753 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1754 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1755 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1756 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1757 NaN ... CHEMBL220 Acetylcholinesterase \n",
"\n",
" Target Organism Target Type Document ChEMBL ID Source ID \\\n",
"0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n",
"1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"... ... ... ... ... \n",
"1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"\n",
" Source Description Document Journal Document Year \\\n",
"0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n",
"1 Scientific Literature J. Med. Chem. 2007.0 \n",
"2 Scientific Literature J. Med. Chem. 2007.0 \n",
"3 Scientific Literature J. Med. Chem. 2007.0 \n",
"4 Scientific Literature J. Med. Chem. 2007.0 \n",
"... ... ... ... \n",
"1753 DrugMatrix NaN NaN \n",
"1754 DrugMatrix NaN NaN \n",
"1755 DrugMatrix NaN NaN \n",
"1756 DrugMatrix NaN NaN \n",
"1757 DrugMatrix NaN NaN \n",
"\n",
" Cell ChEMBL ID \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"1753 CHEMBL3307715 \n",
"1754 CHEMBL3307715 \n",
"1755 CHEMBL3307715 \n",
"1756 CHEMBL3307715 \n",
"1757 CHEMBL3307715 \n",
"\n",
"[1758 rows x 41 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 读取 Excel 文件\n",
"data_excel = pd.read_excel('output.xlsx')\n",
"data_excel"
]
},
{
"cell_type": "markdown",
"id": "09110d1a",
"metadata": {},
"source": [
"在这个示例中我们使用read_excel()函数来读取Excel文件中的数据并将数据加载到一个DataFrame对象中。sheet_name参数用于指定要读取的工作表名称。如果省略此参数则默认读取第一个工作表。"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "eeaeade9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\l'r's\\AppData\\Local\\conda\\conda\\envs\\python38\\lib\\site-packages\\pandas\\util\\_decorators.py:211: FutureWarning: the 'encoding' keyword is deprecated and will be removed in a future version. Please take steps to stop the use of 'encoding'\n",
" return func(*args, **kwargs)\n"
]
}
],
"source": [
"data.to_excel('output5.xlsx', encoding='GBK', index=False, sheet_name='Sheet1')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d1950caf",
"metadata": {},
"outputs": [],
"source": [
"data_excel.to_excel('output5.xlsx', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bdd897ec",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\l'r's\\AppData\\Local\\conda\\conda\\envs\\python38\\lib\\site-packages\\pandas\\util\\_decorators.py:211: FutureWarning: the 'encoding' keyword is deprecated and will be removed in a future version. Please take steps to stop the use of 'encoding'\n",
" return func(*args, **kwargs)\n"
]
}
],
"source": [
"data_excel.to_excel('output6.xlsx', encoding='GBK', index=False, sheet_name='Sheet2')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e3f57ea1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Molecule ChEMBL ID</th>\n",
" <th>Molecule Name</th>\n",
" <th>Molecule Max Phase</th>\n",
" <th>Molecular Weight</th>\n",
" <th>#RO5 Violations</th>\n",
" <th>AlogP</th>\n",
" <th>Compound Key</th>\n",
" <th>Smiles</th>\n",
" <th>Standard Type</th>\n",
" <th>Standard Relation</th>\n",
" <th>...</th>\n",
" <th>Target ChEMBL ID</th>\n",
" <th>Target Name</th>\n",
" <th>Target Organism</th>\n",
" <th>Target Type</th>\n",
" <th>Document ChEMBL ID</th>\n",
" <th>Source ID</th>\n",
" <th>Source Description</th>\n",
" <th>Document Journal</th>\n",
" <th>Document Year</th>\n",
" <th>Cell ChEMBL ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHEMBL465295</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>371.39</td>\n",
" <td>0.0</td>\n",
" <td>1.89</td>\n",
" <td>29</td>\n",
" <td>CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1221246</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>Bioorg. Med. Chem. Lett.</td>\n",
" <td>2010.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHEMBL385800</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>216.02</td>\n",
" <td>0.0</td>\n",
" <td>2.13</td>\n",
" <td>50</td>\n",
" <td>Clc1cc(Cl)c2C(=O)C(=O)Nc2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHEMBL222190</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>43</td>\n",
" <td>Clc1cccc2C(=O)C(=O)Nc12</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHEMBL327012</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>181.58</td>\n",
" <td>0.0</td>\n",
" <td>1.47</td>\n",
" <td>33</td>\n",
" <td>Clc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHEMBL222660</td>\n",
" <td>5-METHOXYISATIN</td>\n",
" <td>0</td>\n",
" <td>177.16</td>\n",
" <td>0.0</td>\n",
" <td>0.83</td>\n",
" <td>31</td>\n",
" <td>COc1ccc2NC(=O)C(=O)c2c1</td>\n",
" <td>Ki</td>\n",
" <td>'&gt;'</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1144473</td>\n",
" <td>1</td>\n",
" <td>Scientific Literature</td>\n",
" <td>J. Med. Chem.</td>\n",
" <td>2007.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1753</th>\n",
" <td>CHEMBL105</td>\n",
" <td>MITOMYCIN</td>\n",
" <td>4</td>\n",
" <td>334.33</td>\n",
" <td>0.0</td>\n",
" <td>-1.65</td>\n",
" <td>Mitomycin C</td>\n",
" <td>CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1754</th>\n",
" <td>CHEMBL1064</td>\n",
" <td>SIMVASTATIN</td>\n",
" <td>4</td>\n",
" <td>418.57</td>\n",
" <td>0.0</td>\n",
" <td>4.59</td>\n",
" <td>simvastatin</td>\n",
" <td>CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1755</th>\n",
" <td>CHEMBL841</td>\n",
" <td>LOPERAMIDE</td>\n",
" <td>4</td>\n",
" <td>477.05</td>\n",
" <td>1.0</td>\n",
" <td>5.09</td>\n",
" <td>loperamide</td>\n",
" <td>CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1756</th>\n",
" <td>CHEMBL196</td>\n",
" <td>ASCORBIC ACID</td>\n",
" <td>4</td>\n",
" <td>176.12</td>\n",
" <td>0.0</td>\n",
" <td>-1.41</td>\n",
" <td>ascorbic acid</td>\n",
" <td>OC[C@H](O)[C@H]1OC(=O)C(=C1O)O</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1757</th>\n",
" <td>CHEMBL741</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>4</td>\n",
" <td>256.10</td>\n",
" <td>0.0</td>\n",
" <td>2.01</td>\n",
" <td>LAMOTRIGINE</td>\n",
" <td>Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl</td>\n",
" <td>Ki</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>CHEMBL220</td>\n",
" <td>Acetylcholinesterase</td>\n",
" <td>Homo sapiens</td>\n",
" <td>SINGLE PROTEIN</td>\n",
" <td>CHEMBL1909046</td>\n",
" <td>15</td>\n",
" <td>DrugMatrix</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CHEMBL3307715</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1758 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n",
"0 CHEMBL465295 NaN 0 \n",
"1 CHEMBL385800 NaN 0 \n",
"2 CHEMBL222190 NaN 0 \n",
"3 CHEMBL327012 NaN 0 \n",
"4 CHEMBL222660 5-METHOXYISATIN 0 \n",
"... ... ... ... \n",
"1753 CHEMBL105 MITOMYCIN 4 \n",
"1754 CHEMBL1064 SIMVASTATIN 4 \n",
"1755 CHEMBL841 LOPERAMIDE 4 \n",
"1756 CHEMBL196 ASCORBIC ACID 4 \n",
"1757 CHEMBL741 LAMOTRIGINE 4 \n",
"\n",
" Molecular Weight #RO5 Violations AlogP Compound Key \\\n",
"0 371.39 0.0 1.89 29 \n",
"1 216.02 0.0 2.13 50 \n",
"2 181.58 0.0 1.47 43 \n",
"3 181.58 0.0 1.47 33 \n",
"4 177.16 0.0 0.83 31 \n",
"... ... ... ... ... \n",
"1753 334.33 0.0 -1.65 Mitomycin C \n",
"1754 418.57 0.0 4.59 simvastatin \n",
"1755 477.05 1.0 5.09 loperamide \n",
"1756 176.12 0.0 -1.41 ascorbic acid \n",
"1757 256.10 0.0 2.01 LAMOTRIGINE \n",
"\n",
" Smiles Standard Type \\\n",
"0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n",
"1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n",
"2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n",
"3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n",
"... ... ... \n",
"1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n",
"1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n",
"1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n",
"1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n",
"1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n",
"\n",
" Standard Relation ... Target ChEMBL ID Target Name \\\n",
"0 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1 '>' ... CHEMBL220 Acetylcholinesterase \n",
"2 '>' ... CHEMBL220 Acetylcholinesterase \n",
"3 '>' ... CHEMBL220 Acetylcholinesterase \n",
"4 '>' ... CHEMBL220 Acetylcholinesterase \n",
"... ... ... ... ... \n",
"1753 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1754 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1755 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1756 NaN ... CHEMBL220 Acetylcholinesterase \n",
"1757 NaN ... CHEMBL220 Acetylcholinesterase \n",
"\n",
" Target Organism Target Type Document ChEMBL ID Source ID \\\n",
"0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n",
"1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n",
"... ... ... ... ... \n",
"1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n",
"\n",
" Source Description Document Journal Document Year \\\n",
"0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n",
"1 Scientific Literature J. Med. Chem. 2007.0 \n",
"2 Scientific Literature J. Med. Chem. 2007.0 \n",
"3 Scientific Literature J. Med. Chem. 2007.0 \n",
"4 Scientific Literature J. Med. Chem. 2007.0 \n",
"... ... ... ... \n",
"1753 DrugMatrix NaN NaN \n",
"1754 DrugMatrix NaN NaN \n",
"1755 DrugMatrix NaN NaN \n",
"1756 DrugMatrix NaN NaN \n",
"1757 DrugMatrix NaN NaN \n",
"\n",
" Cell ChEMBL ID \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"1753 CHEMBL3307715 \n",
"1754 CHEMBL3307715 \n",
"1755 CHEMBL3307715 \n",
"1756 CHEMBL3307715 \n",
"1757 CHEMBL3307715 \n",
"\n",
"[1758 rows x 41 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 读取 Excel 文件\n",
"data_excel2 = pd.read_excel('output6.xlsx', sheet_name='Sheet2')\n",
"data_excel2"
]
},
{
"cell_type": "markdown",
"id": "9a23ec85",
"metadata": {},
"source": [
"## 重复数据删除"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e210b229",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}