{ "cells": [ { "cell_type": "markdown", "id": "973482e4", "metadata": {}, "source": [ "## Pandas读取数据" ] }, { "cell_type": "code", "execution_count": 1, "id": "beb98a6b", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "id": "76818406", "metadata": {}, "source": [ "## CSV文件的读取与保存" ] }, { "cell_type": "code", "execution_count": 2, "id": "682bdd7c", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Molecule ChEMBL IDMolecule NameMolecule Max PhaseMolecular Weight#RO5 ViolationsAlogPCompound KeySmilesStandard TypeStandard Relation...Target ChEMBL IDTarget NameTarget OrganismTarget TypeDocument ChEMBL IDSource IDSource DescriptionDocument JournalDocument YearCell ChEMBL ID
0CHEMBL465295NaN0371.390.01.8929CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL12212461Scientific LiteratureBioorg. Med. Chem. Lett.2010.0NaN
1CHEMBL385800NaN0216.020.02.1350Clc1cc(Cl)c2C(=O)C(=O)Nc2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
2CHEMBL222190NaN0181.580.01.4743Clc1cccc2C(=O)C(=O)Nc12Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
3CHEMBL327012NaN0181.580.01.4733Clc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
4CHEMBL2226605-METHOXYISATIN0177.160.00.8331COc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
..................................................................
1753CHEMBL105MITOMYCIN4334.330.0-1.65Mitomycin CCO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1754CHEMBL1064SIMVASTATIN4418.570.04.59simvastatinCCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1755CHEMBL841LOPERAMIDE4477.051.05.09loperamideCN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1756CHEMBL196ASCORBIC ACID4176.120.0-1.41ascorbic acidOC[C@H](O)[C@H]1OC(=O)C(=C1O)OKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1757CHEMBL741LAMOTRIGINE4256.100.02.01LAMOTRIGINENc1nnc(c(N)n1)c2cccc(Cl)c2ClKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
\n", "

1758 rows × 41 columns

\n", "
" ], "text/plain": [ " Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n", "0 CHEMBL465295 NaN 0 \n", "1 CHEMBL385800 NaN 0 \n", "2 CHEMBL222190 NaN 0 \n", "3 CHEMBL327012 NaN 0 \n", "4 CHEMBL222660 5-METHOXYISATIN 0 \n", "... ... ... ... \n", "1753 CHEMBL105 MITOMYCIN 4 \n", "1754 CHEMBL1064 SIMVASTATIN 4 \n", "1755 CHEMBL841 LOPERAMIDE 4 \n", "1756 CHEMBL196 ASCORBIC ACID 4 \n", "1757 CHEMBL741 LAMOTRIGINE 4 \n", "\n", " Molecular Weight #RO5 Violations AlogP Compound Key \\\n", "0 371.39 0.0 1.89 29 \n", "1 216.02 0.0 2.13 50 \n", "2 181.58 0.0 1.47 43 \n", "3 181.58 0.0 1.47 33 \n", "4 177.16 0.0 0.83 31 \n", "... ... ... ... ... \n", "1753 334.33 0.0 -1.65 Mitomycin C \n", "1754 418.57 0.0 4.59 simvastatin \n", "1755 477.05 1.0 5.09 loperamide \n", "1756 176.12 0.0 -1.41 ascorbic acid \n", "1757 256.10 0.0 2.01 LAMOTRIGINE \n", "\n", " Smiles Standard Type \\\n", "0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n", "1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n", "2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n", "3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n", "4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n", "... ... ... \n", "1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n", "1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n", "1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n", "1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n", "1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n", "\n", " Standard Relation ... Target ChEMBL ID Target Name \\\n", "0 NaN ... CHEMBL220 Acetylcholinesterase \n", "1 '>' ... CHEMBL220 Acetylcholinesterase \n", "2 '>' ... CHEMBL220 Acetylcholinesterase \n", "3 '>' ... CHEMBL220 Acetylcholinesterase \n", "4 '>' ... CHEMBL220 Acetylcholinesterase \n", "... ... ... ... ... \n", "1753 NaN ... CHEMBL220 Acetylcholinesterase \n", "1754 NaN ... CHEMBL220 Acetylcholinesterase \n", "1755 NaN ... CHEMBL220 Acetylcholinesterase \n", "1756 NaN ... CHEMBL220 Acetylcholinesterase \n", "1757 NaN ... 
CHEMBL220 Acetylcholinesterase \n", "\n", " Target Organism Target Type Document ChEMBL ID Source ID \\\n", "0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n", "1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "... ... ... ... ... \n", "1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "\n", " Source Description Document Journal Document Year \\\n", "0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n", "1 Scientific Literature J. Med. Chem. 2007.0 \n", "2 Scientific Literature J. Med. Chem. 2007.0 \n", "3 Scientific Literature J. Med. Chem. 2007.0 \n", "4 Scientific Literature J. Med. Chem. 2007.0 \n", "... ... ... ... \n", "1753 DrugMatrix NaN NaN \n", "1754 DrugMatrix NaN NaN \n", "1755 DrugMatrix NaN NaN \n", "1756 DrugMatrix NaN NaN \n", "1757 DrugMatrix NaN NaN \n", "\n", " Cell ChEMBL ID \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... 
\n", "1753 CHEMBL3307715 \n", "1754 CHEMBL3307715 \n", "1755 CHEMBL3307715 \n", "1756 CHEMBL3307715 \n", "1757 CHEMBL3307715 \n", "\n", "[1758 rows x 41 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 读取CSV文件\n", "data = pd.read_csv('output.csv')\n", "# 查看数据\n", "data" ] }, { "cell_type": "code", "execution_count": 4, "id": "2c72ff5c", "metadata": {}, "outputs": [], "source": [ "data.to_csv('output1.csv',index=True)" ] }, { "cell_type": "code", "execution_count": 32, "id": "d98130c9", "metadata": {}, "outputs": [], "source": [ "data.to_csv('output2.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 33, "id": "3fd5ac0f", "metadata": {}, "outputs": [], "source": [ "data.to_csv('output3.csv', encoding='utf-8', index=False, sep=';')" ] }, { "cell_type": "code", "execution_count": 43, "id": "3a7a6bba", "metadata": {}, "outputs": [], "source": [ "data.to_csv('output4.csv', encoding='GBK', index=False, sep=';')" ] }, { "cell_type": "code", "execution_count": 35, "id": "f8467281", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Molecule ChEMBL IDMolecule NameMolecule Max PhaseMolecular Weight#RO5 ViolationsAlogPCompound KeySmilesStandard TypeStandard Relation...Target ChEMBL IDTarget NameTarget OrganismTarget TypeDocument ChEMBL IDSource IDSource DescriptionDocument JournalDocument YearCell ChEMBL ID
0CHEMBL465295NaN0371.390.01.8929CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL12212461Scientific LiteratureBioorg. Med. Chem. Lett.2010.0NaN
1CHEMBL385800NaN0216.020.02.1350Clc1cc(Cl)c2C(=O)C(=O)Nc2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
2CHEMBL222190NaN0181.580.01.4743Clc1cccc2C(=O)C(=O)Nc12Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
3CHEMBL327012NaN0181.580.01.4733Clc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
4CHEMBL2226605-METHOXYISATIN0177.160.00.8331COc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
..................................................................
1753CHEMBL105MITOMYCIN4334.330.0-1.65Mitomycin CCO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1754CHEMBL1064SIMVASTATIN4418.570.04.59simvastatinCCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1755CHEMBL841LOPERAMIDE4477.051.05.09loperamideCN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1756CHEMBL196ASCORBIC ACID4176.120.0-1.41ascorbic acidOC[C@H](O)[C@H]1OC(=O)C(=C1O)OKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1757CHEMBL741LAMOTRIGINE4256.100.02.01LAMOTRIGINENc1nnc(c(N)n1)c2cccc(Cl)c2ClKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
\n", "

1758 rows × 41 columns

\n", "
" ], "text/plain": [ " Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n", "0 CHEMBL465295 NaN 0 \n", "1 CHEMBL385800 NaN 0 \n", "2 CHEMBL222190 NaN 0 \n", "3 CHEMBL327012 NaN 0 \n", "4 CHEMBL222660 5-METHOXYISATIN 0 \n", "... ... ... ... \n", "1753 CHEMBL105 MITOMYCIN 4 \n", "1754 CHEMBL1064 SIMVASTATIN 4 \n", "1755 CHEMBL841 LOPERAMIDE 4 \n", "1756 CHEMBL196 ASCORBIC ACID 4 \n", "1757 CHEMBL741 LAMOTRIGINE 4 \n", "\n", " Molecular Weight #RO5 Violations AlogP Compound Key \\\n", "0 371.39 0.0 1.89 29 \n", "1 216.02 0.0 2.13 50 \n", "2 181.58 0.0 1.47 43 \n", "3 181.58 0.0 1.47 33 \n", "4 177.16 0.0 0.83 31 \n", "... ... ... ... ... \n", "1753 334.33 0.0 -1.65 Mitomycin C \n", "1754 418.57 0.0 4.59 simvastatin \n", "1755 477.05 1.0 5.09 loperamide \n", "1756 176.12 0.0 -1.41 ascorbic acid \n", "1757 256.10 0.0 2.01 LAMOTRIGINE \n", "\n", " Smiles Standard Type \\\n", "0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n", "1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n", "2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n", "3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n", "4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n", "... ... ... \n", "1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n", "1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n", "1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n", "1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n", "1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n", "\n", " Standard Relation ... Target ChEMBL ID Target Name \\\n", "0 NaN ... CHEMBL220 Acetylcholinesterase \n", "1 '>' ... CHEMBL220 Acetylcholinesterase \n", "2 '>' ... CHEMBL220 Acetylcholinesterase \n", "3 '>' ... CHEMBL220 Acetylcholinesterase \n", "4 '>' ... CHEMBL220 Acetylcholinesterase \n", "... ... ... ... ... \n", "1753 NaN ... CHEMBL220 Acetylcholinesterase \n", "1754 NaN ... CHEMBL220 Acetylcholinesterase \n", "1755 NaN ... CHEMBL220 Acetylcholinesterase \n", "1756 NaN ... CHEMBL220 Acetylcholinesterase \n", "1757 NaN ... 
CHEMBL220 Acetylcholinesterase \n", "\n", " Target Organism Target Type Document ChEMBL ID Source ID \\\n", "0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n", "1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "... ... ... ... ... \n", "1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "\n", " Source Description Document Journal Document Year \\\n", "0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n", "1 Scientific Literature J. Med. Chem. 2007.0 \n", "2 Scientific Literature J. Med. Chem. 2007.0 \n", "3 Scientific Literature J. Med. Chem. 2007.0 \n", "4 Scientific Literature J. Med. Chem. 2007.0 \n", "... ... ... ... \n", "1753 DrugMatrix NaN NaN \n", "1754 DrugMatrix NaN NaN \n", "1755 DrugMatrix NaN NaN \n", "1756 DrugMatrix NaN NaN \n", "1757 DrugMatrix NaN NaN \n", "\n", " Cell ChEMBL ID \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "1753 CHEMBL3307715 \n", "1754 CHEMBL3307715 \n", "1755 CHEMBL3307715 \n", "1756 CHEMBL3307715 \n", "1757 CHEMBL3307715 \n", "\n", "[1758 rows x 41 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 读取CSV文件\n", "data1 = pd.read_csv('output3.csv',sep=';')\n", "# 查看数据\n", "data1" ] }, { "cell_type": "markdown", "id": "ac843c56", "metadata": {}, "source": [ "## Excel文件的读取与保存" ] }, { "cell_type": "code", "execution_count": 5, "id": "cccd0009", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Molecule ChEMBL IDMolecule NameMolecule Max PhaseMolecular Weight#RO5 ViolationsAlogPCompound KeySmilesStandard TypeStandard Relation...Target ChEMBL IDTarget NameTarget OrganismTarget TypeDocument ChEMBL IDSource IDSource DescriptionDocument JournalDocument YearCell ChEMBL ID
0CHEMBL465295NaN0371.390.01.8929CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL12212461Scientific LiteratureBioorg. Med. Chem. Lett.2010.0NaN
1CHEMBL385800NaN0216.020.02.1350Clc1cc(Cl)c2C(=O)C(=O)Nc2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
2CHEMBL222190NaN0181.580.01.4743Clc1cccc2C(=O)C(=O)Nc12Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
3CHEMBL327012NaN0181.580.01.4733Clc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
4CHEMBL2226605-METHOXYISATIN0177.160.00.8331COc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
..................................................................
1753CHEMBL105MITOMYCIN4334.330.0-1.65Mitomycin CCO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1754CHEMBL1064SIMVASTATIN4418.570.04.59simvastatinCCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1755CHEMBL841LOPERAMIDE4477.051.05.09loperamideCN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1756CHEMBL196ASCORBIC ACID4176.120.0-1.41ascorbic acidOC[C@H](O)[C@H]1OC(=O)C(=C1O)OKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1757CHEMBL741LAMOTRIGINE4256.100.02.01LAMOTRIGINENc1nnc(c(N)n1)c2cccc(Cl)c2ClKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
\n", "

1758 rows × 41 columns

\n", "
" ], "text/plain": [ " Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n", "0 CHEMBL465295 NaN 0 \n", "1 CHEMBL385800 NaN 0 \n", "2 CHEMBL222190 NaN 0 \n", "3 CHEMBL327012 NaN 0 \n", "4 CHEMBL222660 5-METHOXYISATIN 0 \n", "... ... ... ... \n", "1753 CHEMBL105 MITOMYCIN 4 \n", "1754 CHEMBL1064 SIMVASTATIN 4 \n", "1755 CHEMBL841 LOPERAMIDE 4 \n", "1756 CHEMBL196 ASCORBIC ACID 4 \n", "1757 CHEMBL741 LAMOTRIGINE 4 \n", "\n", " Molecular Weight #RO5 Violations AlogP Compound Key \\\n", "0 371.39 0.0 1.89 29 \n", "1 216.02 0.0 2.13 50 \n", "2 181.58 0.0 1.47 43 \n", "3 181.58 0.0 1.47 33 \n", "4 177.16 0.0 0.83 31 \n", "... ... ... ... ... \n", "1753 334.33 0.0 -1.65 Mitomycin C \n", "1754 418.57 0.0 4.59 simvastatin \n", "1755 477.05 1.0 5.09 loperamide \n", "1756 176.12 0.0 -1.41 ascorbic acid \n", "1757 256.10 0.0 2.01 LAMOTRIGINE \n", "\n", " Smiles Standard Type \\\n", "0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n", "1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n", "2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n", "3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n", "4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n", "... ... ... \n", "1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n", "1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n", "1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n", "1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n", "1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n", "\n", " Standard Relation ... Target ChEMBL ID Target Name \\\n", "0 NaN ... CHEMBL220 Acetylcholinesterase \n", "1 '>' ... CHEMBL220 Acetylcholinesterase \n", "2 '>' ... CHEMBL220 Acetylcholinesterase \n", "3 '>' ... CHEMBL220 Acetylcholinesterase \n", "4 '>' ... CHEMBL220 Acetylcholinesterase \n", "... ... ... ... ... \n", "1753 NaN ... CHEMBL220 Acetylcholinesterase \n", "1754 NaN ... CHEMBL220 Acetylcholinesterase \n", "1755 NaN ... CHEMBL220 Acetylcholinesterase \n", "1756 NaN ... CHEMBL220 Acetylcholinesterase \n", "1757 NaN ... 
CHEMBL220 Acetylcholinesterase \n", "\n", " Target Organism Target Type Document ChEMBL ID Source ID \\\n", "0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n", "1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "... ... ... ... ... \n", "1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "\n", " Source Description Document Journal Document Year \\\n", "0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n", "1 Scientific Literature J. Med. Chem. 2007.0 \n", "2 Scientific Literature J. Med. Chem. 2007.0 \n", "3 Scientific Literature J. Med. Chem. 2007.0 \n", "4 Scientific Literature J. Med. Chem. 2007.0 \n", "... ... ... ... \n", "1753 DrugMatrix NaN NaN \n", "1754 DrugMatrix NaN NaN \n", "1755 DrugMatrix NaN NaN \n", "1756 DrugMatrix NaN NaN \n", "1757 DrugMatrix NaN NaN \n", "\n", " Cell ChEMBL ID \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... 
\n", "1753 CHEMBL3307715 \n", "1754 CHEMBL3307715 \n", "1755 CHEMBL3307715 \n", "1756 CHEMBL3307715 \n", "1757 CHEMBL3307715 \n", "\n", "[1758 rows x 41 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 读取 Excel 文件\n", "data_excel = pd.read_excel('output.xlsx')\n", "data_excel" ] }, { "cell_type": "markdown", "id": "09110d1a", "metadata": {}, "source": [ "在这个示例中,我们使用read_excel()函数来读取Excel文件中的数据,并将数据加载到一个DataFrame对象中。sheet_name参数用于指定要读取的工作表名称。如果省略此参数,则默认读取第一个工作表。" ] }, { "cell_type": "code", "execution_count": 20, "id": "eeaeade9", "metadata": {}, "outputs": [], "source": [ "# Write `data` to sheet 'Sheet1' without the row index.\n", "# No `encoding` argument: pandas deprecated that keyword for to_excel\n", "# (it emitted a FutureWarning here) and newer releases reject it.\n", "data.to_excel('output5.xlsx', index=False, sheet_name='Sheet1')" ] }, { "cell_type": "code", "execution_count": 10, "id": "d1950caf", "metadata": {}, "outputs": [], "source": [ "data_excel.to_excel('output5.xlsx', index=False)" ] }, { "cell_type": "code", "execution_count": 7, "id": "bdd897ec", "metadata": {}, "outputs": [], "source": [ "# Write `data_excel` to sheet 'Sheet2' without the row index.\n", "# No `encoding` argument: pandas deprecated that keyword for to_excel\n", "# (it emitted a FutureWarning here) and newer releases reject it.\n", "data_excel.to_excel('output6.xlsx', index=False, sheet_name='Sheet2')" ] }, { "cell_type": "code", "execution_count": 8, "id": "e3f57ea1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Molecule ChEMBL IDMolecule NameMolecule Max PhaseMolecular Weight#RO5 ViolationsAlogPCompound KeySmilesStandard TypeStandard Relation...Target ChEMBL IDTarget NameTarget OrganismTarget TypeDocument ChEMBL IDSource IDSource DescriptionDocument JournalDocument YearCell ChEMBL ID
0CHEMBL465295NaN0371.390.01.8929CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL12212461Scientific LiteratureBioorg. Med. Chem. Lett.2010.0NaN
1CHEMBL385800NaN0216.020.02.1350Clc1cc(Cl)c2C(=O)C(=O)Nc2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
2CHEMBL222190NaN0181.580.01.4743Clc1cccc2C(=O)C(=O)Nc12Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
3CHEMBL327012NaN0181.580.01.4733Clc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
4CHEMBL2226605-METHOXYISATIN0177.160.00.8331COc1ccc2NC(=O)C(=O)c2c1Ki'>'...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL11444731Scientific LiteratureJ. Med. Chem.2007.0NaN
..................................................................
1753CHEMBL105MITOMYCIN4334.330.0-1.65Mitomycin CCO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1754CHEMBL1064SIMVASTATIN4418.570.04.59simvastatinCCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1755CHEMBL841LOPERAMIDE4477.051.05.09loperamideCN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc...KiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1756CHEMBL196ASCORBIC ACID4176.120.0-1.41ascorbic acidOC[C@H](O)[C@H]1OC(=O)C(=C1O)OKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
1757CHEMBL741LAMOTRIGINE4256.100.02.01LAMOTRIGINENc1nnc(c(N)n1)c2cccc(Cl)c2ClKiNaN...CHEMBL220AcetylcholinesteraseHomo sapiensSINGLE PROTEINCHEMBL190904615DrugMatrixNaNNaNCHEMBL3307715
\n", "

1758 rows × 41 columns

\n", "
" ], "text/plain": [ " Molecule ChEMBL ID Molecule Name Molecule Max Phase \\\n", "0 CHEMBL465295 NaN 0 \n", "1 CHEMBL385800 NaN 0 \n", "2 CHEMBL222190 NaN 0 \n", "3 CHEMBL327012 NaN 0 \n", "4 CHEMBL222660 5-METHOXYISATIN 0 \n", "... ... ... ... \n", "1753 CHEMBL105 MITOMYCIN 4 \n", "1754 CHEMBL1064 SIMVASTATIN 4 \n", "1755 CHEMBL841 LOPERAMIDE 4 \n", "1756 CHEMBL196 ASCORBIC ACID 4 \n", "1757 CHEMBL741 LAMOTRIGINE 4 \n", "\n", " Molecular Weight #RO5 Violations AlogP Compound Key \\\n", "0 371.39 0.0 1.89 29 \n", "1 216.02 0.0 2.13 50 \n", "2 181.58 0.0 1.47 43 \n", "3 181.58 0.0 1.47 33 \n", "4 177.16 0.0 0.83 31 \n", "... ... ... ... ... \n", "1753 334.33 0.0 -1.65 Mitomycin C \n", "1754 418.57 0.0 4.59 simvastatin \n", "1755 477.05 1.0 5.09 loperamide \n", "1756 176.12 0.0 -1.41 ascorbic acid \n", "1757 256.10 0.0 2.01 LAMOTRIGINE \n", "\n", " Smiles Standard Type \\\n", "0 CC(=O)O[C@H]1C=C2CCN3Cc4cc5OCOc5cc4[C@H]([C@@H... Ki \n", "1 Clc1cc(Cl)c2C(=O)C(=O)Nc2c1 Ki \n", "2 Clc1cccc2C(=O)C(=O)Nc12 Ki \n", "3 Clc1ccc2NC(=O)C(=O)c2c1 Ki \n", "4 COc1ccc2NC(=O)C(=O)c2c1 Ki \n", "... ... ... \n", "1753 CO[C@]12[C@H]3N[C@H]3CN1C4=C([C@H]2COC(=O)N)C(... Ki \n", "1754 CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)... Ki \n", "1755 CN(C)C(=O)C(CCN1CCC(O)(CC1)c2ccc(Cl)cc2)(c3ccc... Ki \n", "1756 OC[C@H](O)[C@H]1OC(=O)C(=C1O)O Ki \n", "1757 Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl Ki \n", "\n", " Standard Relation ... Target ChEMBL ID Target Name \\\n", "0 NaN ... CHEMBL220 Acetylcholinesterase \n", "1 '>' ... CHEMBL220 Acetylcholinesterase \n", "2 '>' ... CHEMBL220 Acetylcholinesterase \n", "3 '>' ... CHEMBL220 Acetylcholinesterase \n", "4 '>' ... CHEMBL220 Acetylcholinesterase \n", "... ... ... ... ... \n", "1753 NaN ... CHEMBL220 Acetylcholinesterase \n", "1754 NaN ... CHEMBL220 Acetylcholinesterase \n", "1755 NaN ... CHEMBL220 Acetylcholinesterase \n", "1756 NaN ... CHEMBL220 Acetylcholinesterase \n", "1757 NaN ... 
CHEMBL220 Acetylcholinesterase \n", "\n", " Target Organism Target Type Document ChEMBL ID Source ID \\\n", "0 Homo sapiens SINGLE PROTEIN CHEMBL1221246 1 \n", "1 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "2 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "3 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "4 Homo sapiens SINGLE PROTEIN CHEMBL1144473 1 \n", "... ... ... ... ... \n", "1753 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1754 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1755 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1756 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "1757 Homo sapiens SINGLE PROTEIN CHEMBL1909046 15 \n", "\n", " Source Description Document Journal Document Year \\\n", "0 Scientific Literature Bioorg. Med. Chem. Lett. 2010.0 \n", "1 Scientific Literature J. Med. Chem. 2007.0 \n", "2 Scientific Literature J. Med. Chem. 2007.0 \n", "3 Scientific Literature J. Med. Chem. 2007.0 \n", "4 Scientific Literature J. Med. Chem. 2007.0 \n", "... ... ... ... \n", "1753 DrugMatrix NaN NaN \n", "1754 DrugMatrix NaN NaN \n", "1755 DrugMatrix NaN NaN \n", "1756 DrugMatrix NaN NaN \n", "1757 DrugMatrix NaN NaN \n", "\n", " Cell ChEMBL ID \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... 
\n", "1753 CHEMBL3307715 \n", "1754 CHEMBL3307715 \n", "1755 CHEMBL3307715 \n", "1756 CHEMBL3307715 \n", "1757 CHEMBL3307715 \n", "\n", "[1758 rows x 41 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 读取 Excel 文件\n", "data_excel2 = pd.read_excel('output6.xlsx', sheet_name='Sheet2')\n", "data_excel2" ] }, { "cell_type": "markdown", "id": "9a23ec85", "metadata": {}, "source": [ "## 重复数据删除" ] }, { "cell_type": "code", "execution_count": null, "id": "e210b229", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 5 }