{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Usuario\\anaconda3\\lib\\site-packages\\pandas\\core\\computation\\expressions.py:20: UserWarning: Pandas requires version '2.7.3' or newer of 'numexpr' (version '2.7.1' currently installed).\n", " from pandas.core.computation.check import NUMEXPR_INSTALLED\n" ] } ], "source": [ "import os\n", "from getpass import getpass\n", "\n", "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from pandas import DataFrame\n", "from cmapPy.pandasGEXpress.parse import parse\n", "from scipy.stats import hypergeom\n", "from tqdm import tqdm\n", "import mysql.connector" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Open the connection to the DISNET drugs-layer MySQL database.\n", "# Connection settings are read from environment variables so that no secret is stored in the notebook;\n", "# the non-secret settings fall back to the values this cell used before.\n", "disnet_db_ares = mysql.connector.connect(\n", "    host=os.environ.get(\"DISNET_DB_HOST\", \"138.4.130.153\"),\n", "    port=os.environ.get(\"DISNET_DB_PORT\", \"30602\"),\n", "    user=os.environ.get(\"DISNET_DB_USER\", \"disnet_user\"),\n", "    # The password has no default: set DISNET_DB_PASSWORD, otherwise it is asked for interactively.\n", "    password=os.environ.get(\"DISNET_DB_PASSWORD\") or getpass(\"DISNET database password: \"),\n", "    database=os.environ.get(\"DISNET_DB_NAME\", \"disnet_drugslayer\")\n", ")\n", "\n", "disnet_mysql_cursor = disnet_db_ares.cursor()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "q1 = \"\"\"SELECT * FROM disnet_drugslayer.drug \"\"\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. 
Please consider using SQLAlchemy.\n", " drug_disnet =pd.read_sql(q1, con=disnet_db_ares)\n" ] } ], "source": [ "drug_disnet =pd.read_sql(q1, con=disnet_db_ares)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "pertub_id = pd.read_csv(\"./GSE92742_Broad_LINCS_pert_info.txt\", sep=\"\\t\", dtype=str)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "28957" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(pertub_id[\"pert_iname\"].unique())" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "pertub_id_filter = pertub_id[pertub_id[\"is_touchstone\"]== \"1\"]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Keep only the rows whose pert_type is \"trt_cp\".\n", "# .copy() makes this an independent DataFrame: a later cell adds a 'drug_name' column to it, and\n", "# assigning a column to a filtered slice is what raises pandas' SettingWithCopyWarning.\n", "pertub_id_filter_cp = pertub_id_filter[pertub_id_filter[\"pert_type\"] ==\"trt_cp\"].copy()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2429" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(pertub_id_filter_cp[\"pert_iname\"].unique())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "drug_common_inchi = drug_disnet.merge(pertub_id_filter_cp, on =\"inchi_key\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "pertub_id_filter_cp['drug_name'] = 
pertub_id_filter_cp['pert_iname'].str.upper() " ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "drug_common_name = drug_disnet.merge(pertub_id_filter_cp, on =\"drug_name\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "drug_common_name_fil = drug_common_name[[\"drug_id\",\"drug_name\"]]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "drug_common_inchi_fil = drug_common_inchi[[\"drug_id\",\"drug_name\"]]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "drugs_all_disnet_clue = pd.concat([drug_common_name,drug_common_inchi]).drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_idsource_iddrug_namemolecular_typechemical_structureinchi_key_xpert_idpert_inamepert_typeis_touchstoneinchi_key_prefixinchi_key_ycanonical_smilespubchem_cidinchi_key
0CHEMBL10001CETIRIZINESmall moleculeO=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1ZKLPARSLTMPFCP-UHFFFAOYSA-NBRD-A42571354cetirizinetrt_cp1ZKLPARSLTMPFCPZKLPARSLTMPFCP-UHFFFAOYSA-NOC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1-666NaN
1CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1HCFDWZZGGLSKEP-UHFFFAOYSA-NBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPHCFDWZZGGLSKEP-UHFFFAOYSA-NCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666NaN
2CHEMBL10081BEPRIDILSmall moleculeCC(C)COCC(CN(Cc1ccccc1)c1ccccc1)N1CCCC1UIEATEWHFDRYRU-UHFFFAOYSA-NBRD-A91008255bepridiltrt_cp1UIEATEWHFDRYRUUIEATEWHFDRYRU-UHFFFAOYSA-NCC(C)COCC(CN(Cc1ccccc1)c1ccccc1)N1CCCC1-666NaN
3CHEMBL1011PHENYLBUTAZONESmall moleculeCCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=OVYMDGNCVAMGZFE-UHFFFAOYSA-NBRD-K10843433phenylbutazonetrt_cp1VYMDGNCVAMGZFEVYMDGNCVAMGZFE-UHFFFAOYSA-NCCCCC1C(=O)N(N(C1=O)c1ccccc1)c1ccccc14781NaN
4CHEMBL10171TELMISARTANSmall moleculeCCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2...RMMXLENWKUUMAY-UHFFFAOYSA-NBRD-K73999723telmisartantrt_cp1RMMXLENWKUUMAYRMMXLENWKUUMAY-UHFFFAOYSA-NCCCc1nc2c(C)cc(cc2n1Cc1ccc(cc1)-c1ccccc1C(O)=O...65999NaN
................................................
642CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-N
643CHEMBL9911STAVUDINESmall moleculeCc1cn([C@H]2C=C[C@@H](CO)O2)c(=O)[nH]c1=ONaNBRD-K93880783stavudinetrt_cp1XNKLLVCARDGLGLNaNCc1cn([C@@H]2O[C@H](CO)C=C2)c(=O)[nH]c1=O18283XNKLLVCARDGLGL-JGVFFNPUSA-N
644CHEMBL9961CEFOXITINSmall moleculeCO[C@@]1(NC(=O)Cc2cccs2)C(=O)N2C(C(=O)O)=C(COC...NaNBRD-K70976396cefoxitintrt_cp1WZOZEZRFJCJXNZNaNCO[C@]1(NC(=O)Cc2cccs2)[C@H]2SCC(COC(N)=O)=C(N...23667300WZOZEZRFJCJXNZ-ZBFHGGJFSA-N
645CHEMBL99671PIRENZEPINESmall moleculeCN1CCN(CC(=O)N2c3ccccc3C(=O)Nc3cccnc32)CC1NaNBRD-K89375097pirenzepinetrt_cp1RMHMFHUVIITRHFNaNCN1CCN(CC(=O)N2c3ccccc3C(=O)Nc3cccnc23)CC1185248RMHMFHUVIITRHF-UHFFFAOYSA-N
646CHEMBL9981LORATADINESmall moleculeCCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1NaNBRD-K82795137loratadinetrt_cp1JCCNYMKQOSZNPWNaNCCOC(=O)N1CCC(CC1)=C1c2ccc(Cl)cc2CCc2cccnc123957JCCNYMKQOSZNPW-UHFFFAOYSA-N
\n", "

1390 rows × 15 columns

\n", "
" ], "text/plain": [ " drug_id source_id drug_name molecular_type \\\n", "0 CHEMBL1000 1 CETIRIZINE Small molecule \n", "1 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "2 CHEMBL1008 1 BEPRIDIL Small molecule \n", "3 CHEMBL101 1 PHENYLBUTAZONE Small molecule \n", "4 CHEMBL1017 1 TELMISARTAN Small molecule \n", ".. ... ... ... ... \n", "642 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "643 CHEMBL991 1 STAVUDINE Small molecule \n", "644 CHEMBL996 1 CEFOXITIN Small molecule \n", "645 CHEMBL9967 1 PIRENZEPINE Small molecule \n", "646 CHEMBL998 1 LORATADINE Small molecule \n", "\n", " chemical_structure \\\n", "0 O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 \n", "1 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "2 CC(C)COCC(CN(Cc1ccccc1)c1ccccc1)N1CCCC1 \n", "3 CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O \n", "4 CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2... \n", ".. ... \n", "642 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "643 Cc1cn([C@H]2C=C[C@@H](CO)O2)c(=O)[nH]c1=O \n", "644 CO[C@@]1(NC(=O)Cc2cccs2)C(=O)N2C(C(=O)O)=C(COC... \n", "645 CN1CCN(CC(=O)N2c3ccccc3C(=O)Nc3cccnc32)CC1 \n", "646 CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1 \n", "\n", " inchi_key_x pert_id pert_iname pert_type \\\n", "0 ZKLPARSLTMPFCP-UHFFFAOYSA-N BRD-A42571354 cetirizine trt_cp \n", "1 HCFDWZZGGLSKEP-UHFFFAOYSA-N BRD-A44008656 doxylamine trt_cp \n", "2 UIEATEWHFDRYRU-UHFFFAOYSA-N BRD-A91008255 bepridil trt_cp \n", "3 VYMDGNCVAMGZFE-UHFFFAOYSA-N BRD-K10843433 phenylbutazone trt_cp \n", "4 RMMXLENWKUUMAY-UHFFFAOYSA-N BRD-K73999723 telmisartan trt_cp \n", ".. ... ... ... ... 
\n", "642 NaN BRD-K94353609 fluocinolone trt_cp \n", "643 NaN BRD-K93880783 stavudine trt_cp \n", "644 NaN BRD-K70976396 cefoxitin trt_cp \n", "645 NaN BRD-K89375097 pirenzepine trt_cp \n", "646 NaN BRD-K82795137 loratadine trt_cp \n", "\n", " is_touchstone inchi_key_prefix inchi_key_y \\\n", "0 1 ZKLPARSLTMPFCP ZKLPARSLTMPFCP-UHFFFAOYSA-N \n", "1 1 HCFDWZZGGLSKEP HCFDWZZGGLSKEP-UHFFFAOYSA-N \n", "2 1 UIEATEWHFDRYRU UIEATEWHFDRYRU-UHFFFAOYSA-N \n", "3 1 VYMDGNCVAMGZFE VYMDGNCVAMGZFE-UHFFFAOYSA-N \n", "4 1 RMMXLENWKUUMAY RMMXLENWKUUMAY-UHFFFAOYSA-N \n", ".. ... ... ... \n", "642 1 FEBLZLNTKCEFIT NaN \n", "643 1 XNKLLVCARDGLGL NaN \n", "644 1 WZOZEZRFJCJXNZ NaN \n", "645 1 RMHMFHUVIITRHF NaN \n", "646 1 JCCNYMKQOSZNPW NaN \n", "\n", " canonical_smiles pubchem_cid \\\n", "0 OC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1 -666 \n", "1 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "2 CC(C)COCC(CN(Cc1ccccc1)c1ccccc1)N1CCCC1 -666 \n", "3 CCCCC1C(=O)N(N(C1=O)c1ccccc1)c1ccccc1 4781 \n", "4 CCCc1nc2c(C)cc(cc2n1Cc1ccc(cc1)-c1ccccc1C(O)=O... 65999 \n", ".. ... ... \n", "642 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "643 Cc1cn([C@@H]2O[C@H](CO)C=C2)c(=O)[nH]c1=O 18283 \n", "644 CO[C@]1(NC(=O)Cc2cccs2)[C@H]2SCC(COC(N)=O)=C(N... 23667300 \n", "645 CN1CCN(CC(=O)N2c3ccccc3C(=O)Nc3cccnc23)CC1 185248 \n", "646 CCOC(=O)N1CCC(CC1)=C1c2ccc(Cl)cc2CCc2cccnc12 3957 \n", "\n", " inchi_key \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", ".. ... 
\n", "642 FEBLZLNTKCEFIT-VSXGLTOVSA-N \n", "643 XNKLLVCARDGLGL-JGVFFNPUSA-N \n", "644 WZOZEZRFJCJXNZ-ZBFHGGJFSA-N \n", "645 RMHMFHUVIITRHF-UHFFFAOYSA-N \n", "646 JCCNYMKQOSZNPW-UHFFFAOYSA-N \n", "\n", "[1390 rows x 15 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drugs_all_disnet_clue" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "drugs_disnet_clue = pd.concat([drug_common_name_fil,drug_common_inchi_fil]).drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_iddrug_name
0CHEMBL1000CETIRIZINE
1CHEMBL1004DOXYLAMINE
2CHEMBL1008BEPRIDIL
3CHEMBL101PHENYLBUTAZONE
4CHEMBL1017TELMISARTAN
.........
562CHEMBL776METAPROTERENOL
583CHEMBL829METHYLPROMAZINE
603CHEMBL867IOPANOIC ACID
606CHEMBL869NITROFURAZONE
642CHEMBL989FLUOCINOLONE ACETONIDE
\n", "

827 rows × 2 columns

\n", "
" ], "text/plain": [ " drug_id drug_name\n", "0 CHEMBL1000 CETIRIZINE\n", "1 CHEMBL1004 DOXYLAMINE\n", "2 CHEMBL1008 BEPRIDIL\n", "3 CHEMBL101 PHENYLBUTAZONE\n", "4 CHEMBL1017 TELMISARTAN\n", ".. ... ...\n", "562 CHEMBL776 METAPROTERENOL\n", "583 CHEMBL829 METHYLPROMAZINE\n", "603 CHEMBL867 IOPANOIC ACID\n", "606 CHEMBL869 NITROFURAZONE\n", "642 CHEMBL989 FLUOCINOLONE ACETONIDE\n", "\n", "[827 rows x 2 columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drugs_disnet_clue" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "q2 = \"\"\"SELECT * FROM disnet_drugslayer.ATC_code\"\"\"" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", " drug_atc =pd.read_sql(q2, con=disnet_db_ares)\n" ] } ], "source": [ "drug_atc =pd.read_sql(q2, con=disnet_db_ares)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_idATC_code_idsource_id
0CHEMBL1000R06AE071
1CHEMBL100116N02AD011
2CHEMBL1004R06AA091
3CHEMBL1004R06AA591
4CHEMBL1005N01AH061
............
3218CHEMBL991J05AF041
3219CHEMBL996J01DC011
3220CHEMBL9967A02BX031
3221CHEMBL997M05BA061
3222CHEMBL998R06AX131
\n", "

3223 rows × 3 columns

\n", "
" ], "text/plain": [ " drug_id ATC_code_id source_id\n", "0 CHEMBL1000 R06AE07 1\n", "1 CHEMBL100116 N02AD01 1\n", "2 CHEMBL1004 R06AA09 1\n", "3 CHEMBL1004 R06AA59 1\n", "4 CHEMBL1005 N01AH06 1\n", "... ... ... ...\n", "3218 CHEMBL991 J05AF04 1\n", "3219 CHEMBL996 J01DC01 1\n", "3220 CHEMBL9967 A02BX03 1\n", "3221 CHEMBL997 M05BA06 1\n", "3222 CHEMBL998 R06AX13 1\n", "\n", "[3223 rows x 3 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drug_atc" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "atc_class = drugs_disnet_clue.merge(drug_atc,on=\"drug_id\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_iddrug_nameATC_code_idsource_idindex
0CHEMBL1000CETIRIZINER06AE071R
1CHEMBL1004DOXYLAMINER06AA091R
2CHEMBL1004DOXYLAMINER06AA591R
3CHEMBL1008BEPRIDILC08EA021C
4CHEMBL101PHENYLBUTAZONEM01AA011M
..................
1125CHEMBL869NITROFURAZONES02AA021S
1126CHEMBL989FLUOCINOLONE ACETONIDEC05AA101C
1127CHEMBL989FLUOCINOLONE ACETONIDED07AC041D
1128CHEMBL989FLUOCINOLONE ACETONIDES01BA151S
1129CHEMBL989FLUOCINOLONE ACETONIDES02BA081S
\n", "

1130 rows × 5 columns

\n", "
" ], "text/plain": [ " drug_id drug_name ATC_code_id source_id index\n", "0 CHEMBL1000 CETIRIZINE R06AE07 1 R\n", "1 CHEMBL1004 DOXYLAMINE R06AA09 1 R\n", "2 CHEMBL1004 DOXYLAMINE R06AA59 1 R\n", "3 CHEMBL1008 BEPRIDIL C08EA02 1 C\n", "4 CHEMBL101 PHENYLBUTAZONE M01AA01 1 M\n", "... ... ... ... ... ...\n", "1125 CHEMBL869 NITROFURAZONE S02AA02 1 S\n", "1126 CHEMBL989 FLUOCINOLONE ACETONIDE C05AA10 1 C\n", "1127 CHEMBL989 FLUOCINOLONE ACETONIDE D07AC04 1 D\n", "1128 CHEMBL989 FLUOCINOLONE ACETONIDE S01BA15 1 S\n", "1129 CHEMBL989 FLUOCINOLONE ACETONIDE S02BA08 1 S\n", "\n", "[1130 rows x 5 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atc_class['index'] = atc_class['ATC_code_id'].astype(str).str[0]\n", "atc_class" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_iddrug_nameATC_code_idsource_idindex
57CHEMBL1117IDARUBICINL01DB061L
74CHEMBL1173055RUCAPARIBL01XX551L
75CHEMBL1173655AFATINIBL01XE131L
79CHEMBL118CELECOXIBL01XX331L
93CHEMBL1200374EXEMESTANEL02BG061L
..................
1004CHEMBL98VORINOSTATL01XX381L
1057CHEMBL1651906STREPTOZOCINL01AD041L
1081CHEMBL46286OMACETAXINE MEPESUCCINATEL01XX401L
1103CHEMBL601AMINOLEVULINIC ACIDL01XD041L
1114CHEMBL717MEDROXYPROGESTERONE ACETATEL02AB021L
\n", "

99 rows × 5 columns

\n", "
" ], "text/plain": [ " drug_id drug_name ATC_code_id source_id index\n", "57 CHEMBL1117 IDARUBICIN L01DB06 1 L\n", "74 CHEMBL1173055 RUCAPARIB L01XX55 1 L\n", "75 CHEMBL1173655 AFATINIB L01XE13 1 L\n", "79 CHEMBL118 CELECOXIB L01XX33 1 L\n", "93 CHEMBL1200374 EXEMESTANE L02BG06 1 L\n", "... ... ... ... ... ...\n", "1004 CHEMBL98 VORINOSTAT L01XX38 1 L\n", "1057 CHEMBL1651906 STREPTOZOCIN L01AD04 1 L\n", "1081 CHEMBL46286 OMACETAXINE MEPESUCCINATE L01XX40 1 L\n", "1103 CHEMBL601 AMINOLEVULINIC ACID L01XD04 1 L\n", "1114 CHEMBL717 MEDROXYPROGESTERONE ACETATE L02AB02 1 L\n", "\n", "[99 rows x 5 columns]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atc_class[atc_class[\"index\"]== \"L\"]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "### repito el proceso con los datos que tienen las claves para buscar en clue" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "atc_class_all = drugs_all_disnet_clue.merge(drug_atc,on=\"drug_id\")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_idsource_id_xdrug_namemolecular_typechemical_structureinchi_key_xpert_idpert_inamepert_typeis_touchstoneinchi_key_prefixinchi_key_ycanonical_smilespubchem_cidinchi_keyATC_code_idsource_id_y
0CHEMBL10001CETIRIZINESmall moleculeO=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1ZKLPARSLTMPFCP-UHFFFAOYSA-NBRD-A42571354cetirizinetrt_cp1ZKLPARSLTMPFCPZKLPARSLTMPFCP-UHFFFAOYSA-NOC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1-666NaNR06AE071
1CHEMBL10001CETIRIZINESmall moleculeO=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1NaNBRD-A42571354cetirizinetrt_cp1ZKLPARSLTMPFCPNaNOC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1-666ZKLPARSLTMPFCP-UHFFFAOYSA-NR06AE071
2CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1HCFDWZZGGLSKEP-UHFFFAOYSA-NBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPHCFDWZZGGLSKEP-UHFFFAOYSA-NCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666NaNR06AA091
3CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1HCFDWZZGGLSKEP-UHFFFAOYSA-NBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPHCFDWZZGGLSKEP-UHFFFAOYSA-NCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666NaNR06AA591
4CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1NaNBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPNaNCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666HCFDWZZGGLSKEP-UHFFFAOYSA-NR06AA091
......................................................
1840CHEMBL8691NITROFURAZONESmall moleculeNC(=O)N/N=C/c1ccc([N+](=O)[O-])o1NaNBRD-K79092138nitrofuraltrt_cp1IAIWVQXQOWNYOUNaNNC(=O)N/N=C/c1ccc(o1)[N+](=O)[O-]-666IAIWVQXQOWNYOU-FPYGCLRLSA-NS02AA021
1841CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-NC05AA101
1842CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-ND07AC041
1843CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-NS01BA151
1844CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-NS02BA081
\n", "

1845 rows × 17 columns

\n", "
" ], "text/plain": [ " drug_id source_id_x drug_name molecular_type \\\n", "0 CHEMBL1000 1 CETIRIZINE Small molecule \n", "1 CHEMBL1000 1 CETIRIZINE Small molecule \n", "2 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "3 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "4 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "... ... ... ... ... \n", "1840 CHEMBL869 1 NITROFURAZONE Small molecule \n", "1841 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "1842 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "1843 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "1844 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "\n", " chemical_structure \\\n", "0 O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 \n", "1 O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 \n", "2 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "3 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "4 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "... ... \n", "1840 NC(=O)N/N=C/c1ccc([N+](=O)[O-])o1 \n", "1841 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "1842 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "1843 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "1844 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "\n", " inchi_key_x pert_id pert_iname pert_type \\\n", "0 ZKLPARSLTMPFCP-UHFFFAOYSA-N BRD-A42571354 cetirizine trt_cp \n", "1 NaN BRD-A42571354 cetirizine trt_cp \n", "2 HCFDWZZGGLSKEP-UHFFFAOYSA-N BRD-A44008656 doxylamine trt_cp \n", "3 HCFDWZZGGLSKEP-UHFFFAOYSA-N BRD-A44008656 doxylamine trt_cp \n", "4 NaN BRD-A44008656 doxylamine trt_cp \n", "... ... ... ... ... 
\n", "1840 NaN BRD-K79092138 nitrofural trt_cp \n", "1841 NaN BRD-K94353609 fluocinolone trt_cp \n", "1842 NaN BRD-K94353609 fluocinolone trt_cp \n", "1843 NaN BRD-K94353609 fluocinolone trt_cp \n", "1844 NaN BRD-K94353609 fluocinolone trt_cp \n", "\n", " is_touchstone inchi_key_prefix inchi_key_y \\\n", "0 1 ZKLPARSLTMPFCP ZKLPARSLTMPFCP-UHFFFAOYSA-N \n", "1 1 ZKLPARSLTMPFCP NaN \n", "2 1 HCFDWZZGGLSKEP HCFDWZZGGLSKEP-UHFFFAOYSA-N \n", "3 1 HCFDWZZGGLSKEP HCFDWZZGGLSKEP-UHFFFAOYSA-N \n", "4 1 HCFDWZZGGLSKEP NaN \n", "... ... ... ... \n", "1840 1 IAIWVQXQOWNYOU NaN \n", "1841 1 FEBLZLNTKCEFIT NaN \n", "1842 1 FEBLZLNTKCEFIT NaN \n", "1843 1 FEBLZLNTKCEFIT NaN \n", "1844 1 FEBLZLNTKCEFIT NaN \n", "\n", " canonical_smiles pubchem_cid \\\n", "0 OC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1 -666 \n", "1 OC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1 -666 \n", "2 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "3 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "4 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "... ... ... \n", "1840 NC(=O)N/N=C/c1ccc(o1)[N+](=O)[O-] -666 \n", "1841 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "1842 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "1843 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "1844 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "\n", " inchi_key ATC_code_id source_id_y \n", "0 NaN R06AE07 1 \n", "1 ZKLPARSLTMPFCP-UHFFFAOYSA-N R06AE07 1 \n", "2 NaN R06AA09 1 \n", "3 NaN R06AA59 1 \n", "4 HCFDWZZGGLSKEP-UHFFFAOYSA-N R06AA09 1 \n", "... ... ... ... 
\n", "1840 IAIWVQXQOWNYOU-FPYGCLRLSA-N S02AA02 1 \n", "1841 FEBLZLNTKCEFIT-VSXGLTOVSA-N C05AA10 1 \n", "1842 FEBLZLNTKCEFIT-VSXGLTOVSA-N D07AC04 1 \n", "1843 FEBLZLNTKCEFIT-VSXGLTOVSA-N S01BA15 1 \n", "1844 FEBLZLNTKCEFIT-VSXGLTOVSA-N S02BA08 1 \n", "\n", "[1845 rows x 17 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atc_class_all" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drug_idsource_id_xdrug_namemolecular_typechemical_structureinchi_key_xpert_idpert_inamepert_typeis_touchstoneinchi_key_prefixinchi_key_ycanonical_smilespubchem_cidinchi_keyATC_code_idsource_id_yindex
0CHEMBL10001CETIRIZINESmall moleculeO=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1ZKLPARSLTMPFCP-UHFFFAOYSA-NBRD-A42571354cetirizinetrt_cp1ZKLPARSLTMPFCPZKLPARSLTMPFCP-UHFFFAOYSA-NOC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1-666NaNR06AE071R
1CHEMBL10001CETIRIZINESmall moleculeO=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1NaNBRD-A42571354cetirizinetrt_cp1ZKLPARSLTMPFCPNaNOC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1-666ZKLPARSLTMPFCP-UHFFFAOYSA-NR06AE071R
2CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1HCFDWZZGGLSKEP-UHFFFAOYSA-NBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPHCFDWZZGGLSKEP-UHFFFAOYSA-NCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666NaNR06AA091R
3CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1HCFDWZZGGLSKEP-UHFFFAOYSA-NBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPHCFDWZZGGLSKEP-UHFFFAOYSA-NCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666NaNR06AA591R
4CHEMBL10041DOXYLAMINESmall moleculeCN(C)CCOC(C)(c1ccccc1)c1ccccn1NaNBRD-A44008656doxylaminetrt_cp1HCFDWZZGGLSKEPNaNCN(C)CCOC(C)(c1ccccc1)c1ccccn1-666HCFDWZZGGLSKEP-UHFFFAOYSA-NR06AA091R
.........................................................
1840CHEMBL8691NITROFURAZONESmall moleculeNC(=O)N/N=C/c1ccc([N+](=O)[O-])o1NaNBRD-K79092138nitrofuraltrt_cp1IAIWVQXQOWNYOUNaNNC(=O)N/N=C/c1ccc(o1)[N+](=O)[O-]-666IAIWVQXQOWNYOU-FPYGCLRLSA-NS02AA021S
1841CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-NC05AA101C
1842CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-ND07AC041D
1843CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-NS01BA151S
1844CHEMBL9891FLUOCINOLONE ACETONIDESmall moleculeCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...NaNBRD-K94353609fluocinolonetrt_cp1FEBLZLNTKCEFITNaNCC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)...6215FEBLZLNTKCEFIT-VSXGLTOVSA-NS02BA081S
\n", "

1845 rows × 18 columns

\n", "
" ], "text/plain": [ " drug_id source_id_x drug_name molecular_type \\\n", "0 CHEMBL1000 1 CETIRIZINE Small molecule \n", "1 CHEMBL1000 1 CETIRIZINE Small molecule \n", "2 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "3 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "4 CHEMBL1004 1 DOXYLAMINE Small molecule \n", "... ... ... ... ... \n", "1840 CHEMBL869 1 NITROFURAZONE Small molecule \n", "1841 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "1842 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "1843 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "1844 CHEMBL989 1 FLUOCINOLONE ACETONIDE Small molecule \n", "\n", " chemical_structure \\\n", "0 O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 \n", "1 O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 \n", "2 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "3 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "4 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 \n", "... ... \n", "1840 NC(=O)N/N=C/c1ccc([N+](=O)[O-])o1 \n", "1841 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "1842 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "1843 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "1844 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... \n", "\n", " inchi_key_x pert_id pert_iname pert_type \\\n", "0 ZKLPARSLTMPFCP-UHFFFAOYSA-N BRD-A42571354 cetirizine trt_cp \n", "1 NaN BRD-A42571354 cetirizine trt_cp \n", "2 HCFDWZZGGLSKEP-UHFFFAOYSA-N BRD-A44008656 doxylamine trt_cp \n", "3 HCFDWZZGGLSKEP-UHFFFAOYSA-N BRD-A44008656 doxylamine trt_cp \n", "4 NaN BRD-A44008656 doxylamine trt_cp \n", "... ... ... ... ... 
\n", "1840 NaN BRD-K79092138 nitrofural trt_cp \n", "1841 NaN BRD-K94353609 fluocinolone trt_cp \n", "1842 NaN BRD-K94353609 fluocinolone trt_cp \n", "1843 NaN BRD-K94353609 fluocinolone trt_cp \n", "1844 NaN BRD-K94353609 fluocinolone trt_cp \n", "\n", " is_touchstone inchi_key_prefix inchi_key_y \\\n", "0 1 ZKLPARSLTMPFCP ZKLPARSLTMPFCP-UHFFFAOYSA-N \n", "1 1 ZKLPARSLTMPFCP NaN \n", "2 1 HCFDWZZGGLSKEP HCFDWZZGGLSKEP-UHFFFAOYSA-N \n", "3 1 HCFDWZZGGLSKEP HCFDWZZGGLSKEP-UHFFFAOYSA-N \n", "4 1 HCFDWZZGGLSKEP NaN \n", "... ... ... ... \n", "1840 1 IAIWVQXQOWNYOU NaN \n", "1841 1 FEBLZLNTKCEFIT NaN \n", "1842 1 FEBLZLNTKCEFIT NaN \n", "1843 1 FEBLZLNTKCEFIT NaN \n", "1844 1 FEBLZLNTKCEFIT NaN \n", "\n", " canonical_smiles pubchem_cid \\\n", "0 OC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1 -666 \n", "1 OC(=O)COCCN1CCN(CC1)C(c1ccccc1)c1ccc(Cl)cc1 -666 \n", "2 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "3 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "4 CN(C)CCOC(C)(c1ccccc1)c1ccccn1 -666 \n", "... ... ... \n", "1840 NC(=O)N/N=C/c1ccc(o1)[N+](=O)[O-] -666 \n", "1841 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "1842 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "1843 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "1844 CC1(C)O[C@@H]2C[C@H]3[C@@H]4C[C@H](F)C5=CC(=O)... 6215 \n", "\n", " inchi_key ATC_code_id source_id_y index \n", "0 NaN R06AE07 1 R \n", "1 ZKLPARSLTMPFCP-UHFFFAOYSA-N R06AE07 1 R \n", "2 NaN R06AA09 1 R \n", "3 NaN R06AA59 1 R \n", "4 HCFDWZZGGLSKEP-UHFFFAOYSA-N R06AA09 1 R \n", "... ... ... ... ... 
\n", "1840 IAIWVQXQOWNYOU-FPYGCLRLSA-N S02AA02 1 S \n", "1841 FEBLZLNTKCEFIT-VSXGLTOVSA-N C05AA10 1 C \n", "1842 FEBLZLNTKCEFIT-VSXGLTOVSA-N D07AC04 1 D \n", "1843 FEBLZLNTKCEFIT-VSXGLTOVSA-N S01BA15 1 S \n", "1844 FEBLZLNTKCEFIT-VSXGLTOVSA-N S02BA08 1 S \n", "\n", "[1845 rows x 18 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atc_class_all['index'] = atc_class_all['ATC_code_id'].astype(str).str[0]\n", "atc_class_all" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "atc_cancer = atc_class_all[atc_class_all[\"index\"]== \"L\"]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "atc_cancer = atc_cancer[[\"pert_iname\",\"drug_name\",\"drug_id\",\"pert_id\"]].drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "95" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(atc_cancer[\"pert_iname\"].unique())" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "atc_cancer.to_csv(\"drugs_cancer_clue.csv\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }