{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sqlalchemy import create_engine\n",
"from sklearn import preprocessing\n",
"import mysql.connector\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score\n",
"from numpy import logical_and as l_and, logical_not as l_not"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Connection settings are read from the environment so that no secret is stored\n",
"# in the notebook. The password that used to be hard-coded here has been exposed\n",
"# and should be rotated.\n",
"db_password = os.environ.get(\"DISNET_DB_PASSWORD\")\n",
"if db_password is None:\n",
"    raise RuntimeError(\n",
"        \"Set the DISNET_DB_PASSWORD environment variable before running this notebook.\"\n",
"    )\n",
"\n",
"# Non-secret settings default to the values previously used by this notebook.\n",
"disnet_db_ares = mysql.connector.connect(\n",
"    host=os.environ.get(\"DISNET_DB_HOST\", \"138.4.130.153\"),\n",
"    port=int(os.environ.get(\"DISNET_DB_PORT\", \"30602\")),\n",
"    user=os.environ.get(\"DISNET_DB_USER\", \"disnet_user\"),\n",
"    password=db_password,\n",
"    database=os.environ.get(\"DISNET_DB_NAME\", \"disnet_drugslayer\"),\n",
")\n",
"\n",
"# Cursor opened on the same connection.\n",
"disnet_mysql_cursor = disnet_db_ares.cursor()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def get_dummies(data,col_index,col_col):\n",
"    '''One-hot encode `data` and collapse it to one row per `col_index` value.\n",
"\n",
"    `col_index` is moved into the index before encoding and `col_col` is handed\n",
"    to `pd.get_dummies` as the column-name prefix. Exact duplicate rows are\n",
"    dropped, then the indicator columns are summed per `col_index` value.\n",
"\n",
"    Returns the aggregated frame with `col_index` restored as a regular column.\n",
"    '''\n",
"    encoded = pd.get_dummies(data.set_index(col_index), prefix=col_col)\n",
"    return (\n",
"        encoded.reset_index()\n",
"        .drop_duplicates()\n",
"        .groupby(col_index)\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"def convert(lista):\n",
"    '''Return the items of the iterable `lista` as a tuple, in the same order.'''\n",
"    return tuple(lista)\n",
"\n",
"\n",
"def get_normal_diseases(rare_disease):\n",
"    '''Return the diseases, drugs and genes related to a rare disease.\n",
"\n",
"    The query takes the genes associated with `rare_disease` in\n",
"    disnet_biolayer.disease_gene and returns every entry of ddf_type disease\n",
"    associated with one of those genes, together with the drugs whose target\n",
"    code is the protein encoded by that gene. Only proteins that appear as\n",
"    protein1 in a GOLD-quality ppi row are considered.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    rare_disease : str\n",
"        Disease identifier compared with disease_gene.disease_id.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Distinct rows with the columns disease_id, drug_name and gene_id.\n",
"    '''\n",
"    # The identifier is bound as a query parameter (%s is the placeholder style of\n",
"    # mysql.connector) instead of being formatted into the SQL text, so quotes in\n",
"    # the value cannot alter the statement.\n",
"    q = '''SELECT DISTINCT\n",
"            ds.disease_id, drug_name,dg.gene_id\n",
"        FROM\n",
"            disnet_biolayer.disease_gene dg \n",
"            JOIN disnet_biolayer.disease ds ON ds.disease_id = dg.disease_id\n",
"            JOIN disnet_biolayer.encodes e_uno ON dg.gene_id = e_uno.gene_id\n",
"            JOIN disnet_drugslayer.has_code hc ON hc.code = e_uno.protein_id\n",
"            JOIN disnet_biolayer.ppi ppi on ppi.protein1_id = e_uno.protein_id\n",
"            JOIN disnet_biolayer.encodes e_dos on e_dos.protein_id = ppi.protein2_id\n",
"            JOIN disnet_drugslayer.drug_target dt ON hc.id = dt.target_id\n",
"            JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
"        WHERE\n",
"            hc.entity_id = 3\n",
"            and ds.ddf_type = \"disease\"\n",
"            and ppi.quality = \"GOLD\"\n",
"            AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene\n",
"                               where disease_id = %s)\n",
"        ;'''\n",
"\n",
"    df = pd.read_sql(q, con=disnet_db_ares, params=(rare_disease,))\n",
"\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" rare_dis_id | \n",
" C0000731 | \n",
" C0000737 | \n",
" C0000744 | \n",
" C0000768 | \n",
" C0000771 | \n",
" C0000772 | \n",
" C0000786 | \n",
" C0000822 | \n",
" C0000832 | \n",
" ... | \n",
" C4540327 | \n",
" C4540342 | \n",
" C4540395 | \n",
" C4540493 | \n",
" C4540496 | \n",
" C4540534 | \n",
" C4540535 | \n",
" C4540536 | \n",
" C4540602 | \n",
" C4545381 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" C0011195 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.023256 | \n",
" 0.025641 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" C0023944 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" C0024901 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" C0027877 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" C0036391 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.016949 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" C0265202 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.017241 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 6 | \n",
" C0549463 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 7 | \n",
" C0751337 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" C0869083 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0 | \n",
" 0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
9 rows × 12311 columns
\n",
"
"
],
"text/plain": [
" rare_dis_id C0000731 C0000737 C0000744 C0000768 C0000771 C0000772 \\\n",
"0 C0011195 0 0 0 0.000000 0 0 \n",
"1 C0023944 0 0 0 0.000000 0 0 \n",
"2 C0024901 0 0 0 0.000000 0 0 \n",
"3 C0027877 0 0 0 0.000000 0 0 \n",
"4 C0036391 0 0 0 0.016949 0 0 \n",
"5 C0265202 0 0 0 0.017241 0 0 \n",
"6 C0549463 0 0 0 0.000000 0 0 \n",
"7 C0751337 0 0 0 0.000000 0 0 \n",
"8 C0869083 0 0 0 0.000000 0 0 \n",
"\n",
" C0000786 C0000822 C0000832 ... C4540327 C4540342 C4540395 C4540493 \\\n",
"0 0.023256 0.025641 0 ... 0 0 0 0 \n",
"1 0.000000 0.000000 0 ... 0 0 0 0 \n",
"2 0.000000 0.000000 0 ... 0 0 0 0 \n",
"3 0.000000 0.000000 0 ... 0 0 0 0 \n",
"4 0.000000 0.000000 0 ... 0 0 0 0 \n",
"5 0.000000 0.000000 0 ... 0 0 0 0 \n",
"6 0.000000 0.000000 0 ... 0 0 0 0 \n",
"7 0.000000 0.000000 0 ... 0 0 0 0 \n",
"8 0.000000 0.000000 0 ... 0 0 0 0 \n",
"\n",
" C4540496 C4540534 C4540535 C4540536 C4540602 C4545381 \n",
"0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 \n",
"5 0 0 0 0 0 0 \n",
"6 0 0 0 0 0 0 \n",
"7 0 0 0 0 0 0 \n",
"8 0 0 0 0 0 0 \n",
"\n",
"[9 rows x 12311 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rare_diseases = [\n",
"    'C0011195', 'C0023944', 'C0024054', 'C0024901', 'C0027877',\n",
"    'C0036391', 'C0265202', 'C0268059', 'C0549463', 'C0751337',\n",
"    'C0869083', 'C1852146', 'C0796280',\n",
"]\n",
"# Load the precomputed matrix: one row per rare_dis_id, one column per disease id.\n",
"matrix_jac_ppi = pd.read_excel(\"mat_jacc_pip.xlsx\", engine='openpyxl')\n",
"matrix_jac_ppi"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" disease_id | \n",
" drug_name | \n",
" gene_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" C0002448 | \n",
" SORAFENIB TOSYLATE | \n",
" 673 | \n",
"
\n",
" \n",
" 1 | \n",
" C0002448 | \n",
" VEMURAFENIB | \n",
" 673 | \n",
"
\n",
" \n",
" 2 | \n",
" C0002448 | \n",
" SORAFENIB | \n",
" 673 | \n",
"
\n",
" \n",
" 3 | \n",
" C0002448 | \n",
" REGORAFENIB | \n",
" 673 | \n",
"
\n",
" \n",
" 4 | \n",
" C0002448 | \n",
" DABRAFENIB | \n",
" 673 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 2710 | \n",
" C0699790 | \n",
" FOSTAMATINIB | \n",
" 22858 | \n",
"
\n",
" \n",
" 2711 | \n",
" C1306460 | \n",
" FOSTAMATINIB | \n",
" 22858 | \n",
"
\n",
" \n",
" 2712 | \n",
" C1691215 | \n",
" FOSTAMATINIB | \n",
" 22858 | \n",
"
\n",
" \n",
" 2713 | \n",
" C2675227 | \n",
" FOSTAMATINIB | \n",
" 22858 | \n",
"
\n",
" \n",
" 2714 | \n",
" C2981150 | \n",
" FOSTAMATINIB | \n",
" 22858 | \n",
"
\n",
" \n",
"
\n",
"
2715 rows × 3 columns
\n",
"
"
],
"text/plain": [
" disease_id drug_name gene_id\n",
"0 C0002448 SORAFENIB TOSYLATE 673\n",
"1 C0002448 VEMURAFENIB 673\n",
"2 C0002448 SORAFENIB 673\n",
"3 C0002448 REGORAFENIB 673\n",
"4 C0002448 DABRAFENIB 673\n",
"... ... ... ...\n",
"2710 C0699790 FOSTAMATINIB 22858\n",
"2711 C1306460 FOSTAMATINIB 22858\n",
"2712 C1691215 FOSTAMATINIB 22858\n",
"2713 C2675227 FOSTAMATINIB 22858\n",
"2714 C2981150 FOSTAMATINIB 22858\n",
"\n",
"[2715 rows x 3 columns]"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query the database for the (disease_id, drug_name, gene_id) rows related to\n",
"# the rare disease C0869083.\n",
"df_useful_diseases = get_normal_diseases(\"C0869083\")\n",
"df_useful_diseases"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"352"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reuse the frame fetched in the previous cell instead of running the same\n",
"# database query a second time.\n",
"useful_diseases = df_useful_diseases['disease_id'].drop_duplicates().tolist()\n",
"len(useful_diseases)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" rare_dis_id | \n",
" C0000731 | \n",
" C0000737 | \n",
" C0000744 | \n",
" C0000768 | \n",
" C0000771 | \n",
" C0000772 | \n",
" C0000786 | \n",
" C0000822 | \n",
" C0000832 | \n",
" ... | \n",
" C4540327 | \n",
" C4540342 | \n",
" C4540395 | \n",
" C4540493 | \n",
" C4540496 | \n",
" C4540534 | \n",
" C4540535 | \n",
" C4540536 | \n",
" C4540602 | \n",
" C4545381 | \n",
"
\n",
" \n",
" \n",
" \n",
" 8 | \n",
" C0869083 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.0 | \n",
" 0 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
1 rows × 12311 columns
\n",
"
"
],
"text/plain": [
" rare_dis_id C0000731 C0000737 C0000744 C0000768 C0000771 C0000772 \\\n",
"8 C0869083 0 0 0 0.0 0 0 \n",
"\n",
" C0000786 C0000822 C0000832 ... C4540327 C4540342 C4540395 C4540493 \\\n",
"8 0.0 0.0 0 ... 0 0 0 0 \n",
"\n",
" C4540496 C4540534 C4540535 C4540536 C4540602 C4545381 \n",
"8 0 0 0 0 0 0 \n",
"\n",
"[1 rows x 12311 columns]"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the matrix row that belongs to the rare disease C0869083.\n",
"df = matrix_jac_ppi.loc[matrix_jac_ppi['rare_dis_id'] == 'C0869083']\n",
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" rare_dis_id | \n",
" C0869083 | \n",
"
\n",
" \n",
" \n",
" \n",
" C0346360 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" C3263719 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" C0019624 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" C1275336 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" C1332969 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" C0600139 | \n",
" 0.001592 | \n",
"
\n",
" \n",
" C0376358 | \n",
" 0.001548 | \n",
"
\n",
" \n",
" C2239176 | \n",
" 0.001522 | \n",
"
\n",
" \n",
" C0678222 | \n",
" 0.001031 | \n",
"
\n",
" \n",
" C0006142 | \n",
" 0.001025 | \n",
"
\n",
" \n",
"
\n",
"
352 rows × 1 columns
\n",
"
"
],
"text/plain": [
"rare_dis_id C0869083\n",
"C0346360 0.500000\n",
"C3263719 0.500000\n",
"C0019624 0.500000\n",
"C1275336 0.500000\n",
"C1332969 0.500000\n",
"... ...\n",
"C0600139 0.001592\n",
"C0376358 0.001548\n",
"C2239176 0.001522\n",
"C0678222 0.001031\n",
"C0006142 0.001025\n",
"\n",
"[352 rows x 1 columns]"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Restrict the rare disease's row to the diseases returned by the query and\n",
"# transpose it, so that each disease becomes a row and C0869083 the value column.\n",
"# Raises KeyError if any id in useful_diseases is not a column of the matrix.\n",
"df_useful = df[['rare_dis_id'] + useful_diseases].set_index('rare_dis_id').transpose()\n",
"# Only the displayed copy is sorted; df_useful keeps its original row order.\n",
"df_useful.sort_values(by='C0869083', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"# Keep the 5 diseases with the largest value for C0869083 as (disease_id, jacc_idx).\n",
"# NOTE(review): the saved outputs show more than five diseases tied at 0.5, so\n",
"# which five are kept depends on row order (nlargest keeps the first ones it\n",
"# meets) - confirm that this selection is intended.\n",
"df_normal_dis_jacc=pd.DataFrame(df_useful['C0869083'].nlargest(5)).reset_index().rename(columns={'index':'disease_id', 'C0869083':'jacc_idx'})"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"# Save the selected diseases and their jacc_idx values to an Excel file in the\n",
"# working directory.\n",
"df_normal_dis_jacc.to_excel(\"dis_jacc_C0869083_ppi.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"# Inner join: keep only the queried rows whose disease_id is one of the selected\n",
"# diseases and attach that disease's jacc_idx to each row.\n",
"df_normal_rare_drug=pd.merge(df_useful_diseases,df_normal_dis_jacc,on=\"disease_id\",how=\"inner\")"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" disease_id | \n",
" drug_name | \n",
" gene_id | \n",
" jacc_idx | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" C0019624 | \n",
" SORAFENIB TOSYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 1 | \n",
" C0019624 | \n",
" VEMURAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 2 | \n",
" C0019624 | \n",
" SORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 3 | \n",
" C0019624 | \n",
" REGORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 4 | \n",
" C0019624 | \n",
" DABRAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 5 | \n",
" C0019624 | \n",
" FOSTAMATINIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 6 | \n",
" C0019624 | \n",
" DABRAFENIB MESYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 7 | \n",
" C0019624 | \n",
" ENCORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 8 | \n",
" C0278600 | \n",
" SORAFENIB TOSYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 9 | \n",
" C0278600 | \n",
" VEMURAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 10 | \n",
" C0278600 | \n",
" SORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 11 | \n",
" C0278600 | \n",
" REGORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 12 | \n",
" C0278600 | \n",
" DABRAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 13 | \n",
" C0278600 | \n",
" FOSTAMATINIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 14 | \n",
" C0278600 | \n",
" DABRAFENIB MESYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 15 | \n",
" C0278600 | \n",
" ENCORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 16 | \n",
" C0278851 | \n",
" SORAFENIB TOSYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 17 | \n",
" C0278851 | \n",
" VEMURAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 18 | \n",
" C0278851 | \n",
" SORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 19 | \n",
" C0278851 | \n",
" REGORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 20 | \n",
" C0278851 | \n",
" DABRAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 21 | \n",
" C0278851 | \n",
" FOSTAMATINIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 22 | \n",
" C0278851 | \n",
" DABRAFENIB MESYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 23 | \n",
" C0278851 | \n",
" ENCORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 24 | \n",
" C0278882 | \n",
" SORAFENIB TOSYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 25 | \n",
" C0278882 | \n",
" VEMURAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 26 | \n",
" C0278882 | \n",
" SORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 27 | \n",
" C0278882 | \n",
" REGORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 28 | \n",
" C0278882 | \n",
" DABRAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 29 | \n",
" C0278882 | \n",
" FOSTAMATINIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 30 | \n",
" C0278882 | \n",
" DABRAFENIB MESYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 31 | \n",
" C0278882 | \n",
" ENCORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 32 | \n",
" C0334438 | \n",
" SORAFENIB TOSYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 33 | \n",
" C0334438 | \n",
" VEMURAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 34 | \n",
" C0334438 | \n",
" SORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 35 | \n",
" C0334438 | \n",
" REGORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 36 | \n",
" C0334438 | \n",
" DABRAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 37 | \n",
" C0334438 | \n",
" FOSTAMATINIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 38 | \n",
" C0334438 | \n",
" DABRAFENIB MESYLATE | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
" 39 | \n",
" C0334438 | \n",
" ENCORAFENIB | \n",
" 673 | \n",
" 0.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" disease_id drug_name gene_id jacc_idx\n",
"0 C0019624 SORAFENIB TOSYLATE 673 0.5\n",
"1 C0019624 VEMURAFENIB 673 0.5\n",
"2 C0019624 SORAFENIB 673 0.5\n",
"3 C0019624 REGORAFENIB 673 0.5\n",
"4 C0019624 DABRAFENIB 673 0.5\n",
"5 C0019624 FOSTAMATINIB 673 0.5\n",
"6 C0019624 DABRAFENIB MESYLATE 673 0.5\n",
"7 C0019624 ENCORAFENIB 673 0.5\n",
"8 C0278600 SORAFENIB TOSYLATE 673 0.5\n",
"9 C0278600 VEMURAFENIB 673 0.5\n",
"10 C0278600 SORAFENIB 673 0.5\n",
"11 C0278600 REGORAFENIB 673 0.5\n",
"12 C0278600 DABRAFENIB 673 0.5\n",
"13 C0278600 FOSTAMATINIB 673 0.5\n",
"14 C0278600 DABRAFENIB MESYLATE 673 0.5\n",
"15 C0278600 ENCORAFENIB 673 0.5\n",
"16 C0278851 SORAFENIB TOSYLATE 673 0.5\n",
"17 C0278851 VEMURAFENIB 673 0.5\n",
"18 C0278851 SORAFENIB 673 0.5\n",
"19 C0278851 REGORAFENIB 673 0.5\n",
"20 C0278851 DABRAFENIB 673 0.5\n",
"21 C0278851 FOSTAMATINIB 673 0.5\n",
"22 C0278851 DABRAFENIB MESYLATE 673 0.5\n",
"23 C0278851 ENCORAFENIB 673 0.5\n",
"24 C0278882 SORAFENIB TOSYLATE 673 0.5\n",
"25 C0278882 VEMURAFENIB 673 0.5\n",
"26 C0278882 SORAFENIB 673 0.5\n",
"27 C0278882 REGORAFENIB 673 0.5\n",
"28 C0278882 DABRAFENIB 673 0.5\n",
"29 C0278882 FOSTAMATINIB 673 0.5\n",
"30 C0278882 DABRAFENIB MESYLATE 673 0.5\n",
"31 C0278882 ENCORAFENIB 673 0.5\n",
"32 C0334438 SORAFENIB TOSYLATE 673 0.5\n",
"33 C0334438 VEMURAFENIB 673 0.5\n",
"34 C0334438 SORAFENIB 673 0.5\n",
"35 C0334438 REGORAFENIB 673 0.5\n",
"36 C0334438 DABRAFENIB 673 0.5\n",
"37 C0334438 FOSTAMATINIB 673 0.5\n",
"38 C0334438 DABRAFENIB MESYLATE 673 0.5\n",
"39 C0334438 ENCORAFENIB 673 0.5"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the merged rows (disease_id, drug_name, gene_id, jacc_idx).\n",
"df_normal_rare_drug"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"# Drop exact duplicate rows; re-running this cell leaves the frame unchanged.\n",
"df_normal_rare_drug=df_normal_rare_drug.drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"# Count the non-null values of every other column per drug_name.\n",
"# NOTE(review): this counts rows, not distinct diseases - a drug linked to the\n",
"# same disease through several genes would be counted once per gene; confirm\n",
"# whether a distinct-disease count was intended.\n",
"prueba=df_normal_rare_drug.groupby('drug_name').count()"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" disease_id | \n",
" gene_id | \n",
" jacc_idx | \n",
"
\n",
" \n",
" drug_name | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" DABRAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" DABRAFENIB MESYLATE | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" ENCORAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" FOSTAMATINIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" REGORAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" SORAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" SORAFENIB TOSYLATE | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" VEMURAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" disease_id gene_id jacc_idx\n",
"drug_name \n",
"DABRAFENIB 5 5 5\n",
"DABRAFENIB MESYLATE 5 5 5\n",
"ENCORAFENIB 5 5 5\n",
"FOSTAMATINIB 5 5 5\n",
"REGORAFENIB 5 5 5\n",
"SORAFENIB 5 5 5\n",
"SORAFENIB TOSYLATE 5 5 5\n",
"VEMURAFENIB 5 5 5"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the per-drug counts.\n",
"prueba"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"# Keep the drugs whose disease_id count is greater than 4.\n",
"# NOTE(review): presumably the threshold is tied to the 5 diseases selected with\n",
"# nlargest(5) - confirm, and keep both numbers in sync if either changes.\n",
"drug=prueba[prueba[\"disease_id\"]>4]"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" disease_id | \n",
" gene_id | \n",
" jacc_idx | \n",
"
\n",
" \n",
" drug_name | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" DABRAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" DABRAFENIB MESYLATE | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" ENCORAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" FOSTAMATINIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" REGORAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" SORAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" SORAFENIB TOSYLATE | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" VEMURAFENIB | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" disease_id gene_id jacc_idx\n",
"drug_name \n",
"DABRAFENIB 5 5 5\n",
"DABRAFENIB MESYLATE 5 5 5\n",
"ENCORAFENIB 5 5 5\n",
"FOSTAMATINIB 5 5 5\n",
"REGORAFENIB 5 5 5\n",
"SORAFENIB 5 5 5\n",
"SORAFENIB TOSYLATE 5 5 5\n",
"VEMURAFENIB 5 5 5"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the drugs that passed the count filter.\n",
"drug"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"# Save the filtered per-drug counts to an Excel file in the working directory.\n",
"drug.to_excel(\"drugs_C0869083_ppi.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# Select the merged rows whose disease_id is the rare disease itself.\n",
"# NOTE(review): the merged frame only holds the diseases selected with nlargest,\n",
"# and the saved output of this selection is an empty frame - confirm that this\n",
"# filter (and the frame it is applied to) is what was intended.\n",
"drug_all=df_normal_rare_drug[df_normal_rare_drug[\"disease_id\"]==\"C0869083\"]"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" disease_id | \n",
" drug_name | \n",
" gene_id | \n",
" jacc_idx | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [disease_id, drug_name, gene_id, jacc_idx]\n",
"Index: []"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the selection (an empty frame in the saved output).\n",
"drug_all"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"# The file name carries the id of the disease analysed in this notebook\n",
"# (C0869083), like the other exports. The earlier name pointed at the C0549463\n",
"# results file, which a re-run would have overwritten.\n",
"drug_all.to_excel(\"drugs_all_C0869083_ppi.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}