Commit d0411a6d authored by Belen Otero Carrasco's avatar Belen Otero Carrasco

final

parent a15793e2
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import mysql.connector\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the spreadsheet that lists the repositioned drugs.\n",
"drugs = pd.read_excel(\"farmacos reposicionados lista.xlsx\", engine=\"openpyxl\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Dexamethasone ', 'thalidomide', 'Amphetamine', 'Baclofen', 'CISPLATIN', 'doxorubicin ', 'methotrexate', 'cytarabine ', 'Cocaine', 'Deferoxamine', 'Furosemide ', 'Methamphetamine', 'Methylprednisolone ', 'alcohol', 'Silversulfadiazine ', 'Dexamethasone ', 'Acetaminophen ', 'Carbamazepine ', 'Copper', 'Dexamethasone', 'Gentamicin', 'Methionine', 'Valproic acid ', 'Tamoxifen', 'Carvedilol', 'caffeine ', 'Amiodarone', 'Aspirin', 'Carbamazepine', 'Cyclosporine', 'Dantrolene', 'Enalapril ', 'Enalaprilat', 'Isoproterenol', 'Losartan ', 'Methamphetamine', 'Metoprolol', 'Sirolimus', 'cocaine', 'amphetamine ', 'Valproic acid', 'Valproic acid', 'Estradiol', 'Glibenclamide', 'ACETAMINOPHEN', 'ALBENDAZOLE', 'ALCOHOL', 'CEFTRIAXONE', 'CIPROFLOXACIN', 'DOXYCYCLINE', 'FLUCONAZOLE', 'SULFAMETHOXAZOLE', 'SORAFENIB', 'DABRAFENIB', 'VEMURAFENIB', 'SORAFENIB TOSYLATE', 'REGORAFENIB', 'FOSTAMATINIB', 'DABRAFENIB MESYLATE', 'ENCORAFENIB')\n"
]
}
],
"source": [
"def convert(lista):\n",
"    \"\"\"Return the items of the iterable `lista` as a tuple, in iteration order.\"\"\"\n",
"    return tuple(lista)\n",
"\n",
"\n",
"# Turn the DRUG column into a tuple (printed for inspection); the next cell\n",
"# uses it to filter the drug query.\n",
"list_drug = convert(drugs[\"DRUG\"])\n",
"print(list_drug)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Look up the drug ids and category names of the listed drugs.\n",
"# The names are stripped of stray surrounding whitespace and de-duplicated, then\n",
"# bound as query parameters. Interpolating the tuple's repr into the SQL text\n",
"# breaks for a single name (\"('x',)\") and for names that contain quotes.\n",
"drug_names = tuple(dict.fromkeys(\n",
"    name.strip() if isinstance(name, str) else name for name in list_drug\n",
"))\n",
"if not drug_names:\n",
"    raise ValueError(\"list_drug is empty: nothing to query\")\n",
"\n",
"# One %s placeholder per name (the paramstyle used by the MySQL DBAPI drivers).\n",
"placeholders = \", \".join([\"%s\"] * len(drug_names))\n",
"far_query = f''' SELECT d.drug_id,d.drug_name, c.class_name\n",
"FROM disnet_drugslayer.drug d\n",
"JOIN disnet_drugslayer.drug_drugbank ddb ON d.drug_id = ddb.drug_id\n",
"JOIN disnet_drugslayer.drug_rxcui dc on ddb.drugbank_id = dc.drugbank_id\n",
"JOIN disnet_drugslayer.rxcui_categories rc on dc.rxcui = rc.rxcui\n",
"JOIN disnet_drugslayer.categories c on rc.class_id = c.class_id \n",
"where d.drug_name in ({placeholders});\n",
"'''\n",
"far = pd.read_sql(far_query, con=disnet_db_ares, params=drug_names).drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL160', 'CHEMBL701', 'CHEMBL701', 'CHEMBL701', 'CHEMBL701', 'CHEMBL405', 'CHEMBL405', 'CHEMBL405', 'CHEMBL405', 'CHEMBL405', 'CHEMBL405', 'CHEMBL405', 'CHEMBL405', 'CHEMBL106', 'CHEMBL106', 'CHEMBL106', 'CHEMBL106', 'CHEMBL106', 'CHEMBL106', 'CHEMBL106', 'CHEMBL113', 'CHEMBL113', 'CHEMBL113', 'CHEMBL113', 'CHEMBL113', 'CHEMBL113', 'CHEMBL113', 'CHEMBL113', 'CHEMBL1433', 'CHEMBL1433', 'CHEMBL1433', 'CHEMBL13', 'CHEMBL13', 'CHEMBL13', 'CHEMBL13', 'CHEMBL13', 'CHEMBL13', 'CHEMBL109', 'CHEMBL109', 'CHEMBL109', 'CHEMBL109', 'CHEMBL109', 'CHEMBL109', 'CHEMBL112', 'CHEMBL112', 'CHEMBL112', 'CHEMBL112', 'CHEMBL112', 'CHEMBL112', 'CHEMBL1336', 'CHEMBL1336', 'CHEMBL1336', 'CHEMBL2068237', 'CHEMBL2068237', 'CHEMBL2068237', 'CHEMBL1483', 'CHEMBL1483', 'CHEMBL1483', 'CHEMBL1483', 'CHEMBL1483', 'CHEMBL1483', 'CHEMBL8', 'CHEMBL8', 'CHEMBL8', 'CHEMBL8', 'CHEMBL8', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL34259', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL108', 'CHEMBL578', 'CHEMBL578', 'CHEMBL578', 'CHEMBL578', 'CHEMBL83', 'CHEMBL83', 'CHEMBL83', 'CHEMBL83', 'CHEMBL191', 'CHEMBL191', 'CHEMBL191', 'CHEMBL191', 'CHEMBL35', 'CHEMBL35', 'CHEMBL556', 'CHEMBL556', 'CHEMBL135', 'CHEMBL135', 'CHEMBL135', 'CHEMBL135', 'CHEMBL413', 'CHEMBL413', 'CHEMBL413', 'CHEMBL413', 'CHEMBL413', 'CHEMBL413', 'CHEMBL545', 'CHEMBL545', 'CHEMBL545', 'CHEMBL545', 'CHEMBL370805', 'CHEMBL370805', 'CHEMBL370805', 'CHEMBL370805', 'CHEMBL25', 'CHEMBL25', 'CHEMBL25', 'CHEMBL25', 'CHEMBL25', 'CHEMBL25', 'CHEMBL650', 'CHEMBL650', 'CHEMBL650', 'CHEMBL650', 'CHEMBL650', 'CHEMBL650', 'CHEMBL650', 'CHEMBL650', 'CHEMBL803', 'CHEMBL803', 'CHEMBL803', 'CHEMBL803', 
'CHEMBL803', 'CHEMBL53463', 'CHEMBL53463', 'CHEMBL53463', 'CHEMBL53463', 'CHEMBL443', 'CHEMBL443', 'CHEMBL443', 'CHEMBL443', 'CHEMBL443', 'CHEMBL443', 'CHEMBL468', 'CHEMBL468', 'CHEMBL468', 'CHEMBL468', 'CHEMBL468', 'CHEMBL434', 'CHEMBL434', 'CHEMBL434', 'CHEMBL434', 'CHEMBL434', 'CHEMBL434', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL633', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL723', 'CHEMBL161', 'CHEMBL161', 'CHEMBL1201288', 'CHEMBL1201288', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL384467', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1201201', 'CHEMBL1229517', 'CHEMBL1229517', 'CHEMBL1229517', 'CHEMBL1229517', 'CHEMBL1229517', 'CHEMBL1946170', 'CHEMBL1946170', 'CHEMBL1946170', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL2028663', 'CHEMBL55643', 'CHEMBL577', 'CHEMBL577', 'CHEMBL577', 'CHEMBL3301612', 'CHEMBL2103830', 'CHEMBL2103830')\n"
]
}
],
"source": [
"# Collect the drug ids returned by the previous query as a tuple (printed for\n",
"# inspection); the ATC query below filters on them.\n",
"li_drug = convert(far[\"drug_id\"])\n",
"print(li_drug)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Render the IN (...) list explicitly: formatting the tuple itself produces\n",
"# \"('X',)\" when there is a single id, which is not valid SQL. Repeated ids are\n",
"# dropped because they do not change the result of an IN filter.\n",
"drug_id_list = \", \".join(f\"'{drug_id}'\" for drug_id in dict.fromkeys(li_drug))\n",
"sql = f'''SELECT atc.drug_id,atc.ATC_code_id\n",
"FROM disnet_drugslayer.ATC_code atc\n",
"JOIN disnet_drugslayer.drug_disease dd ON atc.drug_id = dd.drug_id\n",
"JOIN disnet_drugslayer.disease d ON dd.disease_id = d.disease_id\n",
"where atc.drug_id in ({drug_id_list})\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Run the ATC query and keep every returned row only once.\n",
"drug_atc = pd.read_sql(sql, con=disnet_db_ares).drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# First three characters of each ATC code (stored as ATC_LEVEL_2 further down).\n",
"drug_atc_fil = drug_atc[\"ATC_code_id\"].str[:3]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# First character of each ATC code (stored as ATC_LEVEL_1 further down).\n",
"drug_atc_filter = drug_atc[\"ATC_code_id\"].str[:1]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Attach both ATC prefixes as columns of the drug/ATC table.\n",
"drug_atc = drug_atc.assign(\n",
"    ATC_LEVEL_1=drug_atc_filter,\n",
"    ATC_LEVEL_2=drug_atc_fil,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Number of rows per ATC level-1 prefix. Series.value_counts() replaces the\n",
"# deprecated top-level pd.value_counts().\n",
"count_drugs = drug_atc[\"ATC_LEVEL_1\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Number of rows per ATC level-2 prefix. Series.value_counts() replaces the\n",
"# deprecated top-level pd.value_counts().\n",
"count_drug = drug_atc[\"ATC_LEVEL_2\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Save the ATC level-1 counts to a spreadsheet.\n",
"count_drugs.to_excel(\"count_drug_atc_1.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from pandas import DataFrame\n",
"from statsmodels.stats.diagnostic import lilliefors\n",
"from scipy.stats import mannwhitneyu, levene\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from statannot import add_stat_annotation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SQL queries"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
"# Distinct (disease, drug, gene, score) rows for the drug DABRAFENIB in the\n",
"# listed diseases, restricted to genes that are also linked to disease C0869083.\n",
"q1 = '''SELECT DISTINCT\n",
" ds.disease_id, drug_name,dg.gene_id,dg.score\n",
" FROM\n",
" disnet_biolayer.disease_gene dg \n",
" JOIN disnet_biolayer.disease ds ON ds.disease_id = dg.disease_id\n",
" JOIN disnet_biolayer.encodes e ON dg.gene_id = e.gene_id\n",
" JOIN disnet_drugslayer.has_code hc ON hc.code = e.protein_id\n",
" JOIN disnet_drugslayer.drug_target dt ON hc.id = dt.target_id\n",
" JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
" WHERE\n",
" ds.ddf_type = \"disease\"\n",
" and hc.entity_id = 3\n",
" AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene\n",
" where disease_id = \"C0869083\")\n",
" and drug_name = \"DABRAFENIB\"\n",
" and ds.disease_id in ('C0028326',\n",
" 'C0041409',\n",
" 'C1527404',\n",
" 'C0020255',\n",
" 'C0153633',\n",
" 'C0019624',\n",
" 'C0278600',\n",
" 'C0278851',\n",
" 'C0278882',\n",
" 'C0334438',\n",
" 'C3263719')\n",
"\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
"# Run q1 against the DISNET database connection.\n",
"df = pd.read_sql(sql=q1, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": 159,
"metadata": {},
"outputs": [],
"source": [
"#df.to_excel(\"gen_score_83_dabrafenib.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"# Names of the same diseases that q1 filters on (plain string: nothing is interpolated).\n",
"q2 = '''SELECT disease_name,disease_id FROM disnet_biolayer.disease \n",
"where disease_id in ('C0028326',\n",
" 'C0041409',\n",
" 'C1527404',\n",
" 'C0020255',\n",
" 'C0153633',\n",
" 'C0019624',\n",
" 'C0278600',\n",
" 'C0278851',\n",
" 'C0278882',\n",
" 'C0334438',\n",
" 'C3263719') '''"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"# Run q2 to fetch the disease names.\n",
"df_name = pd.read_sql(sql=q2, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [],
"source": [
"#df_name.to_excel(\"name_dis_83.xlsx\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SCORE-GEN DISNET"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Every row of the disease_gene table.\n",
"q3 = \"SELECT * FROM disnet_biolayer.disease_gene\""
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Load the disease_gene table and discard the columns that are not needed here.\n",
"df_score = pd.read_sql(q3, con=disnet_db_ares).drop(\n",
"    columns=[\"gene_id\", \"sio_id\", \"source_id\"]\n",
")\n",
"#df_score.to_excel(\"score_disnet_forR.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 358209.000000\n",
"mean 0.139935\n",
"std 0.131801\n",
"min 0.010000\n",
"25% 0.050000\n",
"50% 0.100000\n",
"75% 0.200000\n",
"max 1.000000\n",
"Name: score, dtype: float64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the score column.\n",
"df_score.loc[:, \"score\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.3455059976805721, 0.0009999999999998899)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Lilliefors test of the scores against a normal distribution.\n",
"lilliefors(df_score[\"score\"], dist=\"norm\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Load the GDAS spreadsheet used for the box plot below.\n",
"GDAS = pd.read_excel(\"GDAS.xlsx\", engine=\"openpyxl\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"x = \"disease\"\n",
"y = \"value\"\n",
"\n",
"# Every other group is compared against the DISNET group.\n",
"# NOTE(review): the trailing space in \"Acromegaloid facial \" is kept as is;\n",
"# presumably it matches the label stored in GDAS.xlsx - confirm.\n",
"compared_groups = [\n",
"    \"Dejerine-Sottas\",\n",
"    \"Schwartz-Jampel\",\n",
"    \"Seckel\",\n",
"    \"Dahlberg-Borer-Newcomer\",\n",
"    \"Acromegaloid facial \",\n",
"]\n",
"box_pairs = [(\"DISNET\", group) for group in compared_groups]\n",
"\n",
"ax = sns.boxplot(data=GDAS, x=x, y=y)\n",
"add_stat_annotation(\n",
"    ax, data=GDAS, x=x, y=y, box_pairs=box_pairs,\n",
"    test='Mann-Whitney', text_format='star', loc='outside', verbose=2,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sqlalchemy import create_engine\n",
"from sklearn import preprocessing\n",
"import mysql.connector\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score\n",
"from numpy import logical_and as l_and, logical_not as l_not"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"def get_dummies(data, col_index, col_col):\n",
"    \"\"\"One-hot encode `data` and add the indicator columns up per `col_index` value.\n",
"\n",
"    `col_index` is moved to the index before encoding and `col_col` is handed to\n",
"    `pd.get_dummies` as its second positional argument. Duplicate encoded rows\n",
"    are dropped before the per-`col_index` sums are computed.\n",
"    \"\"\"\n",
"    encoded = pd.get_dummies(data.set_index(col_index), col_col)\n",
"    return (\n",
"        encoded.reset_index()\n",
"        .drop_duplicates()\n",
"        .groupby(col_index)\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"\n",
"def convert(lista):\n",
"    \"\"\"Return the items of the iterable `lista` as a tuple.\"\"\"\n",
"    return tuple(lista)\n",
"\n",
"\n",
"def get_normal_diseases(rare_disease):\n",
"    \"\"\"Return the diseases, drugs and genes that are useful for a rare disease.\n",
"\n",
"    The result is a DataFrame with the distinct (disease_id, drug_name, gene_id)\n",
"    rows of the diseases that share at least one gene with `rare_disease`,\n",
"    linked to drugs through the encodes / target_has_code / drug_target tables.\n",
"    \"\"\"\n",
"    query = f'''SELECT DISTINCT\n",
" ds.disease_id, drug_name,dg.gene_id\n",
" FROM\n",
" disnet_biolayer.disease_gene dg \n",
" JOIN disnet_biolayer.disease ds ON ds.disease_id = dg.disease_id\n",
" JOIN disnet_biolayer.encodes e ON dg.gene_id = e.gene_id\n",
" JOIN disnet_drugslayer.target_has_code hc ON hc.code_id = e.protein_id\n",
" JOIN disnet_drugslayer.drug_target dt ON hc.target_id = dt.target_id\n",
" JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
" WHERE\n",
" \n",
" ds.ddf_type = \"disease\"\n",
" AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene\n",
" where disease_id = \"{rare_disease}\")\n",
" ;'''\n",
"\n",
"    return pd.read_sql(query, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Identifiers of the rare diseases processed by the loop in the next cell.\n",
"rare_diseases = [\n",
"    'C0011195', 'C0023944', 'C0024054', 'C0024901',\n",
"    'C0027877', 'C0036391', 'C0265202', 'C0268059',\n",
"    'C0549463', 'C0751337', 'C1852146', 'C0796280',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# The matrix file is the same for every disease, so it is read once up front\n",
"# instead of being re-read from disk on each iteration.\n",
"matrix_jac_gen = pd.read_excel(\"./file/Input_6/m_jacc_gen_name.xlsx\", engine='openpyxl')\n",
"\n",
"for disease in rare_diseases:\n",
"    # get useful diseases (disease that shares genes) for a rare disease\n",
"    df_useful_diseases = get_normal_diseases(disease)\n",
"    useful_diseases = df_useful_diseases['disease_id'].drop_duplicates().tolist()\n",
"\n",
"    # Matrix row of the rare disease, limited to the useful diseases and\n",
"    # transposed so that each candidate disease becomes one row.\n",
"    df = matrix_jac_gen[matrix_jac_gen.rare_dis_id == disease]\n",
"    df_useful = df[['rare_dis_id'] + useful_diseases].set_index('rare_dis_id').transpose()\n",
"\n",
"    # Keep the five candidates with the largest value and save them.\n",
"    df_normal_dis_jacc = (\n",
"        pd.DataFrame(df_useful[disease].nlargest(5))\n",
"        .reset_index()\n",
"        .rename(columns={'index': 'disease_id', disease: 'jacc_idx'})\n",
"    )\n",
"    df_normal_dis_jacc.to_excel(f\"{disease}_jacc.xlsx\")\n",
"\n",
"    # Rows of those top candidates, counted per drug name.\n",
"    df_normal_rare_drug = pd.merge(\n",
"        df_useful_diseases, df_normal_dis_jacc, on=\"disease_id\", how=\"inner\"\n",
"    ).drop_duplicates()\n",
"    drug_dis = df_normal_rare_drug.groupby('drug_name').count()\n",
"    drug_dis.to_excel(f\"{disease}_drugs_dr.xlsx\")\n",
"\n",
"    # Drug repositioning direct for each rare disease\n",
"    drug_direct = df_normal_rare_drug[df_normal_rare_drug[\"disease_id\"] == disease]\n",
"    drug_direct.to_csv(f\"{disease}_drugs_dr_direct.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sqlalchemy import create_engine\n",
"from sklearn import preprocessing\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_dummies(data, col_index, col_col):\n",
"    \"\"\"One-hot encode `data` and add the indicator columns up per `col_index` value.\n",
"\n",
"    `col_index` is moved to the index before encoding and `col_col` is handed to\n",
"    `pd.get_dummies` as its second positional argument. Duplicate encoded rows\n",
"    are dropped before the per-`col_index` sums are computed.\n",
"    \"\"\"\n",
"    encoded = pd.get_dummies(data.set_index(col_index), col_col)\n",
"    return (\n",
"        encoded.reset_index()\n",
"        .drop_duplicates()\n",
"        .groupby(col_index)\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"\n",
"def convert(lista):\n",
"    \"\"\"Return the items of the iterable `lista` as a tuple.\"\"\"\n",
"    return tuple(lista)\n",
"\n",
"\n",
"def get_normal_diseases(rare_disease_dis):\n",
"    \"\"\"Return the diseases, drugs and genes that are useful for a rare disease.\n",
"\n",
"    The result is a DataFrame with the distinct (disease_id, drug_name, gene_id,\n",
"    disnet_id) rows of the diseases that share at least one gene with\n",
"    `rare_disease_dis`; the query also joins the symptom-layer mapping tables.\n",
"    \"\"\"\n",
"    query = f'''SELECT DISTINCT\n",
" ds.disease_id, drug_name,dg.gene_id,lm.disnet_id\n",
" FROM\n",
" disnet_biolayer.disease_gene dg \n",
" JOIN disnet_biolayer.disease ds ON ds.disease_id = dg.disease_id\n",
" JOIN edsssdb.layersmappings lm on dg.disease_id = lm.cui\n",
" JOIN edsssdb.disease_symptom dsy ON lm.disnet_id = dsy.disease_id\n",
" JOIN edsssdb.symptom s ON dsy.cui = s.cui\n",
" JOIN disnet_biolayer.encodes e ON dg.gene_id = e.gene_id\n",
" JOIN disnet_drugslayer.has_code hc ON hc.code = e.protein_id\n",
" JOIN disnet_drugslayer.drug_target dt ON hc.id = dt.target_id\n",
" JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
" \n",
" WHERE\n",
" hc.entity_id = 3\n",
" and ds.ddf_type = \"disease\"\n",
" AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene\n",
" where disease_id = \"{rare_disease_dis}\")\n",
" \n",
" ;'''\n",
"\n",
"    return pd.read_sql(query, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Identifiers of the rare diseases processed by the loop in the next cell.\n",
"rare_diseases = [\n",
"    'C0011195', 'C0023944', 'C0024054', 'C0024901', 'C0027877',\n",
"    'C0036391', 'C0265202', 'C0268059', 'C0549463', 'C0751337',\n",
"    'C0869083', 'C1852146', 'C0796280',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The matrix file is the same for every disease, so it is read once up front\n",
"# instead of being re-read from disk on each iteration.\n",
"matrix_jac_syn = pd.read_excel(\"./file/Input_6/mat_jacc_syn_cuis.xlsx\", engine='openpyxl')\n",
"\n",
"for disease in rare_diseases:\n",
"    # get useful diseases (disease that shares genes) for a rare disease\n",
"    df_useful_diseases = get_normal_diseases(disease)\n",
"    useful_diseases = df_useful_diseases['disease_id'].drop_duplicates().tolist()\n",
"\n",
"    # Matrix row of the rare disease, limited to the useful diseases and\n",
"    # transposed so that each candidate disease becomes one row.\n",
"    df = matrix_jac_syn[matrix_jac_syn.rare_dis_id == disease]\n",
"    df_useful = df[['rare_dis_id'] + useful_diseases].set_index('rare_dis_id').transpose()\n",
"\n",
"    # Keep the five candidates with the largest value and save them.\n",
"    df_normal_dis_jacc = (\n",
"        pd.DataFrame(df_useful[disease].nlargest(5))\n",
"        .reset_index()\n",
"        .rename(columns={'index': 'disease_id', disease: 'jacc_idx'})\n",
"    )\n",
"    df_normal_dis_jacc.to_excel(f\"{disease}_jacc.xlsx\")\n",
"\n",
"    # Rows of those top candidates, counted per drug name.\n",
"    df_normal_rare_drug = pd.merge(\n",
"        df_useful_diseases, df_normal_dis_jacc, on=\"disease_id\", how=\"inner\"\n",
"    ).drop_duplicates()\n",
"    drug_dis = df_normal_rare_drug.groupby('drug_name').count()\n",
"    drug_dis.to_excel(f\"{disease}_drugs_dr.xlsx\")\n",
"\n",
"    # Drug repositioning direct for each rare disease\n",
"    drug_direct = df_normal_rare_drug[df_normal_rare_drug[\"disease_id\"] == disease]\n",
"    drug_direct.to_excel(f\"{disease}_drugs_dr_direct.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sqlalchemy import create_engine\n",
"from sklearn import preprocessing\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_dummies(data, col_index, col_col):\n",
"    \"\"\"One-hot encode `data` and add the indicator columns up per `col_index` value.\n",
"\n",
"    `col_index` is moved to the index before encoding and `col_col` is handed to\n",
"    `pd.get_dummies` as its second positional argument. Duplicate encoded rows\n",
"    are dropped before the per-`col_index` sums are computed.\n",
"    \"\"\"\n",
"    encoded = pd.get_dummies(data.set_index(col_index), col_col)\n",
"    return (\n",
"        encoded.reset_index()\n",
"        .drop_duplicates()\n",
"        .groupby(col_index)\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"\n",
"def convert(lista):\n",
"    \"\"\"Return the items of the iterable `lista` as a tuple.\"\"\"\n",
"    return tuple(lista)\n",
"\n",
"\n",
"def get_normal_diseases(rare_disease):\n",
"    \"\"\"Return the diseases, drugs and genes that are useful for a rare disease.\n",
"\n",
"    The result is a DataFrame with the distinct (disease_id, drug_name, gene_id)\n",
"    rows of the diseases that share at least one gene with `rare_disease`; the\n",
"    query starts from the gene_pathway table and joins the pathway table.\n",
"    \"\"\"\n",
"    query = f'''SELECT DISTINCT\n",
" ds.disease_id, drug_name,dg.gene_id\n",
" FROM disnet_biolayer.gene_pathway gp\n",
" JOIN disnet_biolayer.pathway p on gp.pathway_id = p.pathway_id\n",
" JOIN disnet_biolayer.disease_gene dg ON gp.gene_id = dg.gene_id\n",
" JOIN disnet_biolayer.disease ds ON ds.disease_id = dg.disease_id\n",
" JOIN disnet_biolayer.encodes e ON dg.gene_id = e.gene_id\n",
" JOIN disnet_drugslayer.has_code hc ON hc.code = e.protein_id\n",
" JOIN disnet_drugslayer.drug_target dt ON hc.id = dt.target_id\n",
" JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
" WHERE\n",
" hc.entity_id = 3\n",
" and ds.ddf_type = \"disease\"\n",
" AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene \n",
" where disease_id = \"{rare_disease}\")\n",
" ;'''\n",
"\n",
"    return pd.read_sql(query, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Identifiers of the rare diseases processed by the loop in the next cell.\n",
"rare_diseases = [\n",
"    'C0011195', 'C0023944', 'C0024054', 'C0024901', 'C0027877',\n",
"    'C0036391', 'C0265202', 'C0268059', 'C0549463', 'C0751337',\n",
"    'C0869083', 'C1852146', 'C0796280',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The matrix file is the same for every disease, so it is read once up front\n",
"# instead of being re-read from disk on each iteration.\n",
"matrix_jac_pw = pd.read_excel(\"./file/Input_6/m_jacc_pw_name.xlsx\", engine='openpyxl')\n",
"\n",
"for disease in rare_diseases:\n",
"    # get useful diseases (disease that shares genes) for a rare disease\n",
"    df_useful_diseases = get_normal_diseases(disease)\n",
"    useful_diseases = df_useful_diseases['disease_id'].drop_duplicates().tolist()\n",
"\n",
"    # Matrix row of the rare disease, limited to the useful diseases and\n",
"    # transposed so that each candidate disease becomes one row.\n",
"    df = matrix_jac_pw[matrix_jac_pw.rare_dis_id == disease]\n",
"    df_useful = df[['rare_dis_id'] + useful_diseases].set_index('rare_dis_id').transpose()\n",
"\n",
"    # Keep the five candidates with the largest value and save them.\n",
"    df_normal_dis_jacc = (\n",
"        pd.DataFrame(df_useful[disease].nlargest(5))\n",
"        .reset_index()\n",
"        .rename(columns={'index': 'disease_id', disease: 'jacc_idx'})\n",
"    )\n",
"    df_normal_dis_jacc.to_excel(f\"{disease}_jacc.xlsx\")\n",
"\n",
"    # Rows of those top candidates, counted per drug name.\n",
"    df_normal_rare_drug = pd.merge(\n",
"        df_useful_diseases, df_normal_dis_jacc, on=\"disease_id\", how=\"inner\"\n",
"    ).drop_duplicates()\n",
"    drug_dis = df_normal_rare_drug.groupby('drug_name').count()\n",
"    drug_dis.to_excel(f\"{disease}_drugs_dr.xlsx\")\n",
"\n",
"    # Drug repositioning direct for each rare disease\n",
"    drug_direct = df_normal_rare_drug[df_normal_rare_drug[\"disease_id\"] == disease]\n",
"    drug_direct.to_excel(f\"{disease}_drugs_dr_direct.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sqlalchemy import create_engine\n",
"from sklearn import preprocessing\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_dummies(data, col_index, col_col):\n",
"    \"\"\"One-hot encode `data` and add the indicator columns up per `col_index` value.\n",
"\n",
"    `col_index` is moved to the index before encoding and `col_col` is handed to\n",
"    `pd.get_dummies` as its second positional argument. Duplicate encoded rows\n",
"    are dropped before the per-`col_index` sums are computed.\n",
"    \"\"\"\n",
"    encoded = pd.get_dummies(data.set_index(col_index), col_col)\n",
"    return (\n",
"        encoded.reset_index()\n",
"        .drop_duplicates()\n",
"        .groupby(col_index)\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"\n",
"def convert(lista):\n",
"    \"\"\"Return the items of the iterable `lista` as a tuple.\"\"\"\n",
"    return tuple(lista)\n",
"\n",
"\n",
"def get_normal_diseases(rare_disease):\n",
"    \"\"\"Return the diseases, drugs and genes that are useful for a rare disease.\n",
"\n",
"    The result is a DataFrame with the distinct (disease_id, drug_name, gene_id)\n",
"    rows of the diseases that share at least one gene with `rare_disease`; only\n",
"    proteins that appear as protein1 of a \"GOLD\" quality PPI row are kept.\n",
"    \"\"\"\n",
"    # NOTE(review): e_dos is joined but none of its columns are selected or\n",
"    # filtered on - confirm whether drugs were meant to be matched through the\n",
"    # interacting protein (ppi.protein2_id) instead.\n",
"    query = f'''SELECT DISTINCT\n",
" ds.disease_id, drug_name,dg.gene_id\n",
" FROM\n",
" disnet_biolayer.disease_gene dg \n",
" JOIN disnet_biolayer.disease ds ON ds.disease_id = dg.disease_id\n",
" JOIN disnet_biolayer.encodes e_uno ON dg.gene_id = e_uno.gene_id\n",
" JOIN disnet_drugslayer.has_code hc ON hc.code = e_uno.protein_id\n",
" JOIN disnet_biolayer.ppi ppi on ppi.protein1_id = e_uno.protein_id\n",
" JOIN disnet_biolayer.encodes e_dos on e_dos.protein_id = ppi.protein2_id\n",
" JOIN disnet_drugslayer.drug_target dt ON hc.id = dt.target_id\n",
" JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
" WHERE\n",
" hc.entity_id = 3\n",
" \n",
" and ds.ddf_type = \"disease\"\n",
" and ppi.quality = \"GOLD\"\n",
" AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene\n",
" where disease_id = \"{rare_disease}\")\n",
" ;'''\n",
"\n",
"    return pd.read_sql(query, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Identifiers of the rare diseases processed by the loop in the next cell.\n",
"rare_diseases = [\n",
"    'C0011195', 'C0023944', 'C0024054', 'C0024901', 'C0027877',\n",
"    'C0036391', 'C0265202', 'C0268059', 'C0549463', 'C0751337',\n",
"    'C0869083', 'C1852146', 'C0796280',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The matrix file is the same for every disease, so it is read once up front\n",
"# instead of being re-read from disk on each iteration.\n",
"matrix_jac_ppi = pd.read_excel(\"./file/Input_6/mat_jacc_pip.xlsx\", engine='openpyxl')\n",
"\n",
"for disease in rare_diseases:\n",
"    # get useful diseases (disease that shares genes) for a rare disease\n",
"    df_useful_diseases = get_normal_diseases(disease)\n",
"    useful_diseases = df_useful_diseases['disease_id'].drop_duplicates().tolist()\n",
"\n",
"    # Matrix row of the rare disease, limited to the useful diseases and\n",
"    # transposed so that each candidate disease becomes one row.\n",
"    df = matrix_jac_ppi[matrix_jac_ppi.rare_dis_id == disease]\n",
"    df_useful = df[['rare_dis_id'] + useful_diseases].set_index('rare_dis_id').transpose()\n",
"\n",
"    # Keep the five candidates with the largest value and save them.\n",
"    df_normal_dis_jacc = (\n",
"        pd.DataFrame(df_useful[disease].nlargest(5))\n",
"        .reset_index()\n",
"        .rename(columns={'index': 'disease_id', disease: 'jacc_idx'})\n",
"    )\n",
"    df_normal_dis_jacc.to_excel(f\"{disease}_jacc.xlsx\")\n",
"\n",
"    # Rows of those top candidates, counted per drug name.\n",
"    df_normal_rare_drug = pd.merge(\n",
"        df_useful_diseases, df_normal_dis_jacc, on=\"disease_id\", how=\"inner\"\n",
"    ).drop_duplicates()\n",
"    drug_dis = df_normal_rare_drug.groupby('drug_name').count()\n",
"    drug_dis.to_excel(f\"{disease}_drugs_dr.xlsx\")\n",
"\n",
"    # Drug repositioning direct for each rare disease\n",
"    drug_direct = df_normal_rare_drug[df_normal_rare_drug[\"disease_id\"] == disease]\n",
"    drug_direct.to_excel(f\"{disease}_drugs_dr_direct.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sqlalchemy import create_engine\n",
"from sklearn import preprocessing\n",
"from pandas import DataFrame\n",
"from sklearn.metrics import jaccard_score"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_dummies(data, col_index, col_col):\n",
"    \"\"\"One-hot encode `data` and add the indicator columns up per `col_index` value.\n",
"\n",
"    `col_index` is moved to the index before encoding and `col_col` is handed to\n",
"    `pd.get_dummies` as its second positional argument. Duplicate encoded rows\n",
"    are dropped before the per-`col_index` sums are computed.\n",
"    \"\"\"\n",
"    encoded = pd.get_dummies(data.set_index(col_index), col_col)\n",
"    return (\n",
"        encoded.reset_index()\n",
"        .drop_duplicates()\n",
"        .groupby(col_index)\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"\n",
"def convert(lista):\n",
"    \"\"\"Return the items of the iterable `lista` as a tuple.\"\"\"\n",
"    return tuple(lista)\n",
"\n",
"\n",
"def get_normal_diseases(rare_disease):\n",
"    \"\"\"Return the diseases, drugs and genes that are useful for a rare disease.\n",
"\n",
"    The result is a DataFrame with the distinct (disease_id, drug_name, gene_id)\n",
"    rows of the diseases that share at least one gene with `rare_disease` and\n",
"    that have a variant with a non-null chromosome.\n",
"    \"\"\"\n",
"    # The disease table is aliased `ds`: the query previously used `d` for both\n",
"    # the disease and the drug table, which MySQL rejects as a non-unique alias.\n",
"    query = f'''SELECT DISTINCT\n",
" ds.disease_id, drug_name,dg.gene_id\n",
" FROM\n",
" disnet_biolayer.disease_variant dv\n",
" JOIN disnet_biolayer.disease ds on dv.disease_id = ds.disease_id\n",
" JOIN disnet_biolayer.variant v on v.variant_id = dv.variant_id\n",
" JOIN disnet_biolayer.disease_gene dg ON ds.disease_id = dg.disease_id\n",
" JOIN disnet_biolayer.encodes e ON dg.gene_id = e.gene_id\n",
" JOIN disnet_drugslayer.has_code hc ON hc.code = e.protein_id\n",
" JOIN disnet_drugslayer.drug_target dt ON hc.id = dt.target_id\n",
" JOIN disnet_drugslayer.drug d ON dt.drug_id = d.drug_id\n",
" WHERE\n",
" hc.entity_id = 3\n",
" and v.chromosome is not null\n",
" and ds.ddf_type = \"disease\"\n",
" AND dg.gene_id in (SELECT distinct(gene_id) FROM disnet_biolayer.disease_gene\n",
" where disease_id = \"{rare_disease}\")\n",
" ;'''\n",
"\n",
"    return pd.read_sql(query, con=disnet_db_ares)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Identifiers of the rare diseases processed by the loop in the next cell.\n",
"rare_diseases = [\n",
"    'C0011195', 'C0023944', 'C0024054', 'C0024901', 'C0027877',\n",
"    'C0036391', 'C0265202', 'C0268059', 'C0549463', 'C0751337',\n",
"    'C0869083', 'C1852146', 'C0796280',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The matrix file is the same for every disease, so it is read once up front\n",
"# instead of being re-read from disk on each iteration.\n",
"matrix_jac_var = pd.read_excel(\"./file/Input_6/mat_jacc_variant.xlsx\", engine='openpyxl')\n",
"\n",
"for disease in rare_diseases:\n",
"    # get useful diseases (disease that shares genes) for a rare disease\n",
"    df_useful_diseases = get_normal_diseases(disease)\n",
"    useful_diseases = df_useful_diseases['disease_id'].drop_duplicates().tolist()\n",
"\n",
"    # Matrix row of the rare disease, limited to the useful diseases and\n",
"    # transposed so that each candidate disease becomes one row.\n",
"    df = matrix_jac_var[matrix_jac_var.rare_dis_id == disease]\n",
"    df_useful = df[['rare_dis_id'] + useful_diseases].set_index('rare_dis_id').transpose()\n",
"\n",
"    # Keep the five candidates with the largest value and save them.\n",
"    df_normal_dis_jacc = (\n",
"        pd.DataFrame(df_useful[disease].nlargest(5))\n",
"        .reset_index()\n",
"        .rename(columns={'index': 'disease_id', disease: 'jacc_idx'})\n",
"    )\n",
"    df_normal_dis_jacc.to_excel(f\"{disease}_jacc.xlsx\")\n",
"\n",
"    # Rows of those top candidates, counted per drug name.\n",
"    df_normal_rare_drug = pd.merge(\n",
"        df_useful_diseases, df_normal_dis_jacc, on=\"disease_id\", how=\"inner\"\n",
"    ).drop_duplicates()\n",
"    drug_dis = df_normal_rare_drug.groupby('drug_name').count()\n",
"    drug_dis.to_excel(f\"{disease}_drugs_dr.xlsx\")\n",
"\n",
"    # Drug repositioning direct for each rare disease\n",
"    drug_direct = df_normal_rare_drug[df_normal_rare_drug[\"disease_id\"] == disease]\n",
"    drug_direct.to_excel(f\"{disease}_drugs_dr_direct.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from pandas import DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LOAD DATA"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"syn = pd.read_excel((\"./file/Input_7/SYN/drugs_C0549463_sintarget_syn.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"gen =pd.read_excel((\"./file/Input_7/GEN/drug_sintarget_C0265202.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"ppi=pd.read_excel((\"./file/Input_7/PPI/drugs_sintar_ppi_C0869083.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"var=pd.read_excel((\"./file/Input_7/VAR/drugs_C1852146_sintarget_var.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"syn= syn['drug_name'].apply(str)\n",
"syn= syn.tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen= gen['drug_name'].apply(str)\n",
"gen= gen.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"ppi= ppi['drug_name'].apply(str)\n",
"ppi= ppi.tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"var= var['drug_name'].apply(str)\n",
"var= var.tolist()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DRUGS IN COMMON"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_uno = list(set.intersection(*map(set, [syn,gen])))\n",
"drugs_in_all_enf_uno = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_uno.to_excel(\"drug_in_common_enfC0023944_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_dos = list(set.intersection(*map(set, [syn,gen,ppi])))\n",
"drugs_in_all_enf_dos = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_dos.to_excel(\"drug_in_common_enfC0024901_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_tres = list(set.intersection(*map(set, [syn,var,ppi])))\n",
"drugs_in_all_enf_tres = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_tres.to_excel(\"drug_in_common_enfC0751337_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_cua = list(set.intersection(*map(set, [syn,var,ppi,pw,gen])))\n",
"drugs_in_all_enf_cua = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_cua.to_excel(\"drug_in_common_en95_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_cin = list(set.intersection(*map(set, [syn,var,pw,gen])))\n",
"drugs_in_all_enf_cin = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_cin.to_excel(\"drug_in_common_enC0027877_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_seis = list(set.intersection(*map(set, [syn,var,pw])))\n",
"drugs_in_all_enf_seis = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_seis.to_excel(\"drug_in_common_enfC1852146_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_sie = list(set.intersection(*map(set, [syn,gen,ppi])))\n",
"drugs_in_all_enf_sie = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_sie.to_excel(\"drug_in_common_enfC0265202_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_ocho = list(set.intersection(*map(set, [syn,gen,ppi,var])))\n",
"drugs_in_all_enf_ocho = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_ocho.to_excel(\"drug_in_common_enfC0869083_sintarget.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_nue = list(set.intersection(*map(set, [syn,var,ppi])))\n",
"drugs_in_all_enf_nue = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_nue.to_excel(\"drug_in_common_enfC0549463_sintarget.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from pandas import DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DATA LOAD"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"syn = pd.read_excel((\"./file/Input_8/SYMPTOM/drugs_all_C0549463_syn.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"gen =pd.read_excel((\"./file/Input_8/GENES/drugs_all_C0549463_gen.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"ppi=pd.read_excel((\"./file/Input_8/PPI/drugs_all_C0549463_ppi.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"pw=pd.read_excel((\"./file/Input_8/PATHWAY/drugs_C0549463_pw.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"var=pd.read_excel((\"./file/Input_8/VARIANT/drugs_all_C0549463_v.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"gen= gen['drug_name'].apply(str)\n",
"gen= gen.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"syn= syn['drug_name'].apply(str)\n",
"syn= syn.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"ppi= ppi['drug_name'].apply(str)\n",
"ppi= ppi.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"var= var['drug_name'].apply(str)\n",
"var= var.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"pw= pw['drug_name'].apply(str)\n",
"pw= pw.tolist()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DRUGS IN COMMON"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_uno = list(set.intersection(*map(set, [syn,gen,ppi,pw,var])))\n",
"drugs_in_all_enf_uno = pd.DataFrame(drugs_in_all_enf_uno)\n",
"#drugs_in_all_enf_uno.to_excel(\"drug_in_common_enf_C0011195_target.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_dos = list(set.intersection(*map(set, [syn,gen,ppi,pw,var])))\n",
"drugs_in_all_enf_dos = pd.DataFrame(drugs_in_all_enf_dos)\n",
"#drugs_in_all_enf_dos.to_excel(\"drug_in_common_enf_C0036391_target.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_tres = list(set.intersection(*map(set, [syn,ppi,pw])))\n",
"drugs_in_all_enf_tres = pd.DataFrame(drugs_in_all_enf_tres)\n",
"#drugs_in_all_enf_tres.to_excel(\"drug_in_common_enf_C0869083_target.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_cuatro = list(set.intersection(*map(set, [syn,gen,ppi,pw])))\n",
"drugs_in_all_enf_cuatro = pd.DataFrame(drugs_in_all_enf_cuatro)\n",
"#drugs_in_all_enf_cuatro.to_excel(\"drug_in_common_enf_C0265202_target.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf_cinco = list(set.intersection(*map(set, [syn,gen,ppi,pw,var])))\n",
"drugs_in_all_enf_cinco = pd.DataFrame(drugs_in_all_enf_cinco)\n",
"#drugs_in_all_enf_cinco.to_excel(\"drug_in_common_enf_C0549463_target.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from pandas import DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DATA LOAD"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"paths = pd.read_csv(\"./file/Input_9/Paths/enf_uno_drugs.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"triplet_with =pd.read_excel((\"./file/Input_9/With_tar/drug_in_common_enf_C0011195_target_NEW.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"triplet_without=pd.read_excel((\"./file/Input_9/Without_tar/drug_in_common_en95_sintarget.xlsx\"),engine='openpyxl')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"paths= paths['drug_name'].apply(str)\n",
"paths= paths.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"triplet_with= triplet_with['drug_name'].apply(str)\n",
"triplet_with= triplet_with.tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"triplet_without= triplet_without['drug_name'].apply(str)\n",
"triplet_without= triplet_without.tolist()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DRUGS IN COMMON"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drugs_in_all_enf = list(set.intersection(*map(set, [triplet_with,paths,triplet_without])))\n",
"drugs_in_all_enf = pd.DataFrame(drugs_in_all_enf)\n",
"drugs_in_all_enf.to_excel(\"enf_uno_drugs_P-TC_new.xlsx\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
UMLS_CUI;disease_id;OrphaCode;Orphanet_name;UMLS_name
C0011195;DIS006265;64748;Dejerine-Sottas syndrome;Dejerine-Sottas Disease (disorder)
C0023944;DIS007965;2406;Locked-in syndrome;Locked-In Syndrome
C0024054;DIS004718;844;Lown-Ganong-Levine syndrome;Lown-Ganong-Levine Syndrome
C0024901;DIS004602;79456;Diffuse cutaneous mastocytosis;Mastocytosis -Diffuse Cutaneous
C0027877;DIS006521;168486;Congenital neuronal ceroid lipofuscinosis;Neuronal Ceroid-Lipofuscinoses
C0036391;DIS010678;800;Schwartz-Jampel syndrome;Schwartz-Jampel Syndrome
C0265202;DIS007060;808;Seckel syndrome;Seckel syndrome
C0268059;DIS009381;446;Neonatal hemochromatosis;Neonatal hemochromatosis
C0549463;DIS005733;538931;X-linked lymphoproliferative disease due to SH2D1A deficiency;X-Linked Lymphoproliferative Disorder
C0751337;DIS004547;437572;MYH7-related late-onset scapuloperoneal muscular dystrophy;X-Linked Emery-Dreifuss Muscular Dystrophy
C0869083;DIS006533;1563;Dahlberg-Borer-Newcomer syndrome;Other specified congenital malformation syndromes-not elsewhere classified in ICD10CM
C1852146;DIS004797;493342;Vibratory urticaria;Vibratory urticaria
C0796280;DIS005984;965;Acromegaloid facial appearance syndrome;Congenital malformation syndromes affecting facial appearance
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment