{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CSBJ cases: gene, drug and pathway joins\n",
    "\n",
    "Joins the CSBJ cases with the ChEMBL-DISNET triplets, then links the resulting new-condition diseases to genes, drugs and pathways."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import mysql.connector\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from pandas import DataFrame\n",
    "from sklearn import preprocessing\n",
    "from sqlalchemy import create_engine"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder holding every DISNET input file read below.\n",
    "DATA_DIR = Path(\"./Data/Input/DISNET\")\n",
    "\n",
    "# Name pandas gives to a header-less first column when reading a file;\n",
    "# it is dropped from most inputs right after loading.\n",
    "UNNAMED_INDEX_COLUMN = \"Unnamed: 0\"\n",
    "\n",
    "# drug_id value whose rows are exported at the end of section 1.\n",
    "TARGET_DRUG_ID = \"CHEMBL1581\"\n",
    "\n",
    "# Export target of section 1, relative to the current working directory.\n",
    "SCORE_GDAS_OUTPUT = Path(\"score_gdas_csbj.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_disnet_tsv(filename: str, drop_unnamed_index: bool = True) -> pd.DataFrame:\n",
    "    \"\"\"Read a tab-separated file from DATA_DIR.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    filename : str\n",
    "        File name relative to DATA_DIR.\n",
    "    drop_unnamed_index : bool, default True\n",
    "        Drop the \"Unnamed: 0\" column after reading. A KeyError is raised\n",
    "        if that column is absent.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The file contents, without the \"Unnamed: 0\" column when requested.\n",
    "    \"\"\"\n",
    "    frame = pd.read_csv(DATA_DIR / filename, sep=\"\\t\")\n",
    "    if drop_unnamed_index:\n",
    "        frame = frame.drop(columns=[UNNAMED_INDEX_COLUMN])\n",
    "    return frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "Cases and triplets are inner-joined on `drug_id` and `disease_id` (the triplets' `Original Condition CUI`). The distinct `New Condition CUI` values of that join are the diseases used in the sections below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cases_csbj = load_disnet_tsv(\"final_cases_csbj.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "triplets_csbj = (\n",
    "    pd.read_excel(DATA_DIR / \"triplets_chembl_disnet.xlsx\", engine=\"openpyxl\")\n",
    "    .drop(columns=[UNNAMED_INDEX_COLUMN])\n",
    "    # Align the key name with cases_csbj so both frames can be merged on it.\n",
    "    .rename(columns={\"Original Condition CUI\": \"disease_id\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "join_csbj = cases_csbj.merge(triplets_csbj, how=\"inner\", on=[\"drug_id\", \"disease_id\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per distinct new condition, keyed as disease_id for the merges below.\n",
    "join_csbj_diseases = (\n",
    "    join_csbj[[\"New Condition CUI\"]]\n",
    "    .drop_duplicates()\n",
    "    .rename(columns={\"New Condition CUI\": \"disease_id\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. DRUG - GENE - TARGET\n",
    "\n",
    "Diseases are joined to genes (`dis_genes.tsv`) and those genes to drugs (`drug_gen.tsv`); the rows for `TARGET_DRUG_ID` are then exported."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dis_gen = load_disnet_tsv(\"dis_genes.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_dise_join = join_csbj_diseases.merge(dis_gen, how=\"inner\", on=\"disease_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_gen = load_disnet_tsv(\"drug_gen.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_dise_join_dru = gen_dise_join.merge(drug_gen, how=\"inner\", on=\"gene_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "score_gdas_csbj = gen_dise_join_dru[gen_dise_join_dru[\"drug_id\"] == TARGET_DRUG_ID]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The index is written as well (pandas default), so re-reading this file\n",
    "# yields an \"Unnamed: 0\" column.\n",
    "score_gdas_csbj.to_csv(SCORE_GDAS_OUTPUT, sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. PATHWAYS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1 Direct pathways\n",
    "\n",
    "Pathways joined to the diseases on `disease_id` alone."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This file is loaded as-is: no \"Unnamed: 0\" column is dropped from it.\n",
    "dis_path_direct = load_disnet_tsv(\"disease_pathway.tsv\", drop_unnamed_index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "direct_dise_pw = join_csbj_diseases.merge(dis_path_direct, how=\"inner\", on=\"disease_id\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 Pathways via genes\n",
    "\n",
    "The joined cases are re-keyed on the new condition and matched against `dis_gen_pw.tsv` on both `disease_id` and `pathway_id`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dis_gen_pw = load_disnet_tsv(\"dis_gen_pw.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the original-condition keys and descriptive columns, then expose the\n",
    "# new condition under the disease_id name used by dis_gen_pw.\n",
    "join_csbj_filter = (\n",
    "    join_csbj\n",
    "    .drop(\n",
    "        columns=[\n",
    "            \"disease_id\",\n",
    "            \"drug_id\",\n",
    "            \"gene_id\",\n",
    "            \"Original Condition\",\n",
    "            \"Drugs\",\n",
    "            \"New Condition\",\n",
    "        ]\n",
    "    )\n",
    "    .rename(columns={\"New Condition CUI\": \"disease_id\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pws_via_gen = join_csbj_filter.merge(dis_gen_pw, how=\"inner\", on=[\"disease_id\", \"pathway_id\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}