{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "dregeintro01",
   "metadata": {},
   "source": [
    "# Gene-supported repurposing triples (DREGE)\n",
    "\n",
    "Combines two sets of drug repurposing triples (repoDB and CSBJ) into one de-duplicated table and writes it to `triples_final_drege_repo_csbj.xlsx`.\n",
    "\n",
    "A triple is kept only when some `gene_id` linked to its drug in `drug_gen.tsv` is also linked in `dis_gen.tsv` to **both** of its diseases (this is what the chained inner merges below implement).\n",
    "\n",
    "Inputs, with the columns this notebook relies on:\n",
    "\n",
    "- `triples_filter_repodb_final.tsv`: `drug`, `disease1`, `disease2`\n",
    "- `triplets_chembl_disnet.xlsx`: `drug_id`, `Original Condition CUI`, `New Condition CUI`\n",
    "- `drug_gen.tsv`: `drug_id`, `gene_id` (assumed to be the source of `gene_id`; TODO confirm)\n",
    "- `dis_gen.tsv`: `disease_id`, `gene_id`, and presumably `score` (TODO confirm)\n",
    "\n",
    "Every cell builds its result from its inputs in one step, so the notebook is meant to be run top to bottom on a fresh kernel and any cell can be re-run safely."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d974299",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): only pandas is used in this notebook; the remaining imports are\n",
    "# kept unchanged but can be dropped if nothing else depends on them.\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from sqlalchemy import create_engine\n",
    "import mysql.connector"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dregeload01",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "Every input file carries a leftover `Unnamed: 0` column, which is dropped on load."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d7450e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read as comma-separated despite the .tsv extension. A literal \",\" separator\n",
    "# (rather than an escaped-comma regex) keeps pandas on the default C parser and\n",
    "# makes it honour quoted fields.\n",
    "triples_repodb = (\n",
    "    pd.read_csv(\"triples_filter_repodb_final.tsv\", sep=\",\")\n",
    "    .drop(columns=[\"Unnamed: 0\"])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f4f9844",
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_gen = pd.read_csv(\"drug_gen.tsv\", sep=\"\\t\").drop(columns=[\"Unnamed: 0\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8f0a4e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "dis_gen = pd.read_csv(\"dis_gen.tsv\", sep=\"\\t\").drop(columns=[\"Unnamed: 0\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dregerepodb01",
   "metadata": {},
   "source": [
    "## repoDB triples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "899450d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the rows of drug_gen to every triple of the same drug, then align the\n",
    "# first disease column with the key name used in dis_gen.\n",
    "#\n",
    "# `disease2` is deliberately left unrenamed: the merge in the next cell and the\n",
    "# final rename to \"New Condition CUI\" both key on it. Renaming it at this point\n",
    "# (as an earlier revision did, to `disease_id_new`) makes a top-to-bottom run\n",
    "# fail with KeyError: 'disease2'.\n",
    "tri_gen_target = (\n",
    "    triples_repodb\n",
    "    .merge(drug_gen, left_on=\"drug\", right_on=\"drug_id\", how=\"inner\")\n",
    "    .drop(columns=[\"drug\"])\n",
    "    .rename(columns={\"disease1\": \"disease_id\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9db0ad6c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep a row only if its gene is linked in dis_gen to the first disease\n",
    "# (`disease_id`) AND to the second disease (`disease2`). Both merges are built\n",
    "# from `tri_gen_target` in one expression so re-running the cell is safe.\n",
    "disease_gen_one = (\n",
    "    tri_gen_target\n",
    "    .merge(dis_gen, on=[\"gene_id\", \"disease_id\"], how=\"inner\")\n",
    "    .merge(\n",
    "        dis_gen.rename(columns={\"disease_id\": \"disease2\"}),\n",
    "        on=[\"gene_id\", \"disease2\"],\n",
    "        how=\"inner\",\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e3fb7f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `score_x` / `score_y` are the suffixed columns produced by merging dis_gen\n",
    "# twice. Once they and `gene_id` are gone, rows that differed only by gene\n",
    "# collapse into one. `disease2` takes the CSBJ column name so the two sources\n",
    "# line up when concatenated.\n",
    "triples_final_drege = (\n",
    "    disease_gen_one\n",
    "    .drop(columns=[\"score_x\", \"score_y\", \"gene_id\"])\n",
    "    .drop_duplicates()\n",
    "    .rename(columns={\"disease2\": \"New Condition CUI\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6e7cd640",
   "metadata": {},
   "source": [
    "## CSBJ triples\n",
    "\n",
    "Same gene filter as above, applied to `triplets_chembl_disnet.xlsx`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32cdd921",
   "metadata": {},
   "outputs": [],
   "source": [
    "triplets_csbj = (\n",
    "    pd.read_excel(\"triplets_chembl_disnet.xlsx\", engine=\"openpyxl\")\n",
    "    .drop(columns=[\"Unnamed: 0\"])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "868f7e2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "tri_gen_target_csbj = (\n",
    "    triplets_csbj\n",
    "    .merge(drug_gen, on=\"drug_id\", how=\"inner\")\n",
    "    .rename(columns={\"Original Condition CUI\": \"disease_id\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d86d80dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gene must be linked to the original condition ...\n",
    "disease_gen_one_csbj = tri_gen_target_csbj.merge(\n",
    "    dis_gen, on=[\"gene_id\", \"disease_id\"], how=\"inner\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54fed905",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ... and to the new condition. dis_gen is re-keyed so it can be joined on the\n",
    "# CSBJ column name.\n",
    "dis_gen_new = dis_gen.rename(columns={\"disease_id\": \"New Condition CUI\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "174647b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "disease_gen_one_csbj_twounion = disease_gen_one_csbj.merge(\n",
    "    dis_gen_new, on=[\"gene_id\", \"New Condition CUI\"], how=\"inner\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8ee708d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the triple itself; rows that differed only by gene or score collapse.\n",
    "triplets_csbj_drege = (\n",
    "    disease_gen_one_csbj_twounion[[\"disease_id\", \"New Condition CUI\", \"drug_id\"]]\n",
    "    .drop_duplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad972954",
   "metadata": {},
   "source": [
    "## Final DREGE triples\n",
    "\n",
    "Union of the CSBJ and repoDB results, de-duplicated and written to Excel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6837711",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): if the repoDB frame still carries columns beyond the three triple\n",
    "# columns, concat fills them with NaN for the CSBJ rows. Confirm this is intended.\n",
    "triples_final_drege_repo_csbj = (\n",
    "    pd.concat([triplets_csbj_drege, triples_final_drege])\n",
    "    .drop_duplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75d2b323",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The index is written as the first column, as before.\n",
    "triples_final_drege_repo_csbj.to_excel(\"triples_final_drege_repo_csbj.xlsx\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}