{ "cells": [
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sqlalchemy import create_engine\n", "from sklearn import preprocessing\n", "import mysql.connector\n", "from pandas import DataFrame" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. DATA " ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Load the repoDB table; this file is parsed as tab-separated.\n", "duples_repodb = pd.read_csv(\"./Data/Input/Drug Repurposing/repoDB_all_disdru.tsv\", sep='\\t')" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Load the CSBJ table. No separator is passed, so pandas' default (comma) is used\n", "# despite the .tsv extension -- NOTE(review): confirm the file really is comma-delimited.\n", "duples_csbj = pd.read_csv(\"./Data/Input/Drug Repurposing/duplas_CSBJ.tsv\")" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Remove the 'Unnamed: 0' column. errors='ignore' keeps this cell idempotent:\n", "# re-running it after the column is already gone no longer raises KeyError.\n", "duples_csbj = duples_csbj.drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Rename the 'Disease CUI' column to 'disease_id'.\n", "duples_csbj = duples_csbj.rename(columns={\"Disease CUI\": \"disease_id\"})" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | disease_id | \n", "pathway_id | \n", "
---|---|---|
0 | \n", "C0020538 | \n", "WP554 | \n", "
1 | \n", "C0018799 | \n", "WP1544 | \n", "
2 | \n", "C0018799 | \n", "WP1528 | \n", "
3 | \n", "C0027947 | \n", "WP229 | \n", "
4 | \n", "C0013369 | \n", "WP229 | \n", "
... | \n", "... | \n", "... | \n", "
659 | \n", "C0268274 | \n", "WP4153 | \n", "
660 | \n", "C0085131 | \n", "WP4153 | \n", "
661 | \n", "C0036161 | \n", "WP4153 | \n", "
662 | \n", "C0268275 | \n", "WP4153 | \n", "
663 | \n", "C0162666 | \n", "WP4236 | \n", "
664 rows × 2 columns
\n", "