{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from pandas import DataFrame\n",
    "from scipy import stats\n",
    "from statsmodels.stats.diagnostic import lilliefors\n",
    "from scipy.stats import mannwhitneyu, levene"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DISNET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated DISNET table and drop its \"Unnamed: 0\" column in one step\n",
    "# (presumably an index column saved with the file -- TODO confirm).\n",
    "dis_gen = pd.read_csv('dis_genes.tsv', sep='\\t').drop(columns=[\"Unnamed: 0\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the \"score\" column out of the DISNET table.\n",
    "disnet_score = dis_gen[\"score\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the selected scores in a DataFrame (safe to re-run: a DataFrame stays a DataFrame).\n",
    "disnet_score = pd.DataFrame(data=disnet_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.13747265487982685"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean of the \"score\" column.\n",
    "dis_gen[\"score\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count 353628.000000\n",
       "mean 0.137473\n",
       "std 0.129536\n",
       "min 0.010000\n",
       "25% 0.050000\n",
       "50% 0.100000\n",
       "75% 0.130000\n",
       "max 1.000000\n",
       "Name: score, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) of the \"score\" column.\n",
    "dis_gen[\"score\"].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# REPODB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated REPODB table and drop its \"Unnamed: 0\" column in one step\n",
    "# (presumably an index column saved with the file -- TODO confirm).\n",
    "cases_repodb_target = pd.read_csv(\"score_gdas_repodb_target_final.tsv\", sep='\\t').drop(columns=[\"Unnamed: 0\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
"text/html": [ "
\n", " | disease_id | \n", "drug_id | \n", "gene_id | \n", "score | \n", "disease_new | \n", "
---|---|---|---|---|---|
0 | \n", "C0007134 | \n", "CHEMBL1908360 | \n", "2475 | \n", "0.50 | \n", "C0003873 | \n", "
1 | \n", "C0007134 | \n", "CHEMBL1908360 | \n", "2475 | \n", "0.50 | \n", "C0004153 | \n", "
2 | \n", "C0007134 | \n", "CHEMBL1908360 | \n", "2475 | \n", "0.50 | \n", "C0006413 | \n", "
3 | \n", "C0007134 | \n", "CHEMBL1908360 | \n", "2475 | \n", "0.50 | \n", "C0007131 | \n", "
4 | \n", "C0007134 | \n", "CHEMBL1908360 | \n", "2475 | \n", "0.50 | \n", "C0007137 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
7769 | \n", "C0025202 | \n", "CHEMBL1131 | \n", "5915 | \n", "0.02 | \n", "C0033860 | \n", "
7770 | \n", "C0025202 | \n", "CHEMBL1131 | \n", "6256 | \n", "0.02 | \n", "C0033860 | \n", "
7771 | \n", "C0010674 | \n", "CHEMBL1520 | \n", "8654 | \n", "0.03 | \n", "C0242350 | \n", "
7772 | \n", "C0026769 | \n", "CHEMBL1201563 | \n", "3454 | \n", "0.03 | \n", "C0751967 | \n", "
7773 | \n", "C0026769 | \n", "CHEMBL1201563 | \n", "3455 | \n", "0.02 | \n", "C0751967 | \n", "
7774 rows × 5 columns
\n", "\n", " | score | \n", "Drug_repositioning_type | \n", "
---|---|---|
0 | \n", "0.10 | \n", "DISNET | \n", "
1 | \n", "0.10 | \n", "DISNET | \n", "
2 | \n", "0.10 | \n", "DISNET | \n", "
3 | \n", "0.10 | \n", "DISNET | \n", "
4 | \n", "0.10 | \n", "DISNET | \n", "
... | \n", "... | \n", "... | \n", "
7769 | \n", "0.02 | \n", "DREGE | \n", "
7770 | \n", "0.02 | \n", "DREGE | \n", "
7771 | \n", "0.03 | \n", "DREGE | \n", "
7772 | \n", "0.03 | \n", "DREGE | \n", "
7773 | \n", "0.02 | \n", "DREGE | \n", "
361805 rows × 2 columns
\n", "