From 44c15158f3eb4c6acacca23e40de7b0153137f80 Mon Sep 17 00:00:00 2001 From: aalvarezp Date: Tue, 13 May 2025 10:10:23 +0200 Subject: [PATCH] File update --- .gitignore | 16 +- .idea/misc.xml | 5 +- ...medicine-and-single-cell-for-alzheimer.iml | 2 +- code/scrna_ppi_analysis.ipynb | 585 ++- figures/betweenness_boxplot.svg | 3216 +++++++++++++++++ figures/closeness_boxplot.svg | 1989 ++++++++++ figures/clustering_boxplot.svg | 2202 +++++++++++ 7 files changed, 7907 insertions(+), 108 deletions(-) create mode 100644 figures/betweenness_boxplot.svg create mode 100644 figures/closeness_boxplot.svg create mode 100644 figures/clustering_boxplot.svg diff --git a/.gitignore b/.gitignore index d2f4cee..a61d9d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,16 @@ -# Ignore .idea folder +# Byte-compiled / cache +__pycache__/ +*.so + +# Jupyter notebook checkpoints +.ipynb_checkpoints/ + + + +# System files +.DS_Store + +# IDEs and editors +.vscode/ .idea/ +*.swp diff --git a/.idea/misc.xml b/.idea/misc.xml index d56657a..db8786c 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,7 @@ - + + + \ No newline at end of file diff --git a/.idea/network-medicine-and-single-cell-for-alzheimer.iml b/.idea/network-medicine-and-single-cell-for-alzheimer.iml index d0876a7..f571432 100644 --- a/.idea/network-medicine-and-single-cell-for-alzheimer.iml +++ b/.idea/network-medicine-and-single-cell-for-alzheimer.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/code/scrna_ppi_analysis.ipynb b/code/scrna_ppi_analysis.ipynb index 78d7ebc..c4d7bd7 100644 --- a/code/scrna_ppi_analysis.ipynb +++ b/code/scrna_ppi_analysis.ipynb @@ -12,23 +12,24 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "6e3db0831d32394b" }, { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, - "ExecuteTime": { - "end_time": "2025-02-27T09:37:56.583250Z", - "start_time": "2025-02-27T09:37:48.632535Z" - }, "pycharm": { "name": "#%%\n" + }, + "ExecuteTime": { + 
"end_time": "2025-05-07T13:08:52.533981Z", + "start_time": "2025-05-07T13:08:47.590128Z" } }, "source": [ - "# import scanpy as sc\n", + "import scanpy as sc\n", "import sys\n", "import pandas as pd\n", "import networkx as nx\n", @@ -48,7 +49,7 @@ "import functions" ], "outputs": [], - "execution_count": 10 + "execution_count": 3 }, { "metadata": { @@ -124,7 +125,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "7c9eb390f9654651" }, { "metadata": { @@ -218,7 +220,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "b327b0c0db43aaee" }, { "metadata": { @@ -250,7 +253,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "1af23243eaf29f9d" }, { "cell_type": "markdown", @@ -266,7 +270,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "fdd1e2ccbd382133" }, { "cell_type": "code", @@ -280,7 +285,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "43637bd25e62f2dd" }, { "cell_type": "code", @@ -294,7 +300,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "c9c7639db9679fd" }, { "cell_type": "code", @@ -308,7 +315,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "9867bba3fbce26b4" }, { "cell_type": "markdown", @@ -321,7 +329,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "1ea50780fc249bf" }, { "cell_type": "code", @@ -335,7 +344,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "bdeb091f50cba282" }, { "cell_type": "code", @@ -372,7 +382,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "9aba2e6e2fcbf462" }, { "cell_type": "code", @@ -399,7 +410,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "5b7875f61f3fa7f7" }, { "cell_type": "code", @@ -422,7 +434,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "8383266313fe69e5" }, { "cell_type": "markdown", @@ -431,6 +444,22 @@ "\n", "Scanpy, when running sc.tl.rank_genes_groups(), automatically prioritises adata.raw if it exists. 
In this case, we saved the raw array in adata.raw to begin with (which is correct), but Scanpy is using that raw array instead of the normalised, log-transformed .X array.\n", "\n", + "**sc.tl.rank_genes_groups()** performs a differential expression analysis between groups of cells defined in adata_type.obs['disease']. In this case, it compares cells labelled as 'Alzheimer disease' against those labelled as 'normal', which is taken as the reference group.\n", + "\n", + "**Wilcoxon rank-sum test** is a non-parametric test that is well suited to scRNA-seq data. For each gene, it tests whether its expression is significantly different in one group with respect to the other.\n", + "- Null hypothesis (H₀): the distribution of gene expression is equal in both groups (Alzheimer and normal).\n", + "- Alternative hypothesis (H₁): the distributions are different (the test is two-sided by default).\n", + "\n", + "The test does not assume normality, only that the values are ordinal, which is ideal for scRNA-seq (where the data are usually noisy and with zeros).\n", + "\n", + "**sc.get.rank_genes_groups_df()** extracts the results in DataFrame format, here for the 'Alzheimer disease' vs 'normal' comparison.\n", + "\n", + "Then, a threshold is applied:\n", + "- **abs(logfoldchanges) >= 0.25** in order for the change to be biologically relevant.\n", + "- **pvals_adj <= 0.05** in order for the change to be statistically significant.\n", + "\n", + "A gene passing both thresholds indicates that there is sufficient evidence to reject the null hypothesis that the gene is equally expressed between groups.\n", + "\n", "Files **degs_{cell_type}_total.csv** stores all differentially expressed genes found per each cell type, identified by the ENSEMBL ID."
], "metadata": { @@ -438,7 +467,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "2e30f0c5e3294df0" }, { "cell_type": "code", @@ -593,22 +623,159 @@ " # Extract results of DEGs for \"disease\"\" condition (compared to normal)\n", " degs_disease = sc.get.rank_genes_groups_df(adata_type, group='Alzheimer disease')\n", "\n", + " # All DEGs independetly of the logFC and p-adj\n", + " print(f'{len(degs_disease)} total DEGs found for {type} for Alzheimer disease')\n", + "\n", + " # degs_disease.to_csv(f'../data/complete/degs_{type}.csv', index=False)\n", + "\n", " # Filter by logFC y p-adj\n", " degs_disease_filtered = degs_disease[\n", " (degs_disease['logfoldchanges'].abs() >= 0.25) &\n", " (degs_disease['pvals_adj'] <= 0.05)\n", " ]\n", "\n", - " print(f'{len(degs_disease_filtered)} DEGs found for {type} for Alzheimer disease')\n", + " print(f'{len(degs_disease_filtered)} significant DEGs found for {type} for Alzheimer disease')\n", "\n", - " degs_disease_filtered.to_csv(f'../data/complete/degs_{type}_total.csv', index=False)" + " # degs_disease_filtered.to_csv(f'../data/complete/degs_{type}_total.csv', index=False)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "id": "d632f920ae57bec0" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### 3.1. Fraction expressed\n", + "\n", + "The percentage (or proportion) of cells within a group that express a given gene.\n", + "\n", + "For example, suppose you have a cell type (e.g., astrocytes) with 100 cells. For a specific gene (e.g., GEN1), you look at how many of those 100 cells have an expression greater than 0 for that gene. If GEN1 is expressed (i.e., has a value greater than 0) in 25 of those cells, then: fraction_expressed = 25 / 100 = 0.25." 
+ ], + "id": "92db8d7ff52f3e4b" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "fractions = []\n", + "\n", + "for type, adata_type in tqdm(data_per_type.items(), desc= 'Analyzing cell types...'):\n", + "\n", + " gene_names = adata_type.var_names\n", + "\n", + " # Compute fraction of cells expressing each gene in each group\n", + " fractions_type = {}\n", + "\n", + " fraction = ((adata_type.X > 0).sum(axis=0) / adata_type.shape[0]) * 100\n", + " fraction = np.asarray(fraction).flatten()\n", + "\n", + " # Create dataframe with gene names and fractions\n", + " df = pd.DataFrame({\n", + " 'gene': gene_names,\n", + " 'fraction_expressed': fraction,\n", + " 'cell_type': type\n", + " })\n", + "\n", + " fractions.append(df)\n", + "\n", + "# Convert to DataFrame for easy viewing\n", + "fraction_df = pd.concat(fractions, ignore_index=True)\n", + "\n", + "print(fraction_df)" + ], + "id": "eec92c5171f93ed4" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "## Fraction expressed but instead of in a dataframe, in a matrix form with genes as columns and cell types as rows\n", + "\n", + "genes = adata.var_names\n", + "\n", + "# Esto te da los grupos (por ejemplo, tipos celulares)\n", + "groupby = adata.obs['cell_type'].unique()\n", + "\n", + "# Creamos un diccionario para guardar la fracción expresada\n", + "fraction_expressed = {}\n", + "\n", + "for group in tqdm(groupby):\n", + " # Subconjunto de células de este tipo\n", + " cells = adata[adata.obs['cell_type'] == group]\n", + "\n", + " # Matriz de expresión para los genes de interés (X es la matriz de expresión)\n", + " X = cells[:, genes].X\n", + "\n", + " # Convertimos a denso si es necesario (algunas veces es sparse)\n", + " if not isinstance(X, np.ndarray):\n", + " X = X.toarray()\n", + "\n", + " # Cálculo: cuántas células tienen expresión > 0 para cada gen\n", + " frac = (X > 0).sum(axis=0) / 
X.shape[0]\n", + "\n", + " fraction_expressed[group] = frac\n", + "\n", + "# Convertimos a DataFrame para visualizar\n", + "fraction_expressed_df = pd.DataFrame(fraction_expressed, index=genes).T\n", + "print(fraction_expressed_df)" + ], + "id": "a75080c359691c8d" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "sns.set_context(\"notebook\")\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "plt.figure(figsize=(5,4))\n", + "\n", + "flierprops = dict(marker='o', markersize=3, linestyle='none')\n", + "\n", + "sns.boxplot(fraction_df, y = 'fraction_expressed', x = 'cell_type', flierprops=flierprops)\n", + "\n", + "plt.tick_params(axis='both', which='both', bottom=True, left=True)\n", + "\n", + "plt.xlabel(\"\")\n", + "plt.ylabel(\"Fraction of cells for each cell type that express a given gene\", fontsize = 5)\n", + "plt.xticks(rotation = 45, fontsize = 5)\n", + "plt.yticks(fontsize = 5)\n", + "\n", + "sns.despine()\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "id": "3dde199003b423bc" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Percentage of zeros in 'fraction_expressed' per 'cell_type'. 
Conditioned by the number of DEGs per cell type (less DEGs, more zeros)\n", + "\n", + "percentage_zeros = (\n", + " fraction_df.assign(is_zero=fraction_df['fraction_expressed'] == 0)\n", + " .groupby('cell_type')['is_zero']\n", + " .mean()\n", + " .multiply(100)\n", + ")\n", + "\n", + "print(\"Percentage of zeros per cell_type:\\n\", percentage_zeros)" + ], + "id": "3b0374928af98c82" }, { "cell_type": "markdown", @@ -626,7 +793,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "1a79dc025e43d3" }, { "cell_type": "code", @@ -641,7 +809,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "cdf1a85b269314b5" }, { "cell_type": "code", @@ -675,7 +844,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "71f608e94690e6f1" }, { "cell_type": "code", @@ -699,7 +869,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "320def8ad16af8cb" }, { "cell_type": "code", @@ -858,7 +1029,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "677c54fe169190da" }, { "cell_type": "markdown", @@ -870,7 +1042,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "4b2adc20d87588c9" }, { "cell_type": "code", @@ -889,7 +1062,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "de00e0b46cc4fd78" }, { "cell_type": "code", @@ -904,7 +1078,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "b58d0adb9e49f1df" }, { "cell_type": "code", @@ -929,7 +1104,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "320848226dd257" }, { "cell_type": "markdown", @@ -945,7 +1121,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "b8f5241a08d75a8d" }, { "cell_type": "code", @@ -959,7 +1136,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "e44ba2befa4ea4b2" }, { "cell_type": "code", @@ -1001,7 +1179,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "389ae6f91a195fc2" }, { "cell_type": "code", @@ -1026,7 +1205,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "83c782bed4b5b159" }, { "cell_type": "markdown", @@ -1043,7 +1223,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": 
"dad8b5ac45e9241b" }, { "cell_type": "code", @@ -1059,7 +1240,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "62418fa14954172e" }, { "cell_type": "code", @@ -1073,7 +1255,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "486eff5f3bbfb63f" }, { "cell_type": "code", @@ -1095,7 +1278,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "da4460d1aae38f56" }, { "cell_type": "code", @@ -1109,7 +1293,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "d507efc9b2068b6a" }, { "cell_type": "code", @@ -1135,7 +1320,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "1105abd8ce45459d" }, { "cell_type": "markdown", @@ -1153,7 +1339,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "ba65d0b5e37d00" }, { "cell_type": "code", @@ -1195,7 +1382,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "debe4e5f76e493f1" }, { "cell_type": "markdown", @@ -1222,7 +1410,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "14c6ef51514baa51" }, { "cell_type": "code", @@ -1236,7 +1425,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "36ec09d23b5deac6" }, { "cell_type": "code", @@ -1420,7 +1610,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "26ab0c73b66197fb" }, { "cell_type": "code", @@ -1444,7 +1635,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "fab2e3fc09d91232" }, { "cell_type": "code", @@ -1458,7 +1650,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "aad0d6cb90a2a129" }, { "cell_type": "markdown", @@ -1470,7 +1663,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "73076752d5a07029" }, { "cell_type": "code", @@ -1484,7 +1678,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "24ffb1d66118d53d" }, { "cell_type": "code", @@ -1541,7 +1736,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "9ec6f81c3535120b" }, { "cell_type": "code", @@ -1555,7 +1751,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "756ed2c087c6a80b" }, { "cell_type": "markdown", @@ -1567,7 +1764,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "c00d209c275d105" 
}, { "cell_type": "code", @@ -1582,7 +1780,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "3650bf9c9f06e457" }, { "cell_type": "code", @@ -1675,7 +1874,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "8ff326052036394c" }, { "cell_type": "markdown", @@ -1687,7 +1887,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "811f2c043899cde8" }, { "cell_type": "code", @@ -1702,7 +1903,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "35b6e0963296b42" }, { "cell_type": "code", @@ -1717,7 +1919,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "cc2ecf08f2268100" }, { "cell_type": "markdown", @@ -1729,7 +1932,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "5161ee29eae2c753" }, { "cell_type": "code", @@ -1745,7 +1949,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "cd0c8bf7020ff320" }, { "cell_type": "code", @@ -1771,7 +1976,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "eb51c1f6f73d00ef" }, { "cell_type": "code", @@ -1820,7 +2026,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "7c9916fcf5f41771" }, { "cell_type": "markdown", @@ -1832,7 +2039,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "fc90b950ca3b48da" }, { "cell_type": "code", @@ -1847,7 +2055,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "2baf164cd35de867" }, { "cell_type": "code", @@ -1869,7 +2078,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "2316a71810b114b0" }, { "cell_type": "code", @@ -1884,7 +2094,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "1ae100391c3b3b2b" }, { "cell_type": "code", @@ -1909,7 +2120,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "79248fec0321c6e6" }, { "cell_type": "markdown", @@ -1921,7 +2133,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "1568a88bb60744d7" }, { "cell_type": "code", @@ -1935,7 +2148,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "ed3f1cc6e47a628b" }, { "cell_type": "code", @@ -2027,7 +2241,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "972930a0f2d2d237" }, { "cell_type": 
"markdown", @@ -2045,7 +2260,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "fb4bab50846dc8ed" }, { "cell_type": "code", @@ -2061,7 +2277,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "729f9cc8a9cb0da2" }, { "cell_type": "code", @@ -2075,7 +2292,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "ec240b52f8abf701" }, { "cell_type": "code", @@ -2096,7 +2314,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "e0f294118c7c8b50" }, { "cell_type": "code", @@ -2111,7 +2330,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "a14c03fb7c7f181b" }, { "cell_type": "markdown", @@ -2123,7 +2343,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "cebc1f904da014e8" }, { "cell_type": "code", @@ -2156,7 +2377,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "9d2d785a496d0b74" }, { "cell_type": "code", @@ -2171,7 +2393,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "91d4e2e356a3135a" }, { "cell_type": "code", @@ -2192,7 +2415,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "7b0416a26b27e657" }, { "cell_type": "code", @@ -2206,7 +2430,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "d6685ea1ded8af48" }, { "cell_type": "code", @@ -2220,7 +2445,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "1566902d75d5d3c5" }, { "cell_type": "markdown", @@ -2232,7 +2458,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "82770eeeea3b2858" }, { "cell_type": "code", @@ -2246,7 +2473,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "4af0f8f342eb46f1" }, { "cell_type": "code", @@ -2305,7 +2533,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "d98d305963c7d4ee" }, { "cell_type": "markdown", @@ -2317,7 +2546,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "3d0220c29266cc6a" }, { "cell_type": "code", @@ -2334,7 +2564,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "3237a956b8eb2b7c" }, { "cell_type": "code", @@ -2374,7 +2605,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "950b80af953c2896" }, { "cell_type": "code", @@ 
-2388,7 +2620,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "251c73b3b42201a1" }, { "cell_type": "code", @@ -2458,7 +2691,135 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "9f37530477a84199" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Subset sizes calculation\n", + "subset_sizes = combined_data.groupby(\"dataset\")[\"protein_id\"].nunique()\n", + "\n", + "# Generate color palette\n", + "ordered_datasets = subset_sizes.sort_values(ascending=False).index\n", + "combined_data[\"dataset\"] = pd.Categorical(combined_data[\"dataset\"], categories=ordered_datasets, ordered=True)\n", + "palette = {dataset: '#79C4FF' for dataset in ordered_datasets}\n", + "\n", + "# Assign different color to module\n", + "palette[\"General module for AD\"] = \"#D3D3D3\"\n", + "\n", + "fig, ax = plt.subplots(figsize=(9, 5))\n", + "\n", + "sns.set_context(\"paper\")\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "sns.boxplot(data=combined_data, x=\"dataset\", y=\"betweenness_centrality\", ax=ax, palette=palette, order=ordered_datasets,dodge=False)\n", + "\n", + "ax.tick_params(axis='both', which='both', bottom=True, left=True)\n", + "\n", + "plt.title('Betweenness centrality distribution for the Alzheimer disease module and each cell type', fontsize=16)\n", + "plt.xlabel('')\n", + "ax.set_xticklabels([f\"{titles[i]}\\nn = {subset_sizes[dataset]}\" for i, dataset in enumerate(ordered_datasets)],\n", + " rotation=45, ha=\"center\", fontsize=12)\n", + "\n", + "plt.yticks(fontsize=12)\n", + "plt.yscale('log')\n", + "plt.ylabel('log(Betweenness centrality)', fontsize=12)\n", + "sns.despine()\n", + "\n", + "plt.tight_layout()\n", + "# plt.savefig('../figures/betweenness_boxplot.pdf', format='pdf', dpi=1200)\n", + "plt.savefig('../figures/betweenness_boxplot.svg', format='svg', dpi=1200)\n", + "plt.show()" + ], + "id": "a75971402585ab7" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + 
"execution_count": null, + "source": [ + "# Subset sizes calculation\n", + "subset_sizes = combined_data.groupby(\"dataset\")[\"protein_id\"].nunique()\n", + "\n", + "# Generate color palette\n", + "ordered_datasets = subset_sizes.sort_values(ascending=False).index\n", + "combined_data[\"dataset\"] = pd.Categorical(combined_data[\"dataset\"], categories=ordered_datasets, ordered=True)\n", + "palette = {dataset: '#79C4FF' for dataset in ordered_datasets}\n", + "\n", + "# Assign different color to module\n", + "palette[\"General module for AD\"] = \"#D3D3D3\"\n", + "\n", + "fig, ax = plt.subplots(figsize=(9, 5))\n", + "\n", + "sns.set_context(\"paper\")\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "sns.boxplot(data=combined_data, x=\"dataset\", y=\"closeness_centrality\", ax=ax, palette=palette, order=ordered_datasets,dodge=False)\n", + "\n", + "ax.tick_params(axis='both', which='both', bottom=True, left=True)\n", + "\n", + "plt.title('Closeness centrality distribution for the Alzheimer disease module and each cell type', fontsize=16)\n", + "plt.xlabel('')\n", + "ax.set_xticklabels([f\"{titles[i]}\\nn = {subset_sizes[dataset]}\" for i, dataset in enumerate(ordered_datasets)],\n", + " rotation=45, ha=\"center\", fontsize=12)\n", + "\n", + "plt.yticks(fontsize=12)\n", + "plt.ylabel('Closeness centrality', fontsize=12)\n", + "sns.despine()\n", + "\n", + "plt.tight_layout()\n", + "# plt.savefig('../figures/closeness_boxplot.pdf', format='pdf', dpi=1200)\n", + "plt.savefig('../figures/closeness_boxplot.svg', format='svg', dpi=1200)\n", + "plt.show()" + ], + "id": "b3f8b51fb337d705" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Subset sizes calculation\n", + "subset_sizes = combined_data.groupby(\"dataset\")[\"protein_id\"].nunique()\n", + "\n", + "# Generate color palette\n", + "ordered_datasets = subset_sizes.sort_values(ascending=False).index\n", + "combined_data[\"dataset\"] = 
pd.Categorical(combined_data[\"dataset\"], categories=ordered_datasets, ordered=True)\n", + "palette = {dataset: '#79C4FF' for dataset in ordered_datasets}\n", + "\n", + "# Assign different color to module\n", + "palette[\"General module for AD\"] = \"#D3D3D3\"\n", + "\n", + "fig, ax = plt.subplots(figsize=(9, 5))\n", + "\n", + "sns.set_context(\"paper\")\n", + "sns.set_style(\"whitegrid\")\n", + "\n", + "sns.boxplot(data=combined_data, x=\"dataset\", y=\"clustering_coefficient\", ax=ax, palette=palette, order=ordered_datasets,dodge=False)\n", + "\n", + "ax.tick_params(axis='both', which='both', bottom=True, left=True)\n", + "\n", + "plt.title('Clustering coefficient distribution for the Alzheimer disease module and each cell type', fontsize=16)\n", + "plt.xlabel('')\n", + "ax.set_xticklabels([f\"{titles[i]}\\nn = {subset_sizes[dataset]}\" for i, dataset in enumerate(ordered_datasets)],\n", + " rotation=45, ha=\"center\", fontsize=12)\n", + "\n", + "plt.yticks(fontsize=12)\n", + "plt.ylabel('Clustering coefficient', fontsize=12)\n", + "sns.despine()\n", + "\n", + "plt.tight_layout()\n", + "# plt.savefig('../figures/clustering_boxplot.pdf', format='pdf', dpi=1200)\n", + "plt.savefig('../figures/clustering_boxplot.svg', format='svg', dpi=1200)\n", + "plt.show()" + ], + "id": "f5044677900aa100" }, { "cell_type": "markdown", @@ -2470,7 +2831,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "e6f887b5b178def1" }, { "cell_type": "code", @@ -2484,7 +2846,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "ab727ee10b040538" }, { "cell_type": "code", @@ -2531,7 +2894,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "6b57bbe54b5b28" }, { "cell_type": "code", @@ -2564,7 +2928,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "2581372c52955a56" }, { "cell_type": "code", @@ -2592,7 +2957,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "e139b54ba0f81255" }, { "cell_type": "code", @@ -2616,7 +2982,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": 
"cdce48893bc12655" }, { "cell_type": "markdown", @@ -2630,7 +2997,8 @@ "pycharm": { "name": "#%% md\n" } - } + }, + "id": "8bd41103417ab828" }, { "cell_type": "code", @@ -2647,7 +3015,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "ff63fb10fa5b4b3b" }, { "cell_type": "code", @@ -2663,7 +3032,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "1023e511329af34a" }, { "cell_type": "code", @@ -2677,7 +3047,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "800b8523df769f3" }, { "cell_type": "code", @@ -2693,7 +3064,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "54f44484637389" }, { "cell_type": "code", @@ -2715,7 +3087,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "f9b8f634f6a63750" }, { "cell_type": "code", @@ -2746,7 +3119,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "153d1bce0152d14c" }, { "cell_type": "code", @@ -2777,7 +3151,8 @@ "pycharm": { "name": "#%%\n" } - } + }, + "id": "7a6486a2755f8775" } ], "metadata": { @@ -2801,4 +3176,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/figures/betweenness_boxplot.svg b/figures/betweenness_boxplot.svg new file mode 100644 index 0000000..36224bc --- /dev/null +++ b/figures/betweenness_boxplot.svg @@ -0,0 +1,3216 @@ + + + + + + + + 2025-05-06T16:22:01.883648 + image/svg+xml + + + Matplotlib v3.9.4, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/figures/closeness_boxplot.svg b/figures/closeness_boxplot.svg new file mode 100644 index 0000000..b246205 --- /dev/null +++ b/figures/closeness_boxplot.svg @@ -0,0 +1,1989 @@ + + + + + + + + 2025-05-06T16:23:51.444955 + image/svg+xml + + + Matplotlib v3.9.4, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/figures/clustering_boxplot.svg b/figures/clustering_boxplot.svg new file mode 100644 index 0000000..a2e5dec --- /dev/null +++ b/figures/clustering_boxplot.svg @@ -0,0 +1,2202 @@ + + + + + + + + 2025-05-06T16:25:06.321864 + image/svg+xml + + + Matplotlib v3.9.4, https://matplotlib.org/ + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 2.24.1