Comparing PRE vs POST heatmaps

ac24b62c · Joaquin Torres · 8616e8a5 · ac24b62c · ac24b62c
Commit ac24b62c authored Apr 22, 2024 by Joaquin Torres
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 17900 additions and 17814 deletions

EDA.ipynb EDA.ipynb +95 -9

EDA_plots/heatmaps_one_hot.svg EDA_plots/heatmaps_one_hot.svg +17805 -17805

No files found.
--- a/EDA.ipynb
+++ b/EDA.ipynb
@@ -72,13 +72,13 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_12292\\2495984927.py:18: SettingWithCopyWarning: \n",
+      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_15848\\2495984927.py:18: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  conj_post['Group'] = 'Post'\n",
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_12292\\2495984927.py:19: SettingWithCopyWarning: \n",
+      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_15848\\2495984927.py:19: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
@@ -225,7 +225,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_12292\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
+      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_15848\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
      "  bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n"
     ]
    }
@@ -854,7 +854,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -935,12 +935,14 @@
    "        plt.Line2D([0], [0], marker='o', color='w', label='Individual Factors', markerfacecolor='green', markersize=10)\n",
    "    ], bbox_to_anchor=(-0.1, -0.1), fontsize = 20)\n",
    "\n",
-    "    plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})"
+    "    plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})\n",
+    "\n",
+    "    return corr_matrix"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
@@ -957,12 +959,21 @@
   "source": [
    "fig, axs = plt.subplots(3, 3, figsize=(50, 50))\n",
    "plt.subplots_adjust(hspace=0.75, wspace=2)\n",
+    "corr_mats = [] # List of tuples (m1, m2) to store the 3 pairs of matrices to compare (pre vs post)\n",
    "\n",
    "# Go through possible values for 'Situacion_tratamiento' and 'Group'\n",
    "for sit_tto in range(1,4):\n",
-    "    for group in range(1,4):\n",
+    "    # ALL\n",
-    "        plt.subplot(3, 3, 3*(sit_tto-1) + group)  # Calculate the subplot position dynamically\n",
+    "    plt.subplot(3, 3, 3*(sit_tto-1) + 1)  # Calculate the subplot position dynamically\n",
-    "        plot_heatmap(sit_tto, group)\n",
+    "    _ = plot_heatmap(sit_tto, 1)\n",
+    "    # PRE\n",
+    "    plt.subplot(3, 3, 3*(sit_tto-1) + 2) \n",
+    "    corr_matrix_pre = plot_heatmap(sit_tto, 2)\n",
+    "    # POST\n",
+    "    plt.subplot(3, 3, 3*(sit_tto-1) + 3)\n",
+    "    corr_matrix_post = plot_heatmap(sit_tto, 3)\n",
+    "\n",
+    "    corr_mats.append((corr_matrix_pre, corr_matrix_post))\n",
    "        \n",
    "# Adjust layout to prevent overlapping titles\n",
    "plt.tight_layout()\n",
@@ -970,6 +981,81 @@
    "# Save the figure in SVG format in the \"./EDA_plots\" folder\n",
    "plt.savefig('./EDA_plots/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Finding significant differences between PRE and POST"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def find_diff(sit_tto: int, m_pre, m_post, threshold: float = 0.25):\n",
+    "    \"\"\"Print each variable pair whose PRE vs POST values differ by more than `threshold`.\n",
+    "\n",
+    "    sit_tto: treatment-situation code; when it is 1 the labels start with target_var + '_REDEF'.\n",
+    "    m_pre, m_post: matrices read positionally as m[i][j], one row/column per label.\n",
+    "    threshold: minimum absolute difference to report; defaults to 0.25.\n",
+    "    Returns None; matching pairs are printed as 'var_i--var_j: diff'.\n",
+    "    \"\"\"\n",
+    "    # Labels are assumed to follow the same order used to build the matrices -- TODO confirm against plot_heatmap\n",
+    "    cols = ([target_var + '_REDEF'] if sit_tto == 1 else []) + corr_cols\n",
+    "    # Go through every (row, column) position of the matrices\n",
+    "    for i, var_i in enumerate(cols):\n",
+    "        for j, var_j in enumerate(cols):\n",
+    "            diff = abs(m_pre[i][j] - m_post[i][j])\n",
+    "            # A NaN difference (if any) compares False here, so that pair is not reported\n",
+    "            if diff > threshold:\n",
+    "                print(f\"{var_i}--{var_j}: {diff:.2f}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "------SIT_TTO 1: NO FILTERING------\n",
+      "Alucinogenos_DXCIE_REDEF--Ed_Secondary more technical education: 0.55\n",
+      "Alucinogenos_DXCIE_REDEF--JobIn_Stable: 0.64\n",
+      "Alucinogenos_DXCIE_REDEF--SocInc_live alone: 0.53\n",
+      "Frec30_2-3 días‎/semana--Alucinogenos_DXCIE_REDEF: 0.54\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "------SIT_TTO 2: ABANDONO-----\n",
+      "Alucinogenos_DXCIE_REDEF--Ed_Secondary more technical education: 0.51\n",
+      "Alucinogenos_DXCIE_REDEF--JobIn_Stable: 0.58\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "------SIT_TTO 3: ALTA-----\n",
+      "Hous_Unstable--Ed_Secondary Education: 0.62\n",
+      "Opiaceos_DxCIE_REDEF--Hous_Unstable: 0.53\n",
+      "BZD_DxCIE_REDEF--Ed_Tertiary: 0.60\n",
+      "Alucinogenos_DXCIE_REDEF--Ed_Primary education: 0.61\n",
+      "Alucinogenos_DXCIE_REDEF--JobIn_Non-stable: 0.54\n",
+      "Frec30_Desconocido--JobIn_Stable: 0.60\n",
+      "Frec30_Desconocido--JobIn_Unemployed: 0.66\n",
+      "Frec30_No consumio--BZD_DxCIE_REDEF: 0.55\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Section banners, one per treatment situation (situation code = position + 1)\n",
+    "banners = (\n",
+    "    \"------SIT_TTO 1: NO FILTERING------\",\n",
+    "    \"------SIT_TTO 2: ABANDONO-----\",\n",
+    "    \"------SIT_TTO 3: ALTA-----\",\n",
+    ")\n",
+    "for situation, banner in enumerate(banners, start=1):\n",
+    "    if situation > 1:\n",
+    "        print(\"\\n\\n\\n\")  # spacer between consecutive sections\n",
+    "    print(banner)\n",
+    "    pair = corr_mats[situation - 1]  # (pre, post) matrices stored for this situation\n",
+    "    find_diff(situation, pair[0], pair[1])"
+   ]
  }
 ],
 "metadata": {
--- a/EDA_plots/heatmaps_one_hot.svg
+++ b/EDA_plots/heatmaps_one_hot.svg