Commit ac24b62c authored by Joaquin Torres's avatar Joaquin Torres

Comparing PRE vs POST heatmaps

parent 8616e8a5
...@@ -72,13 +72,13 @@ ...@@ -72,13 +72,13 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_12292\\2495984927.py:18: SettingWithCopyWarning: \n", "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_15848\\2495984927.py:18: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n", "Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n", "\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" conj_post['Group'] = 'Post'\n", " conj_post['Group'] = 'Post'\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_12292\\2495984927.py:19: SettingWithCopyWarning: \n", "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_15848\\2495984927.py:19: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n", "Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n", "\n",
...@@ -225,7 +225,7 @@ ...@@ -225,7 +225,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_12292\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n", "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_15848\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
" bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n" " bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n"
] ]
} }
...@@ -854,7 +854,7 @@ ...@@ -854,7 +854,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -935,12 +935,14 @@ ...@@ -935,12 +935,14 @@
" plt.Line2D([0], [0], marker='o', color='w', label='Individual Factors', markerfacecolor='green', markersize=10)\n", " plt.Line2D([0], [0], marker='o', color='w', label='Individual Factors', markerfacecolor='green', markersize=10)\n",
" ], bbox_to_anchor=(-0.1, -0.1), fontsize = 20)\n", " ], bbox_to_anchor=(-0.1, -0.1), fontsize = 20)\n",
"\n", "\n",
" plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})" " plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})\n",
"\n",
" return corr_matrix"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -957,12 +959,21 @@ ...@@ -957,12 +959,21 @@
"source": [ "source": [
"fig, axs = plt.subplots(3, 3, figsize=(50, 50))\n", "fig, axs = plt.subplots(3, 3, figsize=(50, 50))\n",
"plt.subplots_adjust(hspace=0.75, wspace=2)\n", "plt.subplots_adjust(hspace=0.75, wspace=2)\n",
"corr_mats = [] # List of tuples (m1, m2) to store the 3 pairs of matrices to compare (pre vs post)\n",
"\n", "\n",
"# Go through possible values for 'Situacion_tratamiento' and 'Group'\n", "# Go through possible values for 'Situacion_tratamiento' and 'Group'\n",
"for sit_tto in range(1,4):\n", "for sit_tto in range(1,4):\n",
" for group in range(1,4):\n", " # ALL\n",
" plt.subplot(3, 3, 3*(sit_tto-1) + group) # Calculate the subplot position dynamically\n", " plt.subplot(3, 3, 3*(sit_tto-1) + 1) # Calculate the subplot position dynamically\n",
" plot_heatmap(sit_tto, group)\n", " _ = plot_heatmap(sit_tto, 1)\n",
" # PRE\n",
" plt.subplot(3, 3, 3*(sit_tto-1) + 2) \n",
" corr_matrix_pre = plot_heatmap(sit_tto, 2)\n",
" # POST\n",
" plt.subplot(3, 3, 3*(sit_tto-1) + 3)\n",
" corr_matrix_post = plot_heatmap(sit_tto, 3)\n",
"\n",
" corr_mats.append((corr_matrix_pre, corr_matrix_post))\n",
" \n", " \n",
"# Adjust layout to prevent overlapping titles\n", "# Adjust layout to prevent overlapping titles\n",
"plt.tight_layout()\n", "plt.tight_layout()\n",
...@@ -970,6 +981,81 @@ ...@@ -970,6 +981,81 @@
"# Save the figure in SVG format in the \"./EDA_plots\" folder\n", "# Save the figure in SVG format in the \"./EDA_plots\" folder\n",
"plt.savefig('./EDA_plots/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')" "plt.savefig('./EDA_plots/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')"
] ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Finding significant differences between PRE and POST"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"def find_diff(sit_tto: int, m_pre, m_post, threshold: float = 0.25):\n",
"    \"\"\"Print variable pairs whose PRE vs POST correlation differs by more than `threshold`.\n",
"\n",
"    Args:\n",
"        sit_tto: Treatment-situation code. For 1 the label list is\n",
"            `[target_var + '_REDEF'] + corr_cols`; for any other value it is `corr_cols`.\n",
"        m_pre: PRE correlation matrix, read as `m_pre[i][j]`.\n",
"        m_post: POST correlation matrix, read as `m_post[i][j]`.\n",
"        threshold: Absolute difference a pair must strictly exceed to be reported.\n",
"            Defaults to 0.25, the value that was previously hard-coded.\n",
"\n",
"    Returns:\n",
"        None. Each reported pair is printed as `var_i--var_j: diff`.\n",
"    \"\"\"\n",
"    # NOTE(review): assumes both matrices are indexed positionally in the same\n",
"    # order as `cols` -- confirm against what plot_heatmap returns.\n",
"    if sit_tto == 1:\n",
"        cols = [target_var + '_REDEF'] + corr_cols\n",
"    else:\n",
"        cols = corr_cols\n",
"    # Compare every (i, j) cell of the two matrices\n",
"    for i, var_i in enumerate(cols):\n",
"        for j, var_j in enumerate(cols):\n",
"            diff = abs(m_pre[i][j] - m_post[i][j])\n",
"            # NaN differences compare False here, so such cells are skipped silently\n",
"            if diff > threshold:\n",
"                print(f\"{var_i}--{var_j}: {diff:.2f}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------SIT_TTO 1: NO FILTERING------\n",
"Alucinogenos_DXCIE_REDEF--Ed_Secondary more technical education: 0.55\n",
"Alucinogenos_DXCIE_REDEF--JobIn_Stable: 0.64\n",
"Alucinogenos_DXCIE_REDEF--SocInc_live alone: 0.53\n",
"Frec30_2-3 días‎/semana--Alucinogenos_DXCIE_REDEF: 0.54\n",
"\n",
"\n",
"\n",
"\n",
"------SIT_TTO 2: ABANDONO-----\n",
"Alucinogenos_DXCIE_REDEF--Ed_Secondary more technical education: 0.51\n",
"Alucinogenos_DXCIE_REDEF--JobIn_Stable: 0.58\n",
"\n",
"\n",
"\n",
"\n",
"------SIT_TTO 3: ALTA-----\n",
"Hous_Unstable--Ed_Secondary Education: 0.62\n",
"Opiaceos_DxCIE_REDEF--Hous_Unstable: 0.53\n",
"BZD_DxCIE_REDEF--Ed_Tertiary: 0.60\n",
"Alucinogenos_DXCIE_REDEF--Ed_Primary education: 0.61\n",
"Alucinogenos_DXCIE_REDEF--JobIn_Non-stable: 0.54\n",
"Frec30_Desconocido--JobIn_Stable: 0.60\n",
"Frec30_Desconocido--JobIn_Unemployed: 0.66\n",
"Frec30_No consumio--BZD_DxCIE_REDEF: 0.55\n"
]
}
],
"source": [
"# Banner printed above each treatment situation's PRE vs POST comparison\n",
"diff_banners = (\n",
"    \"------SIT_TTO 1: NO FILTERING------\",\n",
"    \"------SIT_TTO 2: ABANDONO-----\",\n",
"    \"------SIT_TTO 3: ALTA-----\",\n",
")\n",
"\n",
"for diff_sit, diff_banner in enumerate(diff_banners, start=1):\n",
"    if diff_sit > 1:\n",
"        print(\"\\n\\n\\n\")  # blank separator between consecutive sections\n",
"    print(diff_banner)\n",
"    pre_post = corr_mats[diff_sit - 1]  # (pre, post) matrices stored for this situation\n",
"    find_diff(diff_sit, pre_post[0], pre_post[1])"
]
} }
], ],
"metadata": { "metadata": {
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment