EDA.ipynb 45.6 KB
Newer Older
Joaquin Torres's avatar
Joaquin Torres committed
1 2 3 4 5 6
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
7
    "**Exploratory Data Analysis** \\\n",
Joaquin Torres's avatar
Joaquin Torres committed
8
    "_Author: Joaquín Torres Bravo_"
Joaquin Torres's avatar
Joaquin Torres committed
9 10 11 12 13 14
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
15
    "### Libraries"
Joaquin Torres's avatar
Joaquin Torres committed
16 17 18 19
   ]
  },
  {
   "cell_type": "code",
Joaquin Torres's avatar
Joaquin Torres committed
20
   "execution_count": null,
Joaquin Torres's avatar
Joaquin Torres committed
21 22 23 24 25 26
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
27
    "import numpy as np\n",
Joaquin Torres's avatar
Joaquin Torres committed
28 29
    "from pypair.association import binary_binary, continuous_continuous, binary_continuous # Correlations\n",
    "# Feature Importance\n",
Joaquin Torres's avatar
Joaquin Torres committed
30 31 32 33
    "from sklearn.feature_selection import VarianceThreshold\n",
    "from sklearn.feature_selection import SelectKBest\n",
    "from sklearn.feature_selection import f_classif\n",
    "from sklearn.feature_selection import mutual_info_classif"
Joaquin Torres's avatar
Joaquin Torres committed
34 35 36 37 38 39
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
40
    "### First Steps"
Joaquin Torres's avatar
Joaquin Torres committed
41 42 43 44
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bd_all = pd.read_spss('./input/data.sav')\n",
    "\n",
    "# Keep only alcohol patients whose treatment ended in 'Abandono' or 'Alta terapéutica'\n",
    "is_alcohol = bd_all['Alcohol_DxCIE'] == 'Sí'\n",
    "is_closed = bd_all['Situacion_tratamiento'].isin(['Abandono', 'Alta terapéutica'])\n",
    "\n",
    "# .copy() detaches bd from bd_all: later cells overwrite and add columns on bd, and doing\n",
    "# that on a filtered slice raises SettingWithCopyWarning and may not write where expected\n",
    "bd = bd_all[is_alcohol & is_closed].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): these subsets are taken from bd before the unknown/missing-value cells below\n",
    "# run, so they (and combined_pre_post) keep the raw values -- confirm this is intended\n",
    "\n",
    "# Pre-pandemic group\n",
    "conj_pre = bd[bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia']\n",
    "# Pre-pandemic abandono\n",
    "pre_abandono = conj_pre[conj_pre['Situacion_tratamiento'] == 'Abandono']\n",
    "# Pre-pandemic alta\n",
    "pre_alta = conj_pre[conj_pre['Situacion_tratamiento'] == 'Alta terapéutica']\n",
    "\n",
    "# Post-pandemic group: the two classes that end during the pandemic are merged\n",
    "post_labels = ['Inicio prepandemia y fin en pandemia', 'inicio y fin en pandemia']\n",
    "conj_post = bd[bd['Pandemia_inicio_fin_tratamiento'].isin(post_labels)]\n",
    "# Post-pandemic abandono\n",
    "post_abandono = conj_post[conj_post['Situacion_tratamiento'] == 'Abandono']\n",
    "# Post-pandemic alta\n",
    "post_alta = conj_post[conj_post['Situacion_tratamiento'] == 'Alta terapéutica']\n",
    "\n",
    "# Tag each subset with its period and stack them (useful for plots).\n",
    "# assign() returns a new frame, so no column is written into a filtered slice of bd\n",
    "# (the original in-place assignment raised SettingWithCopyWarning)\n",
    "conj_post = conj_post.assign(Group='Post')\n",
    "conj_pre = conj_pre.assign(Group='Pre')\n",
    "combined_pre_post = pd.concat([conj_post, conj_pre])"
   ]
  },
86 87
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report how many patients fall in each period and, within it, in each outcome\n",
    "size_report = (\n",
    "    (\"PRE\", conj_pre, pre_alta, pre_abandono),\n",
    "    (\"POST\", conj_post, post_alta, post_abandono),\n",
    ")\n",
    "for period, everyone, alta, abandono in size_report:\n",
    "    print(f\"{period}: {len(everyone)}\")\n",
    "    print(f\"\\tALTA: {len(alta)}\")\n",
    "    print(f\"\\tABANDONO: {len(abandono)}\")"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
102
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column/dtype/non-null summary of every subset\n",
    "info_sections = [\n",
    "    (\"PRE\", conj_pre),\n",
    "    (\"PRE-ABANDONO\", pre_abandono),\n",
    "    (\"PRE-ALTA\", pre_alta),\n",
    "    (\"POST\", conj_post),\n",
    "    (\"POST-ABANDONO\", post_abandono),\n",
    "    (\"POST-ALTA\", post_alta),\n",
    "]\n",
    "\n",
    "for title, frame in info_sections:\n",
    "    # Visual gap between the PRE and POST halves of the report\n",
    "    if title == \"POST\":\n",
    "        print(\"\\n\\n\\n\")\n",
    "    print(title)\n",
    "    # info() writes the report itself and returns None; wrapping it in print()\n",
    "    # added a stray 'None' line after every report\n",
    "    frame.info()\n",
    "    print(\"-------------------------------\")"
   ]
  },
  {
   "cell_type": "markdown",
Joaquin Torres's avatar
Joaquin Torres committed
133 134
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
135
    "### Missing and Unknown Values"
136 137 138 139
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_unknown_by_mode(df, col, unknown):\n",
    "    \"\"\"Replace the `unknown` code in df[col] by the column's mode and return that mode.\n",
    "\n",
    "    The mode is taken over the known values only, so the unknown code can never be\n",
    "    picked as its own replacement. The unique values are printed before and after.\n",
    "    Returns None (and leaves the column untouched) when there is no known value.\n",
    "    \"\"\"\n",
    "    print(df[col].unique())\n",
    "    known_modes = df.loc[df[col] != unknown, col].mode()\n",
    "    if known_modes.empty:\n",
    "        return None\n",
    "    fill_value = known_modes[0]\n",
    "    df[col] = df[col].replace(unknown, fill_value)\n",
    "    print(df[col].unique())\n",
    "    return fill_value\n",
    "\n",
    "\n",
    "# 9.0 represents unknown according to Variables.docx\n",
    "mode_soc_inc = replace_unknown_by_mode(bd, 'Social_inclusion', '9.0')\n",
    "\n",
    "mode_alt = replace_unknown_by_mode(bd, 'Alterations_early_childhood_develop', '9')\n",
    "\n",
    "# NOTE(review): the unknown code is a string for the two columns above and the float 99.0\n",
    "# for the two below -- confirm each one matches the values actually stored in the column\n",
    "mode_stigma = replace_unknown_by_mode(bd, 'Risk_stigma', 99.0)\n",
    "\n",
    "mode_hijos = replace_unknown_by_mode(bd, 'NumHijos', 99.0)"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
168 169
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns whose NaN count is reported, first for bd and then per period\n",
    "nan_report_cols = ['Age', 'Años_consumo_droga', 'Risk_stigma', 'NumHijos']\n",
    "\n",
    "for col in nan_report_cols:\n",
    "    print(f\"Total missing values {col}: {bd[col].isnull().sum()}\")\n",
    "\n",
    "period_subsets = (\n",
    "    (\"\\tCONJUNTO PREPANDEMIA\", conj_pre),\n",
    "    (\"\\tCONJUNTO POSTPANDEMIA\", conj_post),\n",
    ")\n",
    "for heading, subset in period_subsets:\n",
    "    print(heading)\n",
    "    for col in nan_report_cols:\n",
    "        print(f\"\\t\\tMissing values {col}: {subset[col].isnull().sum()}\")"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
192 193
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace NaN values by the mode.\n",
    "# The filled Series is assigned back to the column: bd[col].fillna(..., inplace=True)\n",
    "# operates on an intermediate object, which pandas warns about and which leaves bd\n",
    "# unchanged under copy-on-write\n",
    "age_mode = bd['Age'].mode()[0]\n",
    "bd['Age'] = bd['Age'].fillna(age_mode)\n",
    "\n",
    "años_consumo_mode = bd['Años_consumo_droga'].mode()[0]\n",
    "bd['Años_consumo_droga'] = bd['Años_consumo_droga'].fillna(años_consumo_mode)\n",
    "\n",
    "risk_stigma_mode = bd['Risk_stigma'].mode()[0]\n",
    "bd['Risk_stigma'] = bd['Risk_stigma'].fillna(risk_stigma_mode)\n",
    "\n",
    "num_hijos_mode = bd['NumHijos'].mode()[0]\n",
    "bd['NumHijos'] = bd['NumHijos'].fillna(num_hijos_mode)"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
212 213 214 215
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
216
    "### Distribution of Variables"
Joaquin Torres's avatar
Joaquin Torres committed
217 218 219 220
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attributes plotted as counts per category\n",
    "disc_atts = [\n",
    "    'Education', 'Social_protection', 'Job_insecurity', 'Housing',\n",
    "    'Alterations_early_childhood_develop', 'Social_inclusion', 'Risk_stigma',\n",
    "    'Sex', 'NumHijos', 'Smoking', 'Biological_vulnerability',\n",
    "    'Opiaceos_DxCIE', 'Cannabis_DXCIE', 'BZD_DxCIE', 'Cocaina_DxCIE',\n",
    "    'Alucinogenos_DXCIE', 'Tabaco_DXCIE',\n",
    "    'FrecuenciaConsumo30Dias', 'OtrosDx_Psiquiatrico', 'Tx_previos',\n",
    "    'Readmisiones_estudios', 'Nreadmision',\n",
    "]\n",
    "\n",
    "# Attributes summarised and plotted as numeric distributions\n",
    "num_atts = [\n",
    "    'Structural_conflic',\n",
    "    'Adherencia_tto_recalc',\n",
    "    'Age',\n",
    "    'Años_consumo_droga',\n",
    "    'Tiempo_tx',\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
242
    "#### Discrete"
Joaquin Torres's avatar
Joaquin Torres committed
243 244 245 246 247 248
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
249
    "##### Countplots"
Joaquin Torres's avatar
Joaquin Torres committed
250 251 252 253
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(len(disc_atts), 1, figsize=(15, 5*len(disc_atts)))\n",
    "plt.subplots_adjust(hspace=0.75, wspace=1.25)\n",
    "\n",
    "# One (outcome, period) tuple per row: every category of an attribute gets the\n",
    "# PRE-POST and ALTA-ABANDONO combinations side by side in the same subplot\n",
    "outcome_period = combined_pre_post[['Situacion_tratamiento', 'Group']].apply(tuple, axis=1)\n",
    "outcome_period_order = [\n",
    "    ('Abandono', 'Pre'),\n",
    "    ('Alta terapéutica', 'Pre'),\n",
    "    ('Abandono', 'Post'),\n",
    "    ('Alta terapéutica', 'Post'),\n",
    "]\n",
    "\n",
    "# One countplot per discrete attribute\n",
    "for row, disc_att in enumerate(disc_atts):\n",
    "    ax = sns.countplot(x=disc_att, data=combined_pre_post, hue=outcome_period,\n",
    "                       hue_order=outcome_period_order, ax=axs[row])\n",
    "    ax.set_title(disc_att, fontsize=16, fontweight='bold')\n",
    "    ax.get_legend().set_title(\"Groups\")\n",
    "\n",
    "    # Write the count on top of every bar; only patches labelled '_nolegend_' are annotated\n",
    "    for bar in ax.patches:\n",
    "        if bar.get_label() != '_nolegend_':\n",
    "            continue\n",
    "        bar_top = bar.get_height()\n",
    "        bar_center = bar.get_x() + bar.get_width() / 2.\n",
    "        ax.annotate(format(bar_top, '.0f'),\n",
    "                    (bar_center, bar_top),\n",
    "                    ha='center', va='center',\n",
    "                    xytext=(0, 9),\n",
    "                    textcoords='offset points')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/distributions/countplots.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
287
    "##### Normalized Countplots"
Joaquin Torres's avatar
Joaquin Torres committed
288 289 290 291
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to plot normalized countplot\n",
    "def plot_count_perc_norm(i: int, group: int, disc_att: str) -> None:\n",
    "    \"\"\"Draw a countplot of `disc_att` split by 'Situacion_tratamiento', rescaled to percentages.\n",
    "\n",
    "    Every bar's height is replaced by the share (in %) that its category has within\n",
    "    its own 'Situacion_tratamiento' group. The plot is drawn on the global `axs` grid\n",
    "    at row `i`, column `group - 2`.\n",
    "\n",
    "    Args:\n",
    "        i: row of `axs` to draw on.\n",
    "        group: 1 (all, uses bd), 2 (pre, uses conj_pre), 3 (post, uses conj_post).\n",
    "        disc_att: name of the discrete column to plot.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `group` is not 1, 2 or 3.\n",
    "    \"\"\"\n",
    "\n",
    "    # Define data to work with based on group\n",
    "    if group == 1:\n",
    "        df = bd\n",
    "    elif group == 2:\n",
    "        df = conj_pre\n",
    "    elif group == 3:\n",
    "        df = conj_post\n",
    "    else:\n",
    "        # Previously fell through and failed later with an unbound `df`\n",
    "        raise ValueError(f\"group must be 1 (all), 2 (pre) or 3 (post), got {group!r}\")\n",
    "\n",
    "    # GOAL: find percentage of each possible category within the total of its situacion_tto subset\n",
    "    # Group data by 'Situacion_tratamiento' and the attribute and count occurrences\n",
    "    grouped_counts = df.groupby(['Situacion_tratamiento', disc_att]).size().reset_index(name='count')\n",
    "    # Calculate total count for each 'Situacion_tratamiento' group\n",
    "    total_counts = df.groupby('Situacion_tratamiento')[disc_att].count()\n",
    "    # Divide each count by its corresponding total count and calculate percentage\n",
    "    grouped_counts['percentage'] = grouped_counts.apply(\n",
    "        lambda row: row['count'] / total_counts[row['Situacion_tratamiento']] * 100, axis=1)\n",
    "\n",
    "    # Follow the same order in plot as in computations\n",
    "    col_order = grouped_counts[grouped_counts['Situacion_tratamiento'] == 'Abandono'][disc_att].tolist()\n",
    "\n",
    "    # Create countplot and split each bar into two based on the value of sit_tto\n",
    "    # NOTE(review): for group == 1 the column index is -1, i.e. the last column of axs --\n",
    "    # confirm the intended column before re-enabling the ALL panel\n",
    "    ax = sns.countplot(x=disc_att, hue='Situacion_tratamiento', data=df, order=col_order, ax=axs[i, group-2])\n",
    "\n",
    "    # Adjust y-axis to represent percentages out of the total count\n",
    "    ax.set_ylim(0, 100)\n",
    "\n",
    "    # NOTE(review): bars are matched to percentages purely by position; this assumes the\n",
    "    # patches come in the same order as the rows of grouped_counts -- verify\n",
    "    percentages = grouped_counts['percentage']\n",
    "    # Separate loop name so the row parameter `i` is not overwritten\n",
    "    for patch_idx, p in enumerate(ax.patches):\n",
    "        # Skip going over the legend values\n",
    "        if p.get_label() == \"_nolegend_\":\n",
    "            # Set height to corresponding percentage and annotate result\n",
    "            height = percentages[patch_idx]\n",
    "            p.set_height(height)\n",
    "            ax.annotate(f'{height:.2f}%', (p.get_x() + p.get_width() / 2., height),\n",
    "                        ha='center', va='bottom', fontsize=6, color='black', xytext=(0, 5),\n",
    "                        textcoords='offset points')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(len(disc_atts), 2, figsize=(15, 7*len(disc_atts)))\n",
    "plt.subplots_adjust(hspace=0.75, wspace=1.5)\n",
    "\n",
    "# (group code understood by plot_count_perc_norm, panel title); column = group - 2\n",
    "period_panels = ((2, \"\\nPRE\"), (3, \"\\nPOST\"))\n",
    "\n",
    "for i, disc_att in enumerate(disc_atts):\n",
    "    for group, panel_title in period_panels:\n",
    "        plot_count_perc_norm(i, group, disc_att)\n",
    "        panel = axs[i, group - 2]\n",
    "        panel.set_title(panel_title)\n",
    "        panel.set_xlabel(disc_att, fontweight='bold')\n",
    "        panel.set_ylabel(\"% of total within its Sit_tto group\")\n",
    "        panel.tick_params(axis='x', rotation=90)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/distributions/norm_countplots.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
378
    "#### Numerical"
Joaquin Torres's avatar
Joaquin Torres committed
379 380 381 382 383 384
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
385
    "##### Summary Stats"
Joaquin Torres's avatar
Joaquin Torres committed
386 387 388 389
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics of the numerical attributes over all selected patients\n",
    "num_summary = bd[num_atts].describe()\n",
    "print(num_summary)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
401
    "##### Boxplots"
Joaquin Torres's avatar
Joaquin Torres committed
402 403 404 405
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(len(num_atts), 1, figsize=(12, 5*len(num_atts)))\n",
    "plt.subplots_adjust(hspace=0.75, wspace=1.5)\n",
    "\n",
    "# Draw on the axes created above instead of re-selecting them through plt.subplot();\n",
    "# np.ravel also covers the single-attribute case, where subplots returns one Axes\n",
    "for ax, num_att in zip(np.ravel(axs), num_atts):\n",
    "    sns.boxplot(\n",
    "        data=combined_pre_post,\n",
    "        x=num_att,  # attribute value in the x axis\n",
    "        y='Group',  # pre and post in y axis\n",
    "        hue='Situacion_tratamiento',  # side by side abandono vs alta\n",
    "        ax=ax,\n",
    "    )\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/distributions/boxplots.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
430
    "##### Histograms"
Joaquin Torres's avatar
Joaquin Torres committed
431 432 433 434
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(len(num_atts), 3, figsize=(15, 6*len(num_atts)))\n",
    "plt.subplots_adjust(hspace=0.75, wspace=1.5)\n",
    "\n",
    "# One column per population: all alcohol patients, PRE and POST\n",
    "hist_panels = ((bd, 'ALL'), (conj_pre, 'PRE'), (conj_post, 'POST'))\n",
    "\n",
    "for i, num_att in enumerate(num_atts):\n",
    "    for col_idx, (frame, label) in enumerate(hist_panels):\n",
    "        panel = axs[i, col_idx]\n",
    "        # Each outcome is normalised on its own (common_norm=False) so the two are comparable\n",
    "        sns.histplot(data=frame, x=num_att, bins=15, hue='Situacion_tratamiento',\n",
    "                     stat='probability', common_norm=False, kde=True,\n",
    "                     line_kws={'lw': 5}, alpha=0.4, ax=panel)\n",
    "        panel.set_title(f\"\\nDistr. of {num_att}  - {label}\")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/distributions/histograms.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
467
    "### Correlation Analysis"
Joaquin Torres's avatar
Joaquin Torres committed
468 469 470 471 472 473
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
474
    "#### Groups of Variables"
Joaquin Torres's avatar
Joaquin Torres committed
475 476 477 478
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Social variables\n",
    "social_vars = [\n",
    "    'Education', 'Social_protection', 'Job_insecurity', 'Housing',\n",
    "    'Alterations_early_childhood_develop', 'Social_inclusion',\n",
    "    'Risk_stigma', 'Structural_conflic',\n",
    "]\n",
    "\n",
    "# Individual variables\n",
    "ind_vars = [\n",
    "    'Age', 'Sex', 'NumHijos', 'Smoking', 'Biological_vulnerability',\n",
    "    'Opiaceos_DxCIE', 'Cannabis_DXCIE', 'BZD_DxCIE', 'Cocaina_DxCIE',\n",
    "    'Alucinogenos_DXCIE', 'Tabaco_DXCIE',\n",
    "    'FrecuenciaConsumo30Dias', 'Años_consumo_droga', 'OtrosDx_Psiquiatrico',\n",
    "    'Tx_previos', 'Adherencia_tto_recalc',\n",
    "]\n",
    "\n",
    "target_var = 'Situacion_tratamiento'\n",
    "\n",
    "# Columns that are already numeric and we do not need to redefine\n",
    "no_redef_cols = [\n",
    "    'Structural_conflic',\n",
    "    'Age',\n",
    "    'NumHijos',\n",
    "    'Años_consumo_droga',\n",
    "    'Adherencia_tto_recalc',\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### One-hot Encoding"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Binary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'Alterations_early_childhood_develop'\n",
    "alterations_mapping = {\n",
    "    'No alterations (first exposure at 11 or more years)' : 0,\n",
    "    'Alterations (first exposure before 11 years old)': 1,\n",
    "}\n",
    "\n",
    "# Shared mapping for the No/Sí columns\n",
    "no_si = {'No': 0, 'Sí': 1}\n",
    "\n",
    "# Column -> {label: 0/1}. The '<column>_REDEF' columns are created in this order\n",
    "binary_mappings = {\n",
    "    'Alterations_early_childhood_develop': alterations_mapping,\n",
    "    'Social_protection': no_si,\n",
    "    'Risk_stigma': {'No': 0, 'Yes': 1},\n",
    "    'Sex': {'Hombre': 0, 'Mujer': 1},\n",
    "    'Smoking': no_si,\n",
    "    'Biological_vulnerability': no_si,\n",
    "    # 'Droga_DxCIE'\n",
    "    'Opiaceos_DxCIE': no_si,\n",
    "    'Cannabis_DXCIE': no_si,\n",
    "    'BZD_DxCIE': no_si,\n",
    "    'Cocaina_DxCIE': no_si,\n",
    "    'Alucinogenos_DXCIE': no_si,\n",
    "    'Tabaco_DXCIE': no_si,\n",
    "    'OtrosDx_Psiquiatrico': no_si,\n",
    "    'Tx_previos': no_si,\n",
    "    # Target (!!!!!) -- important to define properly: 'Abandono' is encoded as 1\n",
    "    'Situacion_tratamiento': {'Abandono': 1, 'Alta terapéutica': 0},\n",
    "}\n",
    "\n",
    "for source_col, label_to_code in binary_mappings.items():\n",
    "    bd[source_col + '_REDEF'] = bd[source_col].map(label_to_code)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
582
    "##### Categorical"
Joaquin Torres's avatar
Joaquin Torres committed
583 584 585 586
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Specify columns to one hot encode; empty list otherwise\n",
    "one_hot_vars = ['Education', 'Job_insecurity', 'Housing', 'Social_inclusion', 'FrecuenciaConsumo30Dias']\n",
    "\n",
    "# Prefix given to the dummy columns of each attribute\n",
    "one_hots_vars_prefix = {\n",
    "    'Education': 'Ed',\n",
    "    'Job_insecurity': 'JobIn',\n",
    "    'Housing': 'Hous', \n",
    "    'Social_inclusion': 'SocInc',\n",
    "    'FrecuenciaConsumo30Dias': 'Frec30',\n",
    "}\n",
    "\n",
    "# One frame of dummy columns per attribute\n",
    "encoded_frames = {\n",
    "    one_hot_var: pd.get_dummies(bd[one_hot_var], prefix=one_hots_vars_prefix[one_hot_var])\n",
    "    for one_hot_var in one_hot_vars\n",
    "}\n",
    "\n",
    "# Attribute -> names of its dummy columns\n",
    "one_hot_cols_dic = {\n",
    "    one_hot_var: encoded.columns.tolist()\n",
    "    for one_hot_var, encoded in encoded_frames.items()\n",
    "}\n",
    "\n",
    "# Drop dummy columns left by a previous run of this cell (re-running used to append\n",
    "# duplicates), then attach all new columns with a single concat\n",
    "dummy_cols = [col for cols in one_hot_cols_dic.values() for col in cols]\n",
    "bd = pd.concat([bd.drop(columns=dummy_cols, errors='ignore'), *encoded_frames.values()], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
615
    "#### Final Columns"
Joaquin Torres's avatar
Joaquin Torres committed
616 617 618 619
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def encoded_column_names(variables):\n",
    "    \"\"\"Return, in order, the encoded column name(s) that stand for each variable.\n",
    "\n",
    "    Variables in no_redef_cols keep their own name, variables in one_hot_vars expand\n",
    "    to their dummy columns, and every other variable becomes '<name>_REDEF'.\n",
    "    \"\"\"\n",
    "    names = []\n",
    "    for var in variables:\n",
    "        if var in no_redef_cols:\n",
    "            names.append(var)\n",
    "        elif var in one_hot_vars:\n",
    "            names.extend(one_hot_cols_dic[var])\n",
    "        else:\n",
    "            names.append(var + '_REDEF')\n",
    "    return names\n",
    "\n",
    "\n",
    "soc_vars_enc = encoded_column_names(social_vars)\n",
    "ind_vars_enc = encoded_column_names(ind_vars)\n",
    "\n",
    "# Final version of columns we need to use for correlation analysis\n",
    "corr_cols = soc_vars_enc + ind_vars_enc"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
658 659
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop unknown columns from every list of feature names\n",
    "unknown_cols = ('Ed_Unknowledge', 'JobIn_unkwnodledge', 'Hous_unknowledge', 'Frec30_Desconocido')\n",
    "\n",
    "corr_cols = [col for col in corr_cols if col not in unknown_cols]\n",
    "soc_vars_enc = [col for col in soc_vars_enc if col not in unknown_cols]\n",
    "ind_vars_enc = [col for col in ind_vars_enc if col not in unknown_cols]"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
670 671 672 673
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
674
    "##### Renaming and Filtering"
Joaquin Torres's avatar
Joaquin Torres committed
675 676 677 678
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep target column and Pandemia_inicio_fin_tratamiento to update df and split again into PRE and POST\n",
    "extra_cols = ['Situacion_tratamiento', 'Situacion_tratamiento_REDEF', 'Pandemia_inicio_fin_tratamiento']\n",
    "columns_to_keep = corr_cols + extra_cols\n",
    "bd = bd.loc[:, columns_to_keep]"
   ]
  },
688 689
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleaning attribute names: current column name -> final column name\n",
    "name_mapping = {\n",
    "    'Ed_Not Complete primary school': 'Ed_Not_Complete_Primary',\n",
    "    'Ed_Primary education': 'Ed_Primary',\n",
    "    'Ed_Secondary Education': 'Ed_Secondary',\n",
    "    'Ed_Secondary more technical education': 'Ed_Secondary_Technical',\n",
    "    'Ed_Tertiary': 'Ed_Tertiary',\n",
    "    'Social_protection_REDEF': 'Social_Protection',\n",
    "    'JobIn_Non-stable': 'JobIn_Unstable',\n",
    "    'JobIn_Stable': 'JobIn_Stable',\n",
    "    'JobIn_Unemployed': 'JobIn_Unemployed',\n",
    "    'Hous_Institutional': 'Hous_Institutional',\n",
    "    'Hous_Stable': 'Hous_Stable',\n",
    "    'Hous_Unstable': 'Hous_Unstable',\n",
    "    'Alterations_early_childhood_develop_REDEF': 'Early_Alterations',\n",
    "    'SocInc_Live with families or friends': 'SocInc_Family_Friends',\n",
    "    'SocInc_live alone': 'SocInc_Alone',\n",
    "    'SocInc_live in institutions': 'SocInc_Instit',\n",
    "    'Risk_stigma_REDEF': 'Risk_Stigma',\n",
    "    'Structural_conflic': 'Structural_Conflict',\n",
    "    'Age': 'Age',\n",
    "    'Sex_REDEF': 'Sex',\n",
    "    'NumHijos': 'Num_Children',\n",
    "    'Smoking_REDEF': 'Smoking',\n",
    "    'Biological_vulnerability_REDEF': 'Bio_Vulner',\n",
    "    'Opiaceos_DxCIE_REDEF': 'Opiods_DXCIE',\n",
    "    'Cannabis_DXCIE_REDEF': 'Cannabis_DXCIE',\n",
    "    'BZD_DxCIE_REDEF': 'BZD_DXCIE',\n",
    "    'Cocaina_DxCIE_REDEF': 'Cocaine_DXCIE',\n",
    "    'Alucinogenos_DXCIE_REDEF': 'Hallucin_DXCIE',\n",
    "    'Tabaco_DXCIE_REDEF': 'Tobacco_DXCIE',\n",
    "    'Frec30_1 día/semana': 'Freq_1dpw',\n",
    "    'Frec30_2-3 días\\u200e/semana': 'Freq_2-3dpw',\n",
    "    'Frec30_4-6 días/semana': 'Freq_4-6dpw',\n",
    "    'Frec30_Menos de 1 día\\u200e/semana': 'Freq_l1dpw',\n",
    "    'Frec30_No consumio': 'Freq_None',\n",
    "    'Frec30_Todos los días': 'Freq_Everyday',\n",
    "    'Años_consumo_droga': 'Years_Drug_Use',\n",
    "    'OtrosDx_Psiquiatrico_REDEF': 'Other_Psychiatric_DX',\n",
    "    'Tx_previos_REDEF': 'Previous_Treatments',\n",
    "    'Adherencia_tto_recalc': 'Treatment_Adherence',\n",
    "    'Situacion_tratamiento_REDEF': 'Treatment_Outcome',\n",
    "    'Situacion_tratamiento': 'Situacion_tratamiento',\n",
    "    'Pandemia_inicio_fin_tratamiento': 'Pandemia_inicio_fin_tratamiento'\n",
    "}\n",
    "\n",
    "def _renamed(names):\n",
    "    \"\"\"Translate a list of current column names through name_mapping (KeyError if one is missing).\"\"\"\n",
    "    return [name_mapping[name] for name in names]\n",
    "\n",
    "# Update lists of feature names\n",
    "corr_cols = _renamed(corr_cols)\n",
    "soc_vars_enc = _renamed(soc_vars_enc)\n",
    "ind_vars_enc = _renamed(ind_vars_enc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export feature names: one .npy file per list\n",
    "feature_name_files = {\n",
    "    './results/feature_names/all_features.npy': corr_cols,\n",
    "    './results/feature_names/social_factors.npy': soc_vars_enc,\n",
    "    './results/feature_names/individual_factors.npy': ind_vars_enc,\n",
    "}\n",
    "for npy_path, names in feature_name_files.items():\n",
    "    np.save(npy_path, names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Renaming columns; the result keeps exactly the columns listed in name_mapping, in that order\n",
    "final_col_order = list(name_mapping.values())\n",
    "bd = bd.rename(columns=name_mapping)[final_col_order]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Update main dfs\n",
    "# Row masks over the renamed dataframe\n",
    "is_pre = bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia'\n",
    "# Merging last two classes to balance sets\n",
    "is_post = bd['Pandemia_inicio_fin_tratamiento'].isin(['Inicio prepandemia y fin en pandemia', 'inicio y fin en pandemia'])\n",
    "\n",
    "# Pre-pandemic: whole group, then abandono / alta subsets\n",
    "conj_pre = bd[is_pre]\n",
    "pre_abandono = conj_pre[conj_pre['Situacion_tratamiento'] == 'Abandono']\n",
    "pre_alta = conj_pre[conj_pre['Situacion_tratamiento'] == 'Alta terapéutica']\n",
    "\n",
    "# Post-pandemic: whole group, then abandono / alta subsets\n",
    "conj_post = bd[is_post]\n",
    "post_abandono = conj_post[conj_post['Situacion_tratamiento'] == 'Abandono']\n",
    "post_alta = conj_post[conj_post['Situacion_tratamiento'] == 'Alta terapéutica']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
796
    "#### Plotting Correlation Matrices"
797 798 799 800
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary: every feature with exactly two distinct values in bd (NaN counts as a value), plus the renamed target\n",
    "binary_vars = [col for col in corr_cols if bd[col].nunique(dropna=False) == 2]\n",
    "binary_vars.append(name_mapping['Situacion_tratamiento_REDEF'])\n",
    "\n",
    "# Continuous: fixed list, given by the names used before renaming\n",
    "cont_keys = ['Structural_conflic', 'Age', 'NumHijos', 'Años_consumo_droga', 'Adherencia_tto_recalc']\n",
    "cont_vars = [name_mapping[key] for key in cont_keys]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_corr_matrix(df, cols):\n",
    "    \"\"\"\n",
    "    Compute the pairwise correlations between the columns `cols` of `df`.\n",
    "\n",
    "    The measure depends on the type of each pair, decided with the notebook-level\n",
    "    lists binary_vars and cont_vars:\n",
    "        binary vs binary         -> tetrachoric\n",
    "        continuous vs continuous -> Spearman\n",
    "        any other pair           -> point biserial\n",
    "\n",
    "    Returns an n x n numpy array (n = len(cols)) where only the lower triangle is\n",
    "    filled; the diagonal and the upper triangle stay at zero.\n",
    "    \"\"\"\n",
    "    size = len(cols)\n",
    "    corr_matrix = np.zeros((size, size))\n",
    "\n",
    "    for row in range(size):\n",
    "        row_var = cols[row]\n",
    "        row_is_binary = row_var in binary_vars\n",
    "        # Only the columns strictly left of the diagonal\n",
    "        for col in range(row):\n",
    "            col_var = cols[col]\n",
    "            if row_is_binary and col_var in binary_vars:\n",
    "                value = binary_binary(df[row_var], df[col_var], measure='tetrachoric')\n",
    "            elif row_var in cont_vars and col_var in cont_vars:\n",
    "                # pandas is used here because pypair's continuous_continuous was returning nan sometimes\n",
    "                value = df[row_var].corr(df[col_var], method='spearman')\n",
    "            else:\n",
    "                # The binary variable goes first; when the row variable is not binary, the column one is taken as binary\n",
    "                if row_is_binary:\n",
    "                    bin_var, cont_var = row_var, col_var\n",
    "                else:\n",
    "                    bin_var, cont_var = col_var, row_var\n",
    "                value = binary_continuous(df[bin_var], df[cont_var], measure='point_biserial')\n",
    "            corr_matrix[row][col] = value\n",
    "\n",
    "    return corr_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_heatmap(sit_tto: int, group: int) -> np.ndarray:\n",
    "    \"\"\"\n",
    "    Draw the correlation heatmap of one (sit_tto, group) combination and return its matrix.\n",
    "\n",
    "        sit_tto: 1 (include it as another var), 2 (only abandono), 3 (only alta)\n",
    "        group: 1 (all alcohol patients), 2 (pre), 3 (post)\n",
    "\n",
    "    No axes are passed to seaborn, so the heatmap goes to the currently active axes\n",
    "    (select it with plt.subplot before calling).\n",
    "\n",
    "    Returns the matrix computed by get_corr_matrix for the selected rows and columns.\n",
    "    Raises ValueError if sit_tto or group is not 1, 2 or 3.\n",
    "    \"\"\"\n",
    "    if sit_tto not in (1, 2, 3):\n",
    "        raise ValueError(f\"sit_tto must be 1, 2 or 3 (got {sit_tto!r})\")\n",
    "    if group not in (1, 2, 3):\n",
    "        raise ValueError(f\"group must be 1, 2 or 3 (got {group!r})\")\n",
    "\n",
    "    # Define columns based on sit_tto arg\n",
    "    if sit_tto == 1:\n",
    "        # Include target as another variable\n",
    "        cols = ['Treatment_Outcome'] + corr_cols\n",
    "    else:\n",
    "        cols = corr_cols\n",
    "\n",
    "    # Select data based on group and sit_tto\n",
    "    if group == 1:\n",
    "        if sit_tto == 1:\n",
    "            source_df = bd\n",
    "        elif sit_tto == 2:\n",
    "            source_df = bd[bd['Situacion_tratamiento'] == 'Abandono']\n",
    "        else:\n",
    "            source_df = bd[bd['Situacion_tratamiento'] == 'Alta terapéutica']\n",
    "    elif group == 2:\n",
    "        source_df = (conj_pre, pre_abandono, pre_alta)[sit_tto - 1]\n",
    "    else:\n",
    "        source_df = (conj_post, post_abandono, post_alta)[sit_tto - 1]\n",
    "    bd_ca = source_df[cols]\n",
    "\n",
    "    # Title plot based on group and sit_tto\n",
    "    plot_title = \"Correl Matrix - \" + {1: \"ALL\", 2: \"PRE\", 3: \"POST\"}[group]\n",
    "    if sit_tto == 2:\n",
    "        plot_title += \" - ABANDONO\"\n",
    "    elif sit_tto == 3:\n",
    "        plot_title += \" - ALTA\"\n",
    "\n",
    "    corr_matrix = get_corr_matrix(bd_ca, cols)\n",
    "\n",
    "    # Create a mask for the upper triangle (diagonal included), which get_corr_matrix leaves at zero\n",
    "    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))\n",
    "\n",
    "    # Create heatmap correlation matrix\n",
    "    dataplot = sns.heatmap(corr_matrix, mask=mask, xticklabels=cols, yticklabels=cols, cmap=\"coolwarm\", vmin=-1, vmax=1, annot=True, fmt=\".2f\", annot_kws={\"size\": 4})\n",
    "\n",
    "    # Group ind vs social vars by color on both axes\n",
    "    for axis in (dataplot.axes.xaxis, dataplot.axes.yaxis):\n",
    "        for tick_label in axis.get_ticklabels():\n",
    "            if tick_label.get_text() in ind_vars_enc:\n",
    "                tick_label.set_color('green')\n",
    "            elif tick_label.get_text() in soc_vars_enc:\n",
    "                tick_label.set_color('purple')\n",
    "\n",
    "    # Add legend and place it in lower left\n",
    "    plt.legend(handles=[\n",
    "        plt.Line2D([0], [0], marker='o', color='w', label='Social Factors', markerfacecolor='purple', markersize=10),\n",
    "        plt.Line2D([0], [0], marker='o', color='w', label='Individual Factors', markerfacecolor='green', markersize=10)\n",
    "    ], bbox_to_anchor=(-0.1, -0.1), fontsize = 20)\n",
    "\n",
    "    plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})\n",
    "\n",
    "    return corr_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(3, 3, figsize=(50, 50))\n",
    "plt.subplots_adjust(hspace=0.75, wspace=2)\n",
    "corr_mats = [] # List of tuples (m1, m2) to store the 3 pairs of matrices to compare (pre vs post)\n",
    "\n",
    "# One row of heatmaps per value of 'Situacion_tratamiento': 1 (include it as another var), 2 (only abandono), 3 (only alta)\n",
    "# One column per group: 1 (ALL), 2 (PRE), 3 (POST)\n",
    "for sit_tto in (1, 2, 3):\n",
    "    row_mats = {}\n",
    "    for group in (1, 2, 3):\n",
    "        # Subplot positions are numbered row by row, starting at 1\n",
    "        plt.subplot(3, 3, 3*(sit_tto-1) + group)\n",
    "        row_mats[group] = plot_heatmap(sit_tto, group)\n",
    "\n",
    "    # Only the PRE and POST matrices are compared later\n",
    "    corr_mats.append((row_mats[2], row_mats[3]))\n",
    "\n",
    "plt.tight_layout()\n",
    "\n",
    "plt.savefig('./results/plots/correlations/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
970
    "#### Finding Differences PRE vs POST"
971 972 973 974
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_diff(sit_tto: int, m_pre, m_post, top_n: int = 100) -> None:\n",
    "    \"\"\"\n",
    "    Print the pairs of variables whose correlation differs the most between PRE and POST.\n",
    "\n",
    "        sit_tto: 1 (target included as first variable); any other value -> only corr_cols\n",
    "        m_pre, m_post: matrices returned by plot_heatmap for the same sit_tto (only the lower triangle is filled)\n",
    "        top_n: maximum number of pairs printed\n",
    "\n",
    "    Pairs are listed by decreasing absolute difference; pairs whose difference is NaN go last.\n",
    "    \"\"\"\n",
    "    # Same labels, in the same order, that plot_heatmap used to build the matrices\n",
    "    if sit_tto == 1:\n",
    "        cols = ['Treatment_Outcome'] + corr_cols\n",
    "    else:\n",
    "        cols = corr_cols\n",
    "\n",
    "    def paint(name):\n",
    "        # Give ind vs soc vars their corresponding color, always resetting it after the name\n",
    "        color = colors.GREEN if name in ind_vars_enc else colors.PURPLE\n",
    "        return color + name + colors.RESET\n",
    "\n",
    "    diff_list = []  # Tuples of (difference, variable_i, variable_j, value_pre, value_post)\n",
    "\n",
    "    # Go through the lower triangle only: the diagonal and upper triangle hold no correlation\n",
    "    for i, var_i in enumerate(cols):\n",
    "        for j in range(i):\n",
    "            val_pre = m_pre[i][j]\n",
    "            val_post = m_post[i][j]\n",
    "            diff = abs(val_pre - val_post)\n",
    "            diff_list.append((diff, var_i, cols[j], val_pre, val_post))\n",
    "\n",
    "    # Descending by difference; NaN cannot be ordered, so it is pushed to the end explicitly\n",
    "    diff_list.sort(key=lambda x: (np.isnan(x[0]), -x[0]))\n",
    "\n",
    "    # Print the sorted list\n",
    "    for diff, var_i, var_j, val_pre, val_post in diff_list[:top_n]:\n",
    "        print(f\"{paint(var_i)} & {paint(var_j)} --> Diff: {diff:.2f} (PRE: {val_pre:.2f}; POST: {val_post:.2f})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class colors:\n",
    "    \"\"\"ANSI escape sequences used to color printed text; RESET goes back to the default style.\"\"\"\n",
    "    RESET = '\\033[0m'\n",
    "    RED = '\\033[91m'\n",
    "    GREEN = '\\033[92m'\n",
    "    YELLOW = '\\033[93m'\n",
    "    BLUE = '\\033[94m'\n",
    "    PURPLE = '\\033[95m'\n",
    "    CYAN = '\\033[96m'\n",
    "    WHITE = '\\033[97m'\n",
    "\n",
    "# Print colored text\n",
    "for color_code, message in (\n",
    "    (colors.RED, \"This is red text.\"),\n",
    "    (colors.GREEN, \"This is green text.\"),\n",
    "    (colors.BLUE, \"This is blue text.\"),\n",
    "):\n",
    "    print(color_code + message + colors.RESET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# corr_mats[0] is the (PRE, POST) pair computed with the target included as a variable\n",
    "print(\"------SIT_TTO 1: NO FILTERING------\")\n",
    "find_diff(1, *corr_mats[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# corr_mats[1] is the (PRE, POST) pair computed on abandono patients only\n",
    "print(\"------SIT_TTO 2: ABANDONO-----\")\n",
    "find_diff(2, *corr_mats[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# corr_mats[2] is the (PRE, POST) pair computed on alta patients only\n",
    "print(\"------SIT_TTO 3: ALTA-----\")\n",
    "find_diff(3, *corr_mats[2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
1069
    "### Final Datasets"
Joaquin Torres's avatar
Joaquin Torres committed
1070 1071 1072 1073
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the non-redefined target; the renamed Treatment_Outcome column stays\n",
    "bd = bd.drop(columns=['Situacion_tratamiento'])\n",
    "\n",
    "# The period column is only needed to split the rows, so it is removed from both final datasets\n",
    "period_col = bd['Pandemia_inicio_fin_tratamiento']\n",
    "\n",
    "# For conj_pre dataframe\n",
    "conj_pre = bd[period_col == 'Inicio y fin prepandemia']\n",
    "conj_pre = conj_pre.drop(columns=['Pandemia_inicio_fin_tratamiento'])\n",
    "\n",
    "# For conj_post dataframe (the two pandemic classes together)\n",
    "conj_post = bd[period_col.isin(['Inicio prepandemia y fin en pandemia', 'inicio y fin en pandemia'])]\n",
    "conj_post = conj_post.drop(columns=['Pandemia_inicio_fin_tratamiento'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Numpy matrices for features and target\n",
    "# Feature columns are selected by name, so X and feat stay aligned wherever the target column sits\n",
    "pre_feature_mask = conj_pre.columns != \"Treatment_Outcome\"\n",
    "post_feature_mask = conj_post.columns != \"Treatment_Outcome\"\n",
    "\n",
    "X_pre, y_pre = conj_pre.loc[:, pre_feature_mask].to_numpy(), conj_pre.Treatment_Outcome\n",
    "X_post, y_post = conj_post.loc[:, post_feature_mask].to_numpy(), conj_post.Treatment_Outcome\n",
    "\n",
    "# Labels of the columns of X_pre / X_post (both datasets come from the same dataframe)\n",
    "feat = conj_pre.columns[pre_feature_mask].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export datasets (row index is not written)\n",
    "for dataset, csv_path in (\n",
    "    (conj_pre, './results/datasets/pre_dataset.csv'),\n",
    "    (conj_post, './results/datasets/post_dataset.csv'),\n",
    "):\n",
    "    dataset.to_csv(csv_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
1122
    "### Feature Analysis"
Joaquin Torres's avatar
Joaquin Torres committed
1123 1124
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
1125 1126 1127 1128
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
1129
    "#### Mutual Info"
Joaquin Torres's avatar
Joaquin Torres committed
1130 1131
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
1132 1133
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# mutual_info_classif is randomized; a fixed seed makes the scores and the ranking reproducible between runs\n",
    "MI_RANDOM_STATE = 42\n",
    "\n",
    "# Create subplots\n",
    "fig, axes = plt.subplots(1, 2, figsize=(12, 6))\n",
    "\n",
    "# Same analysis for PRE (left) and POST (right)\n",
    "for ax, X_group, y_group, group_title in ((axes[0], X_pre, y_pre, \"PRE\"), (axes[1], X_post, y_post, \"POST\")):\n",
    "    importances_MI = mutual_info_classif(X_group, y_group, random_state=MI_RANDOM_STATE)\n",
    "    feat_importances_MI = pd.Series(importances_MI, feat).sort_values()\n",
    "    # Show at most the 20 highest scores, leaving out features with a score of exactly 0\n",
    "    top_MI = feat_importances_MI[feat_importances_MI != 0][-20:]\n",
    "    ax.barh(top_MI.index, top_MI, color='teal')\n",
    "    ax.set_xlabel(\"Mutual Information\")\n",
    "    ax.set_title(group_title)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/feature_importance/mutual_info.svg', format='svg', dpi=1200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
Joaquin Torres's avatar
Joaquin Torres committed
1166
    "#### ANOVA"
Joaquin Torres's avatar
Joaquin Torres committed
1167 1168 1169 1170
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create subplots\n",
    "fig, axes = plt.subplots(1, 2, figsize=(12, 6))\n",
    "\n",
    "# Only the p-values are used, so no feature is discarded: k='all' keeps this cell valid\n",
    "# if the number of features changes (a fixed k has to match the feature count)\n",
    "\n",
    "# PRE\n",
    "selector = SelectKBest(f_classif, k='all')\n",
    "selector.fit(X_pre, y_pre)\n",
    "feat_importances_AN_pre = pd.Series(selector.pvalues_, feat).sort_values()\n",
    "# Among the p-values above 0.005, show at most the 20 largest\n",
    "shown_AN_pre = feat_importances_AN_pre[feat_importances_AN_pre > 0.005][-20:]\n",
    "axes[0].barh(shown_AN_pre.index, shown_AN_pre, color='teal')\n",
    "axes[0].set_xlabel(\"p-value ANOVA\")\n",
    "axes[0].set_title(\"PRE\")\n",
    "\n",
    "# POST\n",
    "selector = SelectKBest(f_classif, k='all')\n",
    "selector.fit(X_post, y_post)\n",
    "feat_importances_AN_post = pd.Series(selector.pvalues_, feat).sort_values()\n",
    "shown_AN_post = feat_importances_AN_post[feat_importances_AN_post > 0.005][-20:]\n",
    "axes[1].barh(shown_AN_post.index, shown_AN_post, color='teal')\n",
    "axes[1].set_xlabel(\"p-value ANOVA\")\n",
    "axes[1].set_title(\"POST\")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/feature_importance/ANOVA.svg', format='svg', dpi=1200)\n",
    "plt.show()"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
1201 1202 1203 1204 1205 1206 1207
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Variance Threshold"
   ]
  },
Joaquin Torres's avatar
Joaquin Torres committed
1208 1209 1210 1211 1212
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create subplots\n",
    "fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
    "\n",
    "# PRE: per-feature variances, in ascending order\n",
    "variance_filter = VarianceThreshold(threshold=0)\n",
    "variance_filter.fit(X_pre)\n",
    "feat_importances_var_pre = pd.Series(variance_filter.variances_, feat).sort_values()\n",
    "# Among the variances above 0.05, show at most the 20 largest\n",
    "shown_var_pre = feat_importances_var_pre[feat_importances_var_pre > 0.05][-20:]\n",
    "axes[0].barh(shown_var_pre.index, shown_var_pre, color='teal')\n",
    "axes[0].set_xlabel(\"Variance\")\n",
    "axes[0].set_title(\"PRE\")\n",
    "\n",
    "# POST: same steps on the post-pandemic matrix\n",
    "variance_filter = VarianceThreshold(threshold=0)\n",
    "variance_filter.fit(X_post)\n",
    "feat_importances_var_post = pd.Series(variance_filter.variances_, feat).sort_values()\n",
    "shown_var_post = feat_importances_var_post[feat_importances_var_post > 0.05][-20:]\n",
    "axes[1].barh(shown_var_post.index, shown_var_post, color='teal')\n",
    "axes[1].set_xlabel(\"Variance\")\n",
    "axes[1].set_title(\"POST\")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('./results/plots/feature_importance/var_threshold.svg', format='svg', dpi=1200)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}