Updated paths EDA

c32883c4 · Joaquin Torres · 2460bf0c · c32883c4 · c32883c4 · c32883c4
Commit c32883c4 authored Jun 27, 2024 by Joaquin Torres
11 changed files
--- a/EDA/EDA.ipynb
+++ b/EDA/EDA.ipynb
@@ -16,7 +16,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -48,11 +48,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "bd_all = pd.read_spss('17_abril.sav')\n",
+    "bd_all = pd.read_spss('./input/17_abril.sav')\n",
    "\n",
    "# Filter the dataset to work only with alcohol patients\n",
    "bd = bd_all[bd_all['Alcohol_DxCIE'] == 'Sí']\n",
@@ -70,28 +70,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\2495984927.py:18: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  conj_post['Group'] = 'Post'\n",
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\2495984927.py:19: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  conj_pre['Group'] = 'Pre'\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Pre-pandemic\n",
    "conj_pre = bd[bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia']\n",
@@ -117,22 +98,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PRE: 22861\n",
-      "\tALTA: 2792\n",
-      "\tABANDONO: 20069\n",
-      "POST: 10677\n",
-      "\tALTA: 1882\n",
-      "\tABANDONO: 8795\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Printing size of different datasets\n",
    "print(f\"PRE: {len(conj_pre)}\")\n",
@@ -160,286 +128,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PRE\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 22861 entries, 0 to 85164\n",
-      "Data columns (total 35 columns):\n",
-      " #   Column                               Non-Null Count  Dtype   \n",
-      "---  ------                               --------------  -----   \n",
-      " 0   CODPROYECTO                          22861 non-null  float64 \n",
-      " 1   Education                            22861 non-null  object  \n",
-      " 2   Social_protection                    22861 non-null  object  \n",
-      " 3   Job_insecurity                       22861 non-null  object  \n",
-      " 4   Housing                              22861 non-null  object  \n",
-      " 5   Alterations_early_childhood_develop  22861 non-null  object  \n",
-      " 6   Social_inclusion                     22861 non-null  object  \n",
-      " 7   Risk_stigma                          21606 non-null  category\n",
-      " 8   Structural_conflic                   22861 non-null  float64 \n",
-      " 9   Age                                  22852 non-null  float64 \n",
-      " 10  Sex                                  22861 non-null  object  \n",
-      " 11  NumHijos                             21647 non-null  float64 \n",
-      " 12  Smoking                              22861 non-null  object  \n",
-      " 13  Biological_vulnerability             22861 non-null  object  \n",
-      " 14  Alcohol_DxCIE                        22861 non-null  object  \n",
-      " 15  Opiaceos_DxCIE                       22861 non-null  object  \n",
-      " 16  Cannabis_DXCIE                       22861 non-null  object  \n",
-      " 17  BZD_DxCIE                            22861 non-null  object  \n",
-      " 18  Cocaina_DxCIE                        22861 non-null  object  \n",
-      " 19  Alucinogenos_DXCIE                   22861 non-null  object  \n",
-      " 20  Tabaco_DXCIE                         22861 non-null  object  \n",
-      " 21  FrecuenciaConsumo30Dias              22861 non-null  object  \n",
-      " 22  Años_consumo_droga                   22342 non-null  float64 \n",
-      " 23  OtrosDx_Psiquiatrico                 22861 non-null  object  \n",
-      " 24  Tx_previos                           22861 non-null  object  \n",
-      " 25  Adherencia_tto_recalc                22861 non-null  float64 \n",
-      " 26  Tiempo_tx                            22861 non-null  float64 \n",
-      " 27  Readmisiones_estudios                22861 non-null  object  \n",
-      " 28  Situacion_tratamiento                22861 non-null  object  \n",
-      " 29  Periodos_COVID                       22861 non-null  object  \n",
-      " 30  Pandemia_inicio_fin_tratamiento      22861 non-null  object  \n",
-      " 31  Nreadmision                          22861 non-null  float64 \n",
-      " 32  Readmisiones_PRECOVID                22861 non-null  float64 \n",
-      " 33  Readmisiones_COVID                   22861 non-null  float64 \n",
-      " 34  Group                                22861 non-null  object  \n",
-      "dtypes: category(1), float64(10), object(24)\n",
-      "memory usage: 6.1+ MB\n",
-      "None\n",
-      "-------------------------------\n",
-      "PRE-ABANDONO\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 20069 entries, 0 to 85164\n",
-      "Data columns (total 34 columns):\n",
-      " #   Column                               Non-Null Count  Dtype   \n",
-      "---  ------                               --------------  -----   \n",
-      " 0   CODPROYECTO                          20069 non-null  float64 \n",
-      " 1   Education                            20069 non-null  object  \n",
-      " 2   Social_protection                    20069 non-null  object  \n",
-      " 3   Job_insecurity                       20069 non-null  object  \n",
-      " 4   Housing                              20069 non-null  object  \n",
-      " 5   Alterations_early_childhood_develop  20069 non-null  object  \n",
-      " 6   Social_inclusion                     20069 non-null  object  \n",
-      " 7   Risk_stigma                          18919 non-null  category\n",
-      " 8   Structural_conflic                   20069 non-null  float64 \n",
-      " 9   Age                                  20061 non-null  float64 \n",
-      " 10  Sex                                  20069 non-null  object  \n",
-      " 11  NumHijos                             18958 non-null  float64 \n",
-      " 12  Smoking                              20069 non-null  object  \n",
-      " 13  Biological_vulnerability             20069 non-null  object  \n",
-      " 14  Alcohol_DxCIE                        20069 non-null  object  \n",
-      " 15  Opiaceos_DxCIE                       20069 non-null  object  \n",
-      " 16  Cannabis_DXCIE                       20069 non-null  object  \n",
-      " 17  BZD_DxCIE                            20069 non-null  object  \n",
-      " 18  Cocaina_DxCIE                        20069 non-null  object  \n",
-      " 19  Alucinogenos_DXCIE                   20069 non-null  object  \n",
-      " 20  Tabaco_DXCIE                         20069 non-null  object  \n",
-      " 21  FrecuenciaConsumo30Dias              20069 non-null  object  \n",
-      " 22  Años_consumo_droga                   19609 non-null  float64 \n",
-      " 23  OtrosDx_Psiquiatrico                 20069 non-null  object  \n",
-      " 24  Tx_previos                           20069 non-null  object  \n",
-      " 25  Adherencia_tto_recalc                20069 non-null  float64 \n",
-      " 26  Tiempo_tx                            20069 non-null  float64 \n",
-      " 27  Readmisiones_estudios                20069 non-null  object  \n",
-      " 28  Situacion_tratamiento                20069 non-null  object  \n",
-      " 29  Periodos_COVID                       20069 non-null  object  \n",
-      " 30  Pandemia_inicio_fin_tratamiento      20069 non-null  object  \n",
-      " 31  Nreadmision                          20069 non-null  float64 \n",
-      " 32  Readmisiones_PRECOVID                20069 non-null  float64 \n",
-      " 33  Readmisiones_COVID                   20069 non-null  float64 \n",
-      "dtypes: category(1), float64(10), object(23)\n",
-      "memory usage: 5.2+ MB\n",
-      "None\n",
-      "-------------------------------\n",
-      "PRE-ALTA\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 2792 entries, 23 to 85159\n",
-      "Data columns (total 34 columns):\n",
-      " #   Column                               Non-Null Count  Dtype   \n",
-      "---  ------                               --------------  -----   \n",
-      " 0   CODPROYECTO                          2792 non-null   float64 \n",
-      " 1   Education                            2792 non-null   object  \n",
-      " 2   Social_protection                    2792 non-null   object  \n",
-      " 3   Job_insecurity                       2792 non-null   object  \n",
-      " 4   Housing                              2792 non-null   object  \n",
-      " 5   Alterations_early_childhood_develop  2792 non-null   object  \n",
-      " 6   Social_inclusion                     2792 non-null   object  \n",
-      " 7   Risk_stigma                          2687 non-null   category\n",
-      " 8   Structural_conflic                   2792 non-null   float64 \n",
-      " 9   Age                                  2791 non-null   float64 \n",
-      " 10  Sex                                  2792 non-null   object  \n",
-      " 11  NumHijos                             2689 non-null   float64 \n",
-      " 12  Smoking                              2792 non-null   object  \n",
-      " 13  Biological_vulnerability             2792 non-null   object  \n",
-      " 14  Alcohol_DxCIE                        2792 non-null   object  \n",
-      " 15  Opiaceos_DxCIE                       2792 non-null   object  \n",
-      " 16  Cannabis_DXCIE                       2792 non-null   object  \n",
-      " 17  BZD_DxCIE                            2792 non-null   object  \n",
-      " 18  Cocaina_DxCIE                        2792 non-null   object  \n",
-      " 19  Alucinogenos_DXCIE                   2792 non-null   object  \n",
-      " 20  Tabaco_DXCIE                         2792 non-null   object  \n",
-      " 21  FrecuenciaConsumo30Dias              2792 non-null   object  \n",
-      " 22  Años_consumo_droga                   2733 non-null   float64 \n",
-      " 23  OtrosDx_Psiquiatrico                 2792 non-null   object  \n",
-      " 24  Tx_previos                           2792 non-null   object  \n",
-      " 25  Adherencia_tto_recalc                2792 non-null   float64 \n",
-      " 26  Tiempo_tx                            2792 non-null   float64 \n",
-      " 27  Readmisiones_estudios                2792 non-null   object  \n",
-      " 28  Situacion_tratamiento                2792 non-null   object  \n",
-      " 29  Periodos_COVID                       2792 non-null   object  \n",
-      " 30  Pandemia_inicio_fin_tratamiento      2792 non-null   object  \n",
-      " 31  Nreadmision                          2792 non-null   float64 \n",
-      " 32  Readmisiones_PRECOVID                2792 non-null   float64 \n",
-      " 33  Readmisiones_COVID                   2792 non-null   float64 \n",
-      "dtypes: category(1), float64(10), object(23)\n",
-      "memory usage: 744.5+ KB\n",
-      "None\n",
-      "-------------------------------\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "POST\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 10677 entries, 11 to 85156\n",
-      "Data columns (total 35 columns):\n",
-      " #   Column                               Non-Null Count  Dtype   \n",
-      "---  ------                               --------------  -----   \n",
-      " 0   CODPROYECTO                          10677 non-null  float64 \n",
-      " 1   Education                            10677 non-null  object  \n",
-      " 2   Social_protection                    10677 non-null  object  \n",
-      " 3   Job_insecurity                       10677 non-null  object  \n",
-      " 4   Housing                              10677 non-null  object  \n",
-      " 5   Alterations_early_childhood_develop  10677 non-null  object  \n",
-      " 6   Social_inclusion                     10677 non-null  object  \n",
-      " 7   Risk_stigma                          10085 non-null  category\n",
-      " 8   Structural_conflic                   10677 non-null  float64 \n",
-      " 9   Age                                  10676 non-null  float64 \n",
-      " 10  Sex                                  10677 non-null  object  \n",
-      " 11  NumHijos                             10103 non-null  float64 \n",
-      " 12  Smoking                              10677 non-null  object  \n",
-      " 13  Biological_vulnerability             10677 non-null  object  \n",
-      " 14  Alcohol_DxCIE                        10677 non-null  object  \n",
-      " 15  Opiaceos_DxCIE                       10677 non-null  object  \n",
-      " 16  Cannabis_DXCIE                       10677 non-null  object  \n",
-      " 17  BZD_DxCIE                            10677 non-null  object  \n",
-      " 18  Cocaina_DxCIE                        10677 non-null  object  \n",
-      " 19  Alucinogenos_DXCIE                   10677 non-null  object  \n",
-      " 20  Tabaco_DXCIE                         10677 non-null  object  \n",
-      " 21  FrecuenciaConsumo30Dias              10677 non-null  object  \n",
-      " 22  Años_consumo_droga                   10478 non-null  float64 \n",
-      " 23  OtrosDx_Psiquiatrico                 10677 non-null  object  \n",
-      " 24  Tx_previos                           10677 non-null  object  \n",
-      " 25  Adherencia_tto_recalc                10677 non-null  float64 \n",
-      " 26  Tiempo_tx                            10677 non-null  float64 \n",
-      " 27  Readmisiones_estudios                10677 non-null  object  \n",
-      " 28  Situacion_tratamiento                10677 non-null  object  \n",
-      " 29  Periodos_COVID                       10677 non-null  object  \n",
-      " 30  Pandemia_inicio_fin_tratamiento      10677 non-null  object  \n",
-      " 31  Nreadmision                          10677 non-null  float64 \n",
-      " 32  Readmisiones_PRECOVID                10677 non-null  float64 \n",
-      " 33  Readmisiones_COVID                   10677 non-null  float64 \n",
-      " 34  Group                                10677 non-null  object  \n",
-      "dtypes: category(1), float64(10), object(24)\n",
-      "memory usage: 2.9+ MB\n",
-      "None\n",
-      "-------------------------------\n",
-      "POST-ABANDONO\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 8795 entries, 11 to 85156\n",
-      "Data columns (total 34 columns):\n",
-      " #   Column                               Non-Null Count  Dtype   \n",
-      "---  ------                               --------------  -----   \n",
-      " 0   CODPROYECTO                          8795 non-null   float64 \n",
-      " 1   Education                            8795 non-null   object  \n",
-      " 2   Social_protection                    8795 non-null   object  \n",
-      " 3   Job_insecurity                       8795 non-null   object  \n",
-      " 4   Housing                              8795 non-null   object  \n",
-      " 5   Alterations_early_childhood_develop  8795 non-null   object  \n",
-      " 6   Social_inclusion                     8795 non-null   object  \n",
-      " 7   Risk_stigma                          8308 non-null   category\n",
-      " 8   Structural_conflic                   8795 non-null   float64 \n",
-      " 9   Age                                  8794 non-null   float64 \n",
-      " 10  Sex                                  8795 non-null   object  \n",
-      " 11  NumHijos                             8325 non-null   float64 \n",
-      " 12  Smoking                              8795 non-null   object  \n",
-      " 13  Biological_vulnerability             8795 non-null   object  \n",
-      " 14  Alcohol_DxCIE                        8795 non-null   object  \n",
-      " 15  Opiaceos_DxCIE                       8795 non-null   object  \n",
-      " 16  Cannabis_DXCIE                       8795 non-null   object  \n",
-      " 17  BZD_DxCIE                            8795 non-null   object  \n",
-      " 18  Cocaina_DxCIE                        8795 non-null   object  \n",
-      " 19  Alucinogenos_DXCIE                   8795 non-null   object  \n",
-      " 20  Tabaco_DXCIE                         8795 non-null   object  \n",
-      " 21  FrecuenciaConsumo30Dias              8795 non-null   object  \n",
-      " 22  Años_consumo_droga                   8627 non-null   float64 \n",
-      " 23  OtrosDx_Psiquiatrico                 8795 non-null   object  \n",
-      " 24  Tx_previos                           8795 non-null   object  \n",
-      " 25  Adherencia_tto_recalc                8795 non-null   float64 \n",
-      " 26  Tiempo_tx                            8795 non-null   float64 \n",
-      " 27  Readmisiones_estudios                8795 non-null   object  \n",
-      " 28  Situacion_tratamiento                8795 non-null   object  \n",
-      " 29  Periodos_COVID                       8795 non-null   object  \n",
-      " 30  Pandemia_inicio_fin_tratamiento      8795 non-null   object  \n",
-      " 31  Nreadmision                          8795 non-null   float64 \n",
-      " 32  Readmisiones_PRECOVID                8795 non-null   float64 \n",
-      " 33  Readmisiones_COVID                   8795 non-null   float64 \n",
-      "dtypes: category(1), float64(10), object(23)\n",
-      "memory usage: 2.3+ MB\n",
-      "None\n",
-      "-------------------------------\n",
-      "POST-ALTA\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 1882 entries, 258 to 85149\n",
-      "Data columns (total 34 columns):\n",
-      " #   Column                               Non-Null Count  Dtype   \n",
-      "---  ------                               --------------  -----   \n",
-      " 0   CODPROYECTO                          1882 non-null   float64 \n",
-      " 1   Education                            1882 non-null   object  \n",
-      " 2   Social_protection                    1882 non-null   object  \n",
-      " 3   Job_insecurity                       1882 non-null   object  \n",
-      " 4   Housing                              1882 non-null   object  \n",
-      " 5   Alterations_early_childhood_develop  1882 non-null   object  \n",
-      " 6   Social_inclusion                     1882 non-null   object  \n",
-      " 7   Risk_stigma                          1777 non-null   category\n",
-      " 8   Structural_conflic                   1882 non-null   float64 \n",
-      " 9   Age                                  1882 non-null   float64 \n",
-      " 10  Sex                                  1882 non-null   object  \n",
-      " 11  NumHijos                             1778 non-null   float64 \n",
-      " 12  Smoking                              1882 non-null   object  \n",
-      " 13  Biological_vulnerability             1882 non-null   object  \n",
-      " 14  Alcohol_DxCIE                        1882 non-null   object  \n",
-      " 15  Opiaceos_DxCIE                       1882 non-null   object  \n",
-      " 16  Cannabis_DXCIE                       1882 non-null   object  \n",
-      " 17  BZD_DxCIE                            1882 non-null   object  \n",
-      " 18  Cocaina_DxCIE                        1882 non-null   object  \n",
-      " 19  Alucinogenos_DXCIE                   1882 non-null   object  \n",
-      " 20  Tabaco_DXCIE                         1882 non-null   object  \n",
-      " 21  FrecuenciaConsumo30Dias              1882 non-null   object  \n",
-      " 22  Años_consumo_droga                   1851 non-null   float64 \n",
-      " 23  OtrosDx_Psiquiatrico                 1882 non-null   object  \n",
-      " 24  Tx_previos                           1882 non-null   object  \n",
-      " 25  Adherencia_tto_recalc                1882 non-null   float64 \n",
-      " 26  Tiempo_tx                            1882 non-null   float64 \n",
-      " 27  Readmisiones_estudios                1882 non-null   object  \n",
-      " 28  Situacion_tratamiento                1882 non-null   object  \n",
-      " 29  Periodos_COVID                       1882 non-null   object  \n",
-      " 30  Pandemia_inicio_fin_tratamiento      1882 non-null   object  \n",
-      " 31  Nreadmision                          1882 non-null   float64 \n",
-      " 32  Readmisiones_PRECOVID                1882 non-null   float64 \n",
-      " 33  Readmisiones_COVID                   1882 non-null   float64 \n",
-      "dtypes: category(1), float64(10), object(23)\n",
-      "memory usage: 501.9+ KB\n",
-      "None\n",
-      "-------------------------------\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(\"PRE\")\n",
    "print(conj_pre.info())\n",
@@ -473,18 +164,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Live with families or friends' 'live alone' 'live in institutions' '9.0']\n",
-      "['Live with families or friends' 'live alone' 'live in institutions']\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# 9.0 represents unknown according to Variables.docx \n",
    "print(bd['Social_inclusion'].unique())\n",
@@ -496,20 +178,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['No alterations (first exposure at 11 or more years)'\n",
-      " 'Alterations (first exposure before 11 years old)' '9']\n",
-      "['No alterations (first exposure at 11 or more years)'\n",
-      " 'Alterations (first exposure before 11 years old)']\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(bd['Alterations_early_childhood_develop'].unique())\n",
    "mode_alt = bd['Alterations_early_childhood_develop'].mode()[0]\n",
@@ -519,28 +190,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[NaN, 'Yes', 'No']\n",
-      "Categories (3, object): [99.0, 'No', 'Yes']\n",
-      "[NaN, 'Yes', 'No']\n",
-      "Categories (2, object): ['No', 'Yes']\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
-      "  bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(bd['Risk_stigma'].unique())\n",
    "mode_stigma = bd['Risk_stigma'].mode()[0]\n",
@@ -550,18 +202,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[nan  0.  1.  2.  3.  4.  5.  8. 10.  6. 11. 12.  9.  7. 99. 14. 15.]\n",
-      "[nan  0.  1.  2.  3.  4.  5.  8. 10.  6. 11. 12.  9.  7. 14. 15.]\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(bd['NumHijos'].unique())\n",
    "mode_hijos = bd['NumHijos'].mode()[0]\n",
@@ -609,44 +252,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
-      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
-      "\n",
-      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
-      "\n",
-      "\n",
-      "  bd['Age'].fillna(age_mode, inplace=True)\n",
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
-      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
-      "\n",
-      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
-      "\n",
-      "\n",
-      "  bd['Años_consumo_droga'].fillna(años_consumo_mode, inplace=True)\n",
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
-      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
-      "\n",
-      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
-      "\n",
-      "\n",
-      "  bd['Risk_stigma'].fillna(risk_stigma_mode, inplace=True)\n",
-      "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:11: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
-      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
-      "\n",
-      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
-      "\n",
-      "\n",
-      "  bd['NumHijos'].fillna(num_hijos_mode, inplace=True)\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "age_mode = bd['Age'].mode()[0]\n",
    "bd['Age'].fillna(age_mode, inplace=True)\n",
@@ -734,8 +342,7 @@
    "# Adjust layout to prevent overlapping titles\n",
    "plt.tight_layout()\n",
    "\n",
-    "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n",
-    "plt.savefig('./EDA_plots/countplots.svg', dpi=600, bbox_inches='tight')"
+    "plt.savefig('./output/plots/distributions/countplots.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
@@ -830,8 +437,8 @@
    "# Adjust layout to prevent overlapping titles\n",
    "plt.tight_layout()\n",
    "\n",
-    "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n",
-    "plt.savefig('./EDA_plots/norm_countplots.svg', dpi=600, bbox_inches='tight')"
+    "# Save the figure in SVG format with DPI=600 in the \"./output/plots/distributions\" folder\n",
+    "plt.savefig('./output/plots/distributions/norm_countplots.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
@@ -886,7 +493,7 @@
    "plt.tight_layout()\n",
    "\n",
    "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n",
-    "plt.savefig('./EDA_plots/boxplots.svg', dpi=600, bbox_inches='tight')"
+    "plt.savefig('./output/plots/distributions/boxplots.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
@@ -926,7 +533,7 @@
    "plt.tight_layout()\n",
    "\n",
    "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n",
-    "plt.savefig('./EDA_plots/histograms.svg', dpi=600, bbox_inches='tight')"
+    "plt.savefig('./output/plots/distributions/histograms.svg', dpi=600, bbox_inches='tight')"
   ]
  },
  {
@@ -945,7 +552,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1022,7 +629,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1036,7 +643,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1063,7 +670,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1098,7 +705,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1138,21 +745,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Ed_Not Complete primary school', 'Ed_Primary education', 'Ed_Secondary Education', 'Ed_Secondary more technical education', 'Ed_Tertiary', 'Ed_Unknowledge', 'Social_protection_REDEF', 'JobIn_Non-stable', 'JobIn_Stable', 'JobIn_Unemployed', 'JobIn_unkwnodledge', 'Hous_Institutional', 'Hous_Stable', 'Hous_Unstable', 'Hous_unknowledge', 'Alterations_early_childhood_develop_REDEF', 'SocInc_Live with families or friends', 'SocInc_live alone', 'SocInc_live in institutions', 'Risk_stigma_REDEF', 'Structural_conflic']\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Export column names for future programs\n",
-    "np.save('./soc_vars_names.npy', soc_vars_enc)\n",
-    "np.save('./ind_vars_names.npy', soc_vars_enc)"
+    "np.save('./output/soc_vars_names.npy', soc_vars_enc)\n",
+    "np.save('./output/ind_vars_names.npy', soc_vars_enc)"
   ]
  },
  {
@@ -1164,7 +763,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1194,7 +793,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1204,7 +803,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1244,7 +843,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1358,7 +957,7 @@
    "plt.tight_layout()\n",
    "\n",
-    "# Save the figure in SVG format in the \"./EDA_plots\" folder\n",
-    "plt.savefig('./EDA_plots/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')"
+    "# Save the figure in SVG format in the \"./output/plots/correlations\" folder\n",
+    "plt.savefig('./output/plots/correlations/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')"
   ]
  },
  {
@@ -1489,69 +1088,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 33538 entries, 0 to 85164\n",
-      "Data columns (total 45 columns):\n",
-      " #   Column                                     Non-Null Count  Dtype   \n",
-      "---  ------                                     --------------  -----   \n",
-      " 0   Ed_Not Complete primary school             33538 non-null  bool    \n",
-      " 1   Ed_Primary education                       33538 non-null  bool    \n",
-      " 2   Ed_Secondary Education                     33538 non-null  bool    \n",
-      " 3   Ed_Secondary more technical education      33538 non-null  bool    \n",
-      " 4   Ed_Tertiary                                33538 non-null  bool    \n",
-      " 5   Ed_Unknowledge                             33538 non-null  bool    \n",
-      " 6   Social_protection_REDEF                    33538 non-null  int64   \n",
-      " 7   JobIn_Non-stable                           33538 non-null  bool    \n",
-      " 8   JobIn_Stable                               33538 non-null  bool    \n",
-      " 9   JobIn_Unemployed                           33538 non-null  bool    \n",
-      " 10  JobIn_unkwnodledge                         33538 non-null  bool    \n",
-      " 11  Hous_Institutional                         33538 non-null  bool    \n",
-      " 12  Hous_Stable                                33538 non-null  bool    \n",
-      " 13  Hous_Unstable                              33538 non-null  bool    \n",
-      " 14  Hous_unknowledge                           33538 non-null  bool    \n",
-      " 15  Alterations_early_childhood_develop_REDEF  33538 non-null  int64   \n",
-      " 16  SocInc_Live with families or friends       33538 non-null  bool    \n",
-      " 17  SocInc_live alone                          33538 non-null  bool    \n",
-      " 18  SocInc_live in institutions                33538 non-null  bool    \n",
-      " 19  Risk_stigma_REDEF                          33538 non-null  category\n",
-      " 20  Structural_conflic                         33538 non-null  float64 \n",
-      " 21  Age                                        33538 non-null  float64 \n",
-      " 22  Sex_REDEF                                  33538 non-null  int64   \n",
-      " 23  NumHijos                                   33538 non-null  float64 \n",
-      " 24  Smoking_REDEF                              33538 non-null  int64   \n",
-      " 25  Biological_vulnerability_REDEF             33538 non-null  int64   \n",
-      " 26  Opiaceos_DxCIE_REDEF                       33538 non-null  int64   \n",
-      " 27  Cannabis_DXCIE_REDEF                       33538 non-null  int64   \n",
-      " 28  BZD_DxCIE_REDEF                            33538 non-null  int64   \n",
-      " 29  Cocaina_DxCIE_REDEF                        33538 non-null  int64   \n",
-      " 30  Alucinogenos_DXCIE_REDEF                   33538 non-null  int64   \n",
-      " 31  Tabaco_DXCIE_REDEF                         33538 non-null  int64   \n",
-      " 32  Frec30_1 día/semana                        33538 non-null  bool    \n",
-      " 33  Frec30_2-3 días‎/semana                    33538 non-null  bool    \n",
-      " 34  Frec30_4-6 días/semana                     33538 non-null  bool    \n",
-      " 35  Frec30_Desconocido                         33538 non-null  bool    \n",
-      " 36  Frec30_Menos de 1 día‎/semana              33538 non-null  bool    \n",
-      " 37  Frec30_No consumio                         33538 non-null  bool    \n",
-      " 38  Frec30_Todos los días                      33538 non-null  bool    \n",
-      " 39  Años_consumo_droga                         33538 non-null  float64 \n",
-      " 40  OtrosDx_Psiquiatrico_REDEF                 33538 non-null  int64   \n",
-      " 41  Tx_previos_REDEF                           33538 non-null  int64   \n",
-      " 42  Adherencia_tto_recalc                      33538 non-null  float64 \n",
-      " 43  Pandemia_inicio_fin_tratamiento            33538 non-null  object  \n",
-      " 44  Situacion_tratamiento_REDEF                33538 non-null  int64   \n",
-      "dtypes: bool(24), category(1), float64(5), int64(14), object(1)\n",
-      "memory usage: 6.2+ MB\n",
-      "None\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Work with columns of interest\n",
    "cols_of_interest = corr_cols + ['Pandemia_inicio_fin_tratamiento'] + [target_var + \"_REDEF\"]\n",
@@ -1561,7 +1100,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1572,72 +1111,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 33538 entries, 0 to 85164\n",
-      "Data columns (total 41 columns):\n",
-      " #   Column                                     Non-Null Count  Dtype   \n",
-      "---  ------                                     --------------  -----   \n",
-      " 0   Ed_Not Complete primary school             33538 non-null  bool    \n",
-      " 1   Ed_Primary education                       33538 non-null  bool    \n",
-      " 2   Ed_Secondary Education                     33538 non-null  bool    \n",
-      " 3   Ed_Secondary more technical education      33538 non-null  bool    \n",
-      " 4   Ed_Tertiary                                33538 non-null  bool    \n",
-      " 5   Social_protection_REDEF                    33538 non-null  int64   \n",
-      " 6   JobIn_Non-stable                           33538 non-null  bool    \n",
-      " 7   JobIn_Stable                               33538 non-null  bool    \n",
-      " 8   JobIn_Unemployed                           33538 non-null  bool    \n",
-      " 9   Hous_Institutional                         33538 non-null  bool    \n",
-      " 10  Hous_Stable                                33538 non-null  bool    \n",
-      " 11  Hous_Unstable                              33538 non-null  bool    \n",
-      " 12  Alterations_early_childhood_develop_REDEF  33538 non-null  int64   \n",
-      " 13  SocInc_Live with families or friends       33538 non-null  bool    \n",
-      " 14  SocInc_live alone                          33538 non-null  bool    \n",
-      " 15  SocInc_live in institutions                33538 non-null  bool    \n",
-      " 16  Risk_stigma_REDEF                          33538 non-null  category\n",
-      " 17  Structural_conflic                         33538 non-null  float64 \n",
-      " 18  Age                                        33538 non-null  float64 \n",
-      " 19  Sex_REDEF                                  33538 non-null  int64   \n",
-      " 20  NumHijos                                   33538 non-null  float64 \n",
-      " 21  Smoking_REDEF                              33538 non-null  int64   \n",
-      " 22  Biological_vulnerability_REDEF             33538 non-null  int64   \n",
-      " 23  Opiaceos_DxCIE_REDEF                       33538 non-null  int64   \n",
-      " 24  Cannabis_DXCIE_REDEF                       33538 non-null  int64   \n",
-      " 25  BZD_DxCIE_REDEF                            33538 non-null  int64   \n",
-      " 26  Cocaina_DxCIE_REDEF                        33538 non-null  int64   \n",
-      " 27  Alucinogenos_DXCIE_REDEF                   33538 non-null  int64   \n",
-      " 28  Tabaco_DXCIE_REDEF                         33538 non-null  int64   \n",
-      " 29  Frec30_1 día/semana                        33538 non-null  bool    \n",
-      " 30  Frec30_2-3 días‎/semana                    33538 non-null  bool    \n",
-      " 31  Frec30_4-6 días/semana                     33538 non-null  bool    \n",
-      " 32  Frec30_Menos de 1 día‎/semana              33538 non-null  bool    \n",
-      " 33  Frec30_No consumio                         33538 non-null  bool    \n",
-      " 34  Frec30_Todos los días                      33538 non-null  bool    \n",
-      " 35  Años_consumo_droga                         33538 non-null  float64 \n",
-      " 36  OtrosDx_Psiquiatrico_REDEF                 33538 non-null  int64   \n",
-      " 37  Tx_previos_REDEF                           33538 non-null  int64   \n",
-      " 38  Adherencia_tto_recalc                      33538 non-null  float64 \n",
-      " 39  Pandemia_inicio_fin_tratamiento            33538 non-null  object  \n",
-      " 40  Situacion_tratamiento_REDEF                33538 non-null  int64   \n",
-      "dtypes: bool(20), category(1), float64(5), int64(14), object(1)\n",
-      "memory usage: 6.0+ MB\n",
-      "None\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "print(temp_bd.info())"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1653,135 +1136,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 22861 entries, 0 to 85164\n",
-      "Data columns (total 40 columns):\n",
-      " #   Column                                     Non-Null Count  Dtype   \n",
-      "---  ------                                     --------------  -----   \n",
-      " 0   Ed_Not Complete primary school             22861 non-null  bool    \n",
-      " 1   Ed_Primary education                       22861 non-null  bool    \n",
-      " 2   Ed_Secondary Education                     22861 non-null  bool    \n",
-      " 3   Ed_Secondary more technical education      22861 non-null  bool    \n",
-      " 4   Ed_Tertiary                                22861 non-null  bool    \n",
-      " 5   Social_protection_REDEF                    22861 non-null  int64   \n",
-      " 6   JobIn_Non-stable                           22861 non-null  bool    \n",
-      " 7   JobIn_Stable                               22861 non-null  bool    \n",
-      " 8   JobIn_Unemployed                           22861 non-null  bool    \n",
-      " 9   Hous_Institutional                         22861 non-null  bool    \n",
-      " 10  Hous_Stable                                22861 non-null  bool    \n",
-      " 11  Hous_Unstable                              22861 non-null  bool    \n",
-      " 12  Alterations_early_childhood_develop_REDEF  22861 non-null  int64   \n",
-      " 13  SocInc_Live with families or friends       22861 non-null  bool    \n",
-      " 14  SocInc_live alone                          22861 non-null  bool    \n",
-      " 15  SocInc_live in institutions                22861 non-null  bool    \n",
-      " 16  Risk_stigma_REDEF                          22861 non-null  category\n",
-      " 17  Structural_conflic                         22861 non-null  float64 \n",
-      " 18  Age                                        22861 non-null  float64 \n",
-      " 19  Sex_REDEF                                  22861 non-null  int64   \n",
-      " 20  NumHijos                                   22861 non-null  float64 \n",
-      " 21  Smoking_REDEF                              22861 non-null  int64   \n",
-      " 22  Biological_vulnerability_REDEF             22861 non-null  int64   \n",
-      " 23  Opiaceos_DxCIE_REDEF                       22861 non-null  int64   \n",
-      " 24  Cannabis_DXCIE_REDEF                       22861 non-null  int64   \n",
-      " 25  BZD_DxCIE_REDEF                            22861 non-null  int64   \n",
-      " 26  Cocaina_DxCIE_REDEF                        22861 non-null  int64   \n",
-      " 27  Alucinogenos_DXCIE_REDEF                   22861 non-null  int64   \n",
-      " 28  Tabaco_DXCIE_REDEF                         22861 non-null  int64   \n",
-      " 29  Frec30_1 día/semana                        22861 non-null  bool    \n",
-      " 30  Frec30_2-3 días‎/semana                    22861 non-null  bool    \n",
-      " 31  Frec30_4-6 días/semana                     22861 non-null  bool    \n",
-      " 32  Frec30_Menos de 1 día‎/semana              22861 non-null  bool    \n",
-      " 33  Frec30_No consumio                         22861 non-null  bool    \n",
-      " 34  Frec30_Todos los días                      22861 non-null  bool    \n",
-      " 35  Años_consumo_droga                         22861 non-null  float64 \n",
-      " 36  OtrosDx_Psiquiatrico_REDEF                 22861 non-null  int64   \n",
-      " 37  Tx_previos_REDEF                           22861 non-null  int64   \n",
-      " 38  Adherencia_tto_recalc                      22861 non-null  float64 \n",
-      " 39  Situacion_tratamiento_REDEF                22861 non-null  int64   \n",
-      "dtypes: bool(20), category(1), float64(5), int64(14)\n",
-      "memory usage: 3.9 MB\n",
-      "None\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "print(conj_pre.info())"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Index: 10677 entries, 11 to 85156\n",
-      "Data columns (total 40 columns):\n",
-      " #   Column                                     Non-Null Count  Dtype   \n",
-      "---  ------                                     --------------  -----   \n",
-      " 0   Ed_Not Complete primary school             10677 non-null  bool    \n",
-      " 1   Ed_Primary education                       10677 non-null  bool    \n",
-      " 2   Ed_Secondary Education                     10677 non-null  bool    \n",
-      " 3   Ed_Secondary more technical education      10677 non-null  bool    \n",
-      " 4   Ed_Tertiary                                10677 non-null  bool    \n",
-      " 5   Social_protection_REDEF                    10677 non-null  int64   \n",
-      " 6   JobIn_Non-stable                           10677 non-null  bool    \n",
-      " 7   JobIn_Stable                               10677 non-null  bool    \n",
-      " 8   JobIn_Unemployed                           10677 non-null  bool    \n",
-      " 9   Hous_Institutional                         10677 non-null  bool    \n",
-      " 10  Hous_Stable                                10677 non-null  bool    \n",
-      " 11  Hous_Unstable                              10677 non-null  bool    \n",
-      " 12  Alterations_early_childhood_develop_REDEF  10677 non-null  int64   \n",
-      " 13  SocInc_Live with families or friends       10677 non-null  bool    \n",
-      " 14  SocInc_live alone                          10677 non-null  bool    \n",
-      " 15  SocInc_live in institutions                10677 non-null  bool    \n",
-      " 16  Risk_stigma_REDEF                          10677 non-null  category\n",
-      " 17  Structural_conflic                         10677 non-null  float64 \n",
-      " 18  Age                                        10677 non-null  float64 \n",
-      " 19  Sex_REDEF                                  10677 non-null  int64   \n",
-      " 20  NumHijos                                   10677 non-null  float64 \n",
-      " 21  Smoking_REDEF                              10677 non-null  int64   \n",
-      " 22  Biological_vulnerability_REDEF             10677 non-null  int64   \n",
-      " 23  Opiaceos_DxCIE_REDEF                       10677 non-null  int64   \n",
-      " 24  Cannabis_DXCIE_REDEF                       10677 non-null  int64   \n",
-      " 25  BZD_DxCIE_REDEF                            10677 non-null  int64   \n",
-      " 26  Cocaina_DxCIE_REDEF                        10677 non-null  int64   \n",
-      " 27  Alucinogenos_DXCIE_REDEF                   10677 non-null  int64   \n",
-      " 28  Tabaco_DXCIE_REDEF                         10677 non-null  int64   \n",
-      " 29  Frec30_1 día/semana                        10677 non-null  bool    \n",
-      " 30  Frec30_2-3 días‎/semana                    10677 non-null  bool    \n",
-      " 31  Frec30_4-6 días/semana                     10677 non-null  bool    \n",
-      " 32  Frec30_Menos de 1 día‎/semana              10677 non-null  bool    \n",
-      " 33  Frec30_No consumio                         10677 non-null  bool    \n",
-      " 34  Frec30_Todos los días                      10677 non-null  bool    \n",
-      " 35  Años_consumo_droga                         10677 non-null  float64 \n",
-      " 36  OtrosDx_Psiquiatrico_REDEF                 10677 non-null  int64   \n",
-      " 37  Tx_previos_REDEF                           10677 non-null  int64   \n",
-      " 38  Adherencia_tto_recalc                      10677 non-null  float64 \n",
-      " 39  Situacion_tratamiento_REDEF                10677 non-null  int64   \n",
-      "dtypes: bool(20), category(1), float64(5), int64(14)\n",
-      "memory usage: 1.8 MB\n",
-      "None\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "print(conj_post.info())"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1793,52 +1166,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Ed_Not Complete primary school' 'Ed_Primary education'\n",
-      " 'Ed_Secondary Education' 'Ed_Secondary more technical education'\n",
-      " 'Ed_Tertiary' 'Social_protection_REDEF' 'JobIn_Non-stable' 'JobIn_Stable'\n",
-      " 'JobIn_Unemployed' 'Hous_Institutional' 'Hous_Stable' 'Hous_Unstable'\n",
-      " 'Alterations_early_childhood_develop_REDEF'\n",
-      " 'SocInc_Live with families or friends' 'SocInc_live alone'\n",
-      " 'SocInc_live in institutions' 'Risk_stigma_REDEF' 'Structural_conflic'\n",
-      " 'Age' 'Sex_REDEF' 'NumHijos' 'Smoking_REDEF'\n",
-      " 'Biological_vulnerability_REDEF' 'Opiaceos_DxCIE_REDEF'\n",
-      " 'Cannabis_DXCIE_REDEF' 'BZD_DxCIE_REDEF' 'Cocaina_DxCIE_REDEF'\n",
-      " 'Alucinogenos_DXCIE_REDEF' 'Tabaco_DXCIE_REDEF' 'Frec30_1 día/semana'\n",
-      " 'Frec30_2-3 días\\u200e/semana' 'Frec30_4-6 días/semana'\n",
-      " 'Frec30_Menos de 1 día\\u200e/semana' 'Frec30_No consumio'\n",
-      " 'Frec30_Todos los días' 'Años_consumo_droga' 'OtrosDx_Psiquiatrico_REDEF'\n",
-      " 'Tx_previos_REDEF' 'Adherencia_tto_recalc']\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "print(feat)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(22861, 39)\n",
-      "(10677, 39)\n",
-      "(22861,)\n",
-      "(10677,)\n",
-      "39\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(X_pre.shape)\n",
    "print(X_post.shape)\n",
@@ -1887,7 +1226,7 @@
    "axes[1].set_title(\"POST\")\n",
    "\n",
    "plt.tight_layout()\n",
-    "plt.savefig('EDA_plots/features/mutual_info.svg', format='svg', dpi=1200)\n",
+    "plt.savefig('./output/plots/feature_importance/mutual_info.svg', format='svg', dpi=1200)\n",
    "plt.show()"
   ]
  },
@@ -1926,7 +1265,7 @@
    "axes[1].set_title(\"POST\")\n",
    "\n",
    "plt.tight_layout()\n",
-    "plt.savefig('EDA_plots/features/ANOVA.svg', format='svg', dpi=1200)\n",
+    "plt.savefig('./output/plots/feature_importance/ANOVA.svg', format='svg', dpi=1200)\n",
    "plt.show()"
   ]
  },
@@ -1958,7 +1297,7 @@
    "axes[1].set_title(\"POST\")\n",
    "\n",
    "plt.tight_layout()\n",
-    "plt.savefig('EDA_plots/features/var_threshold.svg', format='svg', dpi=1200)\n",
+    "plt.savefig('./output/plots/feature_importance/var_threshold.svg', format='svg', dpi=1200)\n",
    "plt.show()"
   ]
  },
@@ -1971,7 +1310,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [

--- a/EDA/ind_vars_names.npy
+++ b/EDA/ind_vars_names.npy
--- a/EDA/plots/distributions/heatmaps_one_hot.svg
+++ b/EDA/plots/distributions/heatmaps_one_hot.svg
--- a/EDA/plots/distributions/boxplots.svg
+++ b/EDA/plots/distributions/boxplots.svg
--- a/EDA/plots/distributions/countplots.svg
+++ b/EDA/plots/distributions/countplots.svg
--- a/EDA/plots/distributions/histograms.svg
+++ b/EDA/plots/distributions/histograms.svg
--- a/EDA/plots/distributions/norm_countplots.svg
+++ b/EDA/plots/distributions/norm_countplots.svg
--- a/EDA/plots/feature_importance/ANOVA.svg
+++ b/EDA/plots/feature_importance/ANOVA.svg
--- a/EDA/plots/feature_importance/mutual_info.svg
+++ b/EDA/plots/feature_importance/mutual_info.svg
--- a/EDA/plots/feature_importance/var_threshold.svg
+++ b/EDA/plots/feature_importance/var_threshold.svg
--- a/EDA/soc_vars_names.npy
+++ b/EDA/soc_vars_names.npy