From c32883c437fc55d7311643e56d795074ff594f3c Mon Sep 17 00:00:00 2001 From: joaquintb Date: Thu, 27 Jun 2024 10:54:32 +0200 Subject: [PATCH] Updated paths EDA --- EDA/EDA.ipynb | 795 ++---------------- EDA/{ => output}/ind_vars_names.npy | Bin .../plots/correlations}/heatmaps_one_hot.svg | 0 .../plots/distributions/boxplots.svg | 0 .../plots/distributions/countplots.svg | 0 .../plots/distributions/histograms.svg | 0 .../plots/distributions/norm_countplots.svg | 0 .../plots/feature_importance/ANOVA.svg | 0 .../plots/feature_importance/mutual_info.svg | 0 .../feature_importance/var_threshold.svg | 0 EDA/{ => output}/soc_vars_names.npy | Bin 11 files changed, 67 insertions(+), 728 deletions(-) rename EDA/{ => output}/ind_vars_names.npy (100%) rename EDA/{plots/distributions => output/plots/correlations}/heatmaps_one_hot.svg (100%) rename EDA/{ => output}/plots/distributions/boxplots.svg (100%) rename EDA/{ => output}/plots/distributions/countplots.svg (100%) rename EDA/{ => output}/plots/distributions/histograms.svg (100%) rename EDA/{ => output}/plots/distributions/norm_countplots.svg (100%) rename EDA/{ => output}/plots/feature_importance/ANOVA.svg (100%) rename EDA/{ => output}/plots/feature_importance/mutual_info.svg (100%) rename EDA/{ => output}/plots/feature_importance/var_threshold.svg (100%) rename EDA/{ => output}/soc_vars_names.npy (100%) diff --git a/EDA/EDA.ipynb b/EDA/EDA.ipynb index 3b29533..c65d87c 100644 --- a/EDA/EDA.ipynb +++ b/EDA/EDA.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -48,11 +48,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "bd_all = pd.read_spss('17_abril.sav')\n", + "bd_all = pd.read_spss('./input/17_abril.sav')\n", "\n", "# Filter the dataset to work only with alcohol patients\n", "bd = bd_all[bd_all['Alcohol_DxCIE'] == 'Sí']\n", @@ -70,28 +70,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\2495984927.py:18: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " conj_post['Group'] = 'Post'\n", - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\2495984927.py:19: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " conj_pre['Group'] = 'Pre'\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Pre-pandemic\n", "conj_pre = bd[bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia']\n", @@ -117,22 +98,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PRE: 22861\n", - "\tALTA: 2792\n", - "\tABANDONO: 20069\n", - "POST: 10677\n", - "\tALTA: 1882\n", - "\tABANDONO: 8795\n" - ] - } - ], + "outputs": [], "source": [ "# Printing size of different datasets\n", "print(f\"PRE: {len(conj_pre)}\")\n", @@ -160,286 +128,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PRE\n", - "\n", - "Index: 22861 entries, 0 to 85164\n", - "Data columns (total 35 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 CODPROYECTO 22861 non-null float64 \n", - " 1 Education 22861 non-null object \n", - " 2 Social_protection 22861 non-null object \n", - " 3 Job_insecurity 22861 non-null object \n", - " 4 Housing 22861 non-null object \n", - " 5 Alterations_early_childhood_develop 22861 non-null object \n", - " 6 Social_inclusion 22861 non-null object \n", - " 7 Risk_stigma 21606 non-null category\n", - " 8 Structural_conflic 22861 non-null float64 \n", - " 9 Age 22852 non-null float64 \n", - " 10 Sex 22861 non-null object \n", - " 11 NumHijos 21647 non-null float64 \n", - " 12 Smoking 22861 non-null object \n", - " 13 Biological_vulnerability 22861 non-null object \n", - " 14 Alcohol_DxCIE 22861 non-null object \n", - " 15 Opiaceos_DxCIE 22861 non-null object \n", - " 16 Cannabis_DXCIE 22861 non-null object \n", - " 17 BZD_DxCIE 22861 non-null object \n", - " 18 Cocaina_DxCIE 22861 non-null object \n", - " 19 Alucinogenos_DXCIE 22861 non-null object \n", - " 20 Tabaco_DXCIE 22861 non-null object \n", - " 21 FrecuenciaConsumo30Dias 22861 non-null object \n", - " 22 Años_consumo_droga 22342 non-null float64 \n", - " 23 OtrosDx_Psiquiatrico 22861 non-null object \n", - " 24 Tx_previos 22861 non-null object \n", - " 25 Adherencia_tto_recalc 22861 non-null float64 \n", - " 26 Tiempo_tx 22861 non-null float64 \n", - " 27 Readmisiones_estudios 22861 non-null object \n", - " 28 Situacion_tratamiento 22861 non-null object \n", - " 29 Periodos_COVID 22861 non-null object \n", - " 30 Pandemia_inicio_fin_tratamiento 22861 non-null object \n", - " 31 Nreadmision 22861 non-null float64 \n", - " 32 Readmisiones_PRECOVID 22861 non-null float64 \n", - " 33 Readmisiones_COVID 22861 non-null float64 \n", - " 34 Group 22861 non-null object \n", - "dtypes: category(1), float64(10), object(24)\n", - "memory usage: 6.1+ MB\n", - "None\n", - "-------------------------------\n", - "PRE-ABANDONO\n", - "\n", - "Index: 20069 entries, 0 to 85164\n", - "Data columns (total 34 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 CODPROYECTO 20069 non-null float64 \n", - " 1 Education 20069 non-null object \n", - " 2 Social_protection 20069 non-null object \n", - " 3 Job_insecurity 20069 non-null object \n", - " 4 Housing 20069 non-null object \n", - " 5 Alterations_early_childhood_develop 20069 non-null object \n", - " 6 Social_inclusion 20069 non-null object \n", - " 7 Risk_stigma 18919 non-null category\n", - " 8 Structural_conflic 20069 non-null float64 \n", - " 9 Age 20061 non-null float64 \n", - " 10 Sex 20069 non-null object \n", - " 11 NumHijos 18958 non-null float64 \n", - " 12 Smoking 20069 non-null object \n", - " 13 Biological_vulnerability 20069 non-null object \n", - " 14 Alcohol_DxCIE 20069 non-null object \n", - " 15 Opiaceos_DxCIE 20069 non-null object \n", - " 16 Cannabis_DXCIE 20069 non-null object \n", - " 17 BZD_DxCIE 20069 non-null object \n", - " 18 Cocaina_DxCIE 20069 non-null object \n", - " 19 Alucinogenos_DXCIE 20069 non-null object \n", - " 20 Tabaco_DXCIE 20069 non-null object \n", - " 21 FrecuenciaConsumo30Dias 20069 non-null object \n", - " 22 Años_consumo_droga 19609 non-null float64 \n", - " 23 OtrosDx_Psiquiatrico 20069 non-null object \n", - " 24 Tx_previos 20069 non-null object \n", - " 25 Adherencia_tto_recalc 20069 non-null float64 \n", - " 26 Tiempo_tx 20069 non-null float64 \n", - " 27 Readmisiones_estudios 20069 non-null object \n", - " 28 Situacion_tratamiento 20069 non-null object \n", - " 29 Periodos_COVID 20069 non-null object \n", - " 30 Pandemia_inicio_fin_tratamiento 20069 non-null object \n", - " 31 Nreadmision 20069 non-null float64 \n", - " 32 Readmisiones_PRECOVID 20069 non-null float64 \n", - " 33 Readmisiones_COVID 20069 non-null float64 \n", - "dtypes: category(1), float64(10), object(23)\n", - "memory usage: 5.2+ MB\n", - "None\n", - "-------------------------------\n", - "PRE-ALTA\n", - "\n", - "Index: 2792 entries, 23 to 85159\n", - "Data columns (total 34 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 CODPROYECTO 2792 non-null float64 \n", - " 1 Education 2792 non-null object \n", - " 2 Social_protection 2792 non-null object \n", - " 3 Job_insecurity 2792 non-null object \n", - " 4 Housing 2792 non-null object \n", - " 5 Alterations_early_childhood_develop 2792 non-null object \n", - " 6 Social_inclusion 2792 non-null object \n", - " 7 Risk_stigma 2687 non-null category\n", - " 8 Structural_conflic 2792 non-null float64 \n", - " 9 Age 2791 non-null float64 \n", - " 10 Sex 2792 non-null object \n", - " 11 NumHijos 2689 non-null float64 \n", - " 12 Smoking 2792 non-null object \n", - " 13 Biological_vulnerability 2792 non-null object \n", - " 14 Alcohol_DxCIE 2792 non-null object \n", - " 15 Opiaceos_DxCIE 2792 non-null object \n", - " 16 Cannabis_DXCIE 2792 non-null object \n", - " 17 BZD_DxCIE 2792 non-null object \n", - " 18 Cocaina_DxCIE 2792 non-null object \n", - " 19 Alucinogenos_DXCIE 2792 non-null object \n", - " 20 Tabaco_DXCIE 2792 non-null object \n", - " 21 FrecuenciaConsumo30Dias 2792 non-null object \n", - " 22 Años_consumo_droga 2733 non-null float64 \n", - " 23 OtrosDx_Psiquiatrico 2792 non-null object \n", - " 24 Tx_previos 2792 non-null object \n", - " 25 Adherencia_tto_recalc 2792 non-null float64 \n", - " 26 Tiempo_tx 2792 non-null float64 \n", - " 27 Readmisiones_estudios 2792 non-null object \n", - " 28 Situacion_tratamiento 2792 non-null object \n", - " 29 Periodos_COVID 2792 non-null object \n", - " 30 Pandemia_inicio_fin_tratamiento 2792 non-null object \n", - " 31 Nreadmision 2792 non-null float64 \n", - " 32 Readmisiones_PRECOVID 2792 non-null float64 \n", - " 33 Readmisiones_COVID 2792 non-null float64 \n", - "dtypes: category(1), float64(10), object(23)\n", - "memory usage: 744.5+ KB\n", - "None\n", - "-------------------------------\n", - "\n", - "\n", - "\n", - "\n", - "POST\n", - "\n", - "Index: 10677 entries, 11 to 85156\n", - "Data columns (total 35 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 CODPROYECTO 10677 non-null float64 \n", - " 1 Education 10677 non-null object \n", - " 2 Social_protection 10677 non-null object \n", - " 3 Job_insecurity 10677 non-null object \n", - " 4 Housing 10677 non-null object \n", - " 5 Alterations_early_childhood_develop 10677 non-null object \n", - " 6 Social_inclusion 10677 non-null object \n", - " 7 Risk_stigma 10085 non-null category\n", - " 8 Structural_conflic 10677 non-null float64 \n", - " 9 Age 10676 non-null float64 \n", - " 10 Sex 10677 non-null object \n", - " 11 NumHijos 10103 non-null float64 \n", - " 12 Smoking 10677 non-null object \n", - " 13 Biological_vulnerability 10677 non-null object \n", - " 14 Alcohol_DxCIE 10677 non-null object \n", - " 15 Opiaceos_DxCIE 10677 non-null object \n", - " 16 Cannabis_DXCIE 10677 non-null object \n", - " 17 BZD_DxCIE 10677 non-null object \n", - " 18 Cocaina_DxCIE 10677 non-null object \n", - " 19 Alucinogenos_DXCIE 10677 non-null object \n", - " 20 Tabaco_DXCIE 10677 non-null object \n", - " 21 FrecuenciaConsumo30Dias 10677 non-null object \n", - " 22 Años_consumo_droga 10478 non-null float64 \n", - " 23 OtrosDx_Psiquiatrico 10677 non-null object \n", - " 24 Tx_previos 10677 non-null object \n", - " 25 Adherencia_tto_recalc 10677 non-null float64 \n", - " 26 Tiempo_tx 10677 non-null float64 \n", - " 27 Readmisiones_estudios 10677 non-null object \n", - " 28 Situacion_tratamiento 10677 non-null object \n", - " 29 Periodos_COVID 10677 non-null object \n", - " 30 Pandemia_inicio_fin_tratamiento 10677 non-null object \n", - " 31 Nreadmision 10677 non-null float64 \n", - " 32 Readmisiones_PRECOVID 10677 non-null float64 \n", - " 33 Readmisiones_COVID 10677 non-null float64 \n", - " 34 Group 10677 non-null object \n", - "dtypes: category(1), float64(10), object(24)\n", - "memory usage: 2.9+ MB\n", - "None\n", - "-------------------------------\n", - "POST-ABANDONO\n", - "\n", - "Index: 8795 entries, 11 to 85156\n", - "Data columns (total 34 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 CODPROYECTO 8795 non-null float64 \n", - " 1 Education 8795 non-null object \n", - " 2 Social_protection 8795 non-null object \n", - " 3 Job_insecurity 8795 non-null object \n", - " 4 Housing 8795 non-null object \n", - " 5 Alterations_early_childhood_develop 8795 non-null object \n", - " 6 Social_inclusion 8795 non-null object \n", - " 7 Risk_stigma 8308 non-null category\n", - " 8 Structural_conflic 8795 non-null float64 \n", - " 9 Age 8794 non-null float64 \n", - " 10 Sex 8795 non-null object \n", - " 11 NumHijos 8325 non-null float64 \n", - " 12 Smoking 8795 non-null object \n", - " 13 Biological_vulnerability 8795 non-null object \n", - " 14 Alcohol_DxCIE 8795 non-null object \n", - " 15 Opiaceos_DxCIE 8795 non-null object \n", - " 16 Cannabis_DXCIE 8795 non-null object \n", - " 17 BZD_DxCIE 8795 non-null object \n", - " 18 Cocaina_DxCIE 8795 non-null object \n", - " 19 Alucinogenos_DXCIE 8795 non-null object \n", - " 20 Tabaco_DXCIE 8795 non-null object \n", - " 21 FrecuenciaConsumo30Dias 8795 non-null object \n", - " 22 Años_consumo_droga 8627 non-null float64 \n", - " 23 OtrosDx_Psiquiatrico 8795 non-null object \n", - " 24 Tx_previos 8795 non-null object \n", - " 25 Adherencia_tto_recalc 8795 non-null float64 \n", - " 26 Tiempo_tx 8795 non-null float64 \n", - " 27 Readmisiones_estudios 8795 non-null object \n", - " 28 Situacion_tratamiento 8795 non-null object \n", - " 29 Periodos_COVID 8795 non-null object \n", - " 30 Pandemia_inicio_fin_tratamiento 8795 non-null object \n", - " 31 Nreadmision 8795 non-null float64 \n", - " 32 Readmisiones_PRECOVID 8795 non-null float64 \n", - " 33 Readmisiones_COVID 8795 non-null float64 \n", - "dtypes: category(1), float64(10), object(23)\n", - "memory usage: 2.3+ MB\n", - "None\n", - "-------------------------------\n", - "POST-ALTA\n", - "\n", - "Index: 1882 entries, 258 to 85149\n", - "Data columns (total 34 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 CODPROYECTO 1882 non-null float64 \n", - " 1 Education 1882 non-null object \n", - " 2 Social_protection 1882 non-null object \n", - " 3 Job_insecurity 1882 non-null object \n", - " 4 Housing 1882 non-null object \n", - " 5 Alterations_early_childhood_develop 1882 non-null object \n", - " 6 Social_inclusion 1882 non-null object \n", - " 7 Risk_stigma 1777 non-null category\n", - " 8 Structural_conflic 1882 non-null float64 \n", - " 9 Age 1882 non-null float64 \n", - " 10 Sex 1882 non-null object \n", - " 11 NumHijos 1778 non-null float64 \n", - " 12 Smoking 1882 non-null object \n", - " 13 Biological_vulnerability 1882 non-null object \n", - " 14 Alcohol_DxCIE 1882 non-null object \n", - " 15 Opiaceos_DxCIE 1882 non-null object \n", - " 16 Cannabis_DXCIE 1882 non-null object \n", - " 17 BZD_DxCIE 1882 non-null object \n", - " 18 Cocaina_DxCIE 1882 non-null object \n", - " 19 Alucinogenos_DXCIE 1882 non-null object \n", - " 20 Tabaco_DXCIE 1882 non-null object \n", - " 21 FrecuenciaConsumo30Dias 1882 non-null object \n", - " 22 Años_consumo_droga 1851 non-null float64 \n", - " 23 OtrosDx_Psiquiatrico 1882 non-null object \n", - " 24 Tx_previos 1882 non-null object \n", - " 25 Adherencia_tto_recalc 1882 non-null float64 \n", - " 26 Tiempo_tx 1882 non-null float64 \n", - " 27 Readmisiones_estudios 1882 non-null object \n", - " 28 Situacion_tratamiento 1882 non-null object \n", - " 29 Periodos_COVID 1882 non-null object \n", - " 30 Pandemia_inicio_fin_tratamiento 1882 non-null object \n", - " 31 Nreadmision 1882 non-null float64 \n", - " 32 Readmisiones_PRECOVID 1882 non-null float64 \n", - " 33 Readmisiones_COVID 1882 non-null float64 \n", - "dtypes: category(1), float64(10), object(23)\n", - "memory usage: 501.9+ KB\n", - "None\n", - "-------------------------------\n" - ] - } - ], + "outputs": [], "source": [ "print(\"PRE\")\n", "print(conj_pre.info())\n", @@ -473,18 +164,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Live with families or friends' 'live alone' 'live in institutions' '9.0']\n", - "['Live with families or friends' 'live alone' 'live in institutions']\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# 9.0 represents unknown according to Variables.docx \n", "print(bd['Social_inclusion'].unique())\n", @@ -496,20 +178,9 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['No alterations (first exposure at 11 or more years)'\n", - " 'Alterations (first exposure before 11 years old)' '9']\n", - "['No alterations (first exposure at 11 or more years)'\n", - " 'Alterations (first exposure before 11 years old)']\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(bd['Alterations_early_childhood_develop'].unique())\n", "mode_alt = bd['Alterations_early_childhood_develop'].mode()[0]\n", @@ -519,28 +190,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NaN, 'Yes', 'No']\n", - "Categories (3, object): [99.0, 'No', 'Yes']\n", - "[NaN, 'Yes', 'No']\n", - "Categories (2, object): ['No', 'Yes']\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n", - " bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n" - ] - } - ], + "outputs": [], "source": [ "print(bd['Risk_stigma'].unique())\n", "mode_stigma = bd['Risk_stigma'].mode()[0]\n", @@ -550,18 +202,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 99. 14. 15.]\n", - "[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 14. 15.]\n" - ] - } - ], + "outputs": [], "source": [ "print(bd['NumHijos'].unique())\n", "mode_hijos = bd['NumHijos'].mode()[0]\n", @@ -609,44 +252,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " bd['Age'].fillna(age_mode, inplace=True)\n", - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " bd['Años_consumo_droga'].fillna(años_consumo_mode, inplace=True)\n", - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " bd['Risk_stigma'].fillna(risk_stigma_mode, inplace=True)\n", - "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_11540\\3303146707.py:11: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " bd['NumHijos'].fillna(num_hijos_mode, inplace=True)\n" - ] - } - ], + "outputs": [], "source": [ "age_mode = bd['Age'].mode()[0]\n", "bd['Age'].fillna(age_mode, inplace=True)\n", @@ -734,8 +342,7 @@ "# Adjust layout to prevent overlapping titles\n", "plt.tight_layout()\n", "\n", - "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n", - "plt.savefig('./EDA_plots/countplots.svg', dpi=600, bbox_inches='tight')" + "plt.savefig('./output/plots/distributions/countplots.svg', dpi=600, bbox_inches='tight')" ] }, { @@ -830,8 +437,8 @@ "# Adjust layout to prevent overlapping titles\n", "plt.tight_layout()\n", "\n", - "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n", - "plt.savefig('./EDA_plots/norm_countplots.svg', dpi=600, bbox_inches='tight')" + "# Save the figure in SVG format with DPI=600 in the \"._plots\" folder\n", + "plt.savefig('./output/plots/distributions/norm_countplots.svg', dpi=600, bbox_inches='tight')" ] }, { @@ -886,7 +493,7 @@ "plt.tight_layout()\n", "\n", "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n", - "plt.savefig('./EDA_plots/boxplots.svg', dpi=600, bbox_inches='tight')" + "plt.savefig('./output/plots/distributions/boxplots.svg', dpi=600, bbox_inches='tight')" ] }, { @@ -926,7 +533,7 @@ "plt.tight_layout()\n", "\n", "# Save the figure in SVG format with DPI=600 in the \"./EDA_plots\" folder\n", - "plt.savefig('./EDA_plots/histograms.svg', dpi=600, bbox_inches='tight')" + "plt.savefig('./output/plots/distributions/histograms.svg', dpi=600, bbox_inches='tight')" ] }, { @@ -945,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -1022,7 +629,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -1036,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -1063,7 +670,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1098,7 +705,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -1138,21 +745,13 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Ed_Not Complete primary school', 'Ed_Primary education', 'Ed_Secondary Education', 'Ed_Secondary more technical education', 'Ed_Tertiary', 'Ed_Unknowledge', 'Social_protection_REDEF', 'JobIn_Non-stable', 'JobIn_Stable', 'JobIn_Unemployed', 'JobIn_unkwnodledge', 'Hous_Institutional', 'Hous_Stable', 'Hous_Unstable', 'Hous_unknowledge', 'Alterations_early_childhood_develop_REDEF', 'SocInc_Live with families or friends', 'SocInc_live alone', 'SocInc_live in institutions', 'Risk_stigma_REDEF', 'Structural_conflic']\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Export column names for future programs\n", - "np.save('./soc_vars_names.npy', soc_vars_enc)\n", - "np.save('./ind_vars_names.npy', soc_vars_enc)" + "np.save('./output/soc_vars_names.npy', soc_vars_enc)\n", + "np.save('./output/ind_vars_names.npy', soc_vars_enc)" ] }, { @@ -1164,7 +763,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1194,7 +793,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1204,7 +803,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1244,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1358,7 +957,7 @@ "plt.tight_layout()\n", "\n", "# Save the figure in SVG format in the \"./EDA_plots\" folder\n", - "plt.savefig('./EDA_plots/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')" + "plt.savefig('./output/plots/correlations/heatmaps_one_hot.svg', dpi=550, bbox_inches='tight')" ] }, { @@ -1489,69 +1088,9 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 33538 entries, 0 to 85164\n", - "Data columns (total 45 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Ed_Not Complete primary school 33538 non-null bool \n", - " 1 Ed_Primary education 33538 non-null bool \n", - " 2 Ed_Secondary Education 33538 non-null bool \n", - " 3 Ed_Secondary more technical education 33538 non-null bool \n", - " 4 Ed_Tertiary 33538 non-null bool \n", - " 5 Ed_Unknowledge 33538 non-null bool \n", - " 6 Social_protection_REDEF 33538 non-null int64 \n", - " 7 JobIn_Non-stable 33538 non-null bool \n", - " 8 JobIn_Stable 33538 non-null bool \n", - " 9 JobIn_Unemployed 33538 non-null bool \n", - " 10 JobIn_unkwnodledge 33538 non-null bool \n", - " 11 Hous_Institutional 33538 non-null bool \n", - " 12 Hous_Stable 33538 non-null bool \n", - " 13 Hous_Unstable 33538 non-null bool \n", - " 14 Hous_unknowledge 33538 non-null bool \n", - " 15 Alterations_early_childhood_develop_REDEF 33538 non-null int64 \n", - " 16 SocInc_Live with families or friends 33538 non-null bool \n", - " 17 SocInc_live alone 33538 non-null bool \n", - " 18 SocInc_live in institutions 33538 non-null bool \n", - " 19 Risk_stigma_REDEF 33538 non-null category\n", - " 20 Structural_conflic 33538 non-null float64 \n", - " 21 Age 33538 non-null float64 \n", - " 22 Sex_REDEF 33538 non-null int64 \n", - " 23 NumHijos 33538 non-null float64 \n", - " 24 Smoking_REDEF 33538 non-null int64 \n", - " 25 Biological_vulnerability_REDEF 33538 non-null int64 \n", - " 26 Opiaceos_DxCIE_REDEF 33538 non-null int64 \n", - " 27 Cannabis_DXCIE_REDEF 33538 non-null int64 \n", - " 28 BZD_DxCIE_REDEF 33538 non-null int64 \n", - " 29 Cocaina_DxCIE_REDEF 33538 non-null int64 \n", - " 30 Alucinogenos_DXCIE_REDEF 33538 non-null int64 \n", - " 31 Tabaco_DXCIE_REDEF 33538 non-null int64 \n", - " 32 Frec30_1 día/semana 33538 non-null bool \n", - " 33 Frec30_2-3 días‎/semana 33538 non-null bool \n", - " 34 Frec30_4-6 días/semana 33538 non-null bool \n", - " 35 Frec30_Desconocido 33538 non-null bool \n", - " 36 Frec30_Menos de 1 día‎/semana 33538 non-null bool \n", - " 37 Frec30_No consumio 33538 non-null bool \n", - " 38 Frec30_Todos los días 33538 non-null bool \n", - " 39 Años_consumo_droga 33538 non-null float64 \n", - " 40 OtrosDx_Psiquiatrico_REDEF 33538 non-null int64 \n", - " 41 Tx_previos_REDEF 33538 non-null int64 \n", - " 42 Adherencia_tto_recalc 33538 non-null float64 \n", - " 43 Pandemia_inicio_fin_tratamiento 33538 non-null object \n", - " 44 Situacion_tratamiento_REDEF 33538 non-null int64 \n", - "dtypes: bool(24), category(1), float64(5), int64(14), object(1)\n", - "memory usage: 6.2+ MB\n", - "None\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Work with columns of interest\n", "cols_of_interest = corr_cols + ['Pandemia_inicio_fin_tratamiento'] + [target_var + \"_REDEF\"]\n", @@ -1561,7 +1100,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1572,72 +1111,16 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 33538 entries, 0 to 85164\n", - "Data columns (total 41 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Ed_Not Complete primary school 33538 non-null bool \n", - " 1 Ed_Primary education 33538 non-null bool \n", - " 2 Ed_Secondary Education 33538 non-null bool \n", - " 3 Ed_Secondary more technical education 33538 non-null bool \n", - " 4 Ed_Tertiary 33538 non-null bool \n", - " 5 Social_protection_REDEF 33538 non-null int64 \n", - " 6 JobIn_Non-stable 33538 non-null bool \n", - " 7 JobIn_Stable 33538 non-null bool \n", - " 8 JobIn_Unemployed 33538 non-null bool \n", - " 9 Hous_Institutional 33538 non-null bool \n", - " 10 Hous_Stable 33538 non-null bool \n", - " 11 Hous_Unstable 33538 non-null bool \n", - " 12 Alterations_early_childhood_develop_REDEF 33538 non-null int64 \n", - " 13 SocInc_Live with families or friends 33538 non-null bool \n", - " 14 SocInc_live alone 33538 non-null bool \n", - " 15 SocInc_live in institutions 33538 non-null bool \n", - " 16 Risk_stigma_REDEF 33538 non-null category\n", - " 17 Structural_conflic 33538 non-null float64 \n", - " 18 Age 33538 non-null float64 \n", - " 19 Sex_REDEF 33538 non-null int64 \n", - " 20 NumHijos 33538 non-null float64 \n", - " 21 Smoking_REDEF 33538 non-null int64 \n", - " 22 Biological_vulnerability_REDEF 33538 non-null int64 \n", - " 23 Opiaceos_DxCIE_REDEF 33538 non-null int64 \n", - " 24 Cannabis_DXCIE_REDEF 33538 non-null int64 \n", - " 25 BZD_DxCIE_REDEF 33538 non-null int64 \n", - " 26 Cocaina_DxCIE_REDEF 33538 non-null int64 \n", - " 27 Alucinogenos_DXCIE_REDEF 33538 non-null int64 \n", - " 28 Tabaco_DXCIE_REDEF 33538 non-null int64 \n", - " 29 Frec30_1 día/semana 33538 non-null bool \n", - " 30 Frec30_2-3 días‎/semana 33538 non-null bool \n", - " 31 Frec30_4-6 días/semana 33538 non-null bool \n", - " 32 Frec30_Menos de 1 día‎/semana 33538 non-null bool \n", - " 33 Frec30_No consumio 33538 non-null bool \n", - " 34 Frec30_Todos los días 33538 non-null bool \n", - " 35 Años_consumo_droga 33538 non-null float64 \n", - " 36 OtrosDx_Psiquiatrico_REDEF 33538 non-null int64 \n", - " 37 Tx_previos_REDEF 33538 non-null int64 \n", - " 38 Adherencia_tto_recalc 33538 non-null float64 \n", - " 39 Pandemia_inicio_fin_tratamiento 33538 non-null object \n", - " 40 Situacion_tratamiento_REDEF 33538 non-null int64 \n", - "dtypes: bool(20), category(1), float64(5), int64(14), object(1)\n", - "memory usage: 6.0+ MB\n", - "None\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(temp_bd.info())" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1653,135 +1136,25 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 22861 entries, 0 to 85164\n", - "Data columns (total 40 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Ed_Not Complete primary school 22861 non-null bool \n", - " 1 Ed_Primary education 22861 non-null bool \n", - " 2 Ed_Secondary Education 22861 non-null bool \n", - " 3 Ed_Secondary more technical education 22861 non-null bool \n", - " 4 Ed_Tertiary 22861 non-null bool \n", - " 5 Social_protection_REDEF 22861 non-null int64 \n", - " 6 JobIn_Non-stable 22861 non-null bool \n", - " 7 JobIn_Stable 22861 non-null bool \n", - " 8 JobIn_Unemployed 22861 non-null bool \n", - " 9 Hous_Institutional 22861 non-null bool \n", - " 10 Hous_Stable 22861 non-null bool \n", - " 11 Hous_Unstable 22861 non-null bool \n", - " 12 Alterations_early_childhood_develop_REDEF 22861 non-null int64 \n", - " 13 SocInc_Live with families or friends 22861 non-null bool \n", - " 14 SocInc_live alone 22861 non-null bool \n", - " 15 SocInc_live in institutions 22861 non-null bool \n", - " 16 Risk_stigma_REDEF 22861 non-null category\n", - " 17 Structural_conflic 22861 non-null float64 \n", - " 18 Age 22861 non-null float64 \n", - " 19 Sex_REDEF 22861 non-null int64 \n", - " 20 NumHijos 22861 non-null float64 \n", - " 21 Smoking_REDEF 22861 non-null int64 \n", - " 22 Biological_vulnerability_REDEF 22861 non-null int64 \n", - " 23 Opiaceos_DxCIE_REDEF 22861 non-null int64 \n", - " 24 Cannabis_DXCIE_REDEF 22861 non-null int64 \n", - " 25 BZD_DxCIE_REDEF 22861 non-null int64 \n", - " 26 Cocaina_DxCIE_REDEF 22861 non-null int64 \n", - " 27 Alucinogenos_DXCIE_REDEF 22861 non-null int64 \n", - " 28 Tabaco_DXCIE_REDEF 22861 non-null int64 \n", - " 29 Frec30_1 día/semana 22861 non-null bool \n", - " 30 Frec30_2-3 días‎/semana 22861 non-null bool \n", - " 31 Frec30_4-6 días/semana 22861 non-null bool \n", - " 32 Frec30_Menos de 1 día‎/semana 22861 non-null bool \n", - " 33 Frec30_No consumio 22861 non-null bool \n", - " 34 Frec30_Todos los días 22861 non-null bool \n", - " 35 Años_consumo_droga 22861 non-null float64 \n", - " 36 OtrosDx_Psiquiatrico_REDEF 22861 non-null int64 \n", - " 37 Tx_previos_REDEF 22861 non-null int64 \n", - " 38 Adherencia_tto_recalc 22861 non-null float64 \n", - " 39 Situacion_tratamiento_REDEF 22861 non-null int64 \n", - "dtypes: bool(20), category(1), float64(5), int64(14)\n", - "memory usage: 3.9 MB\n", - "None\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(conj_pre.info())" ] }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 10677 entries, 11 to 85156\n", - "Data columns (total 40 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Ed_Not Complete primary school 10677 non-null bool \n", - " 1 Ed_Primary education 10677 non-null bool \n", - " 2 Ed_Secondary Education 10677 non-null bool \n", - " 3 Ed_Secondary more technical education 10677 non-null bool \n", - " 4 Ed_Tertiary 10677 non-null bool \n", - " 5 Social_protection_REDEF 10677 non-null int64 \n", - " 6 JobIn_Non-stable 10677 non-null bool \n", - " 7 JobIn_Stable 10677 non-null bool \n", - " 8 JobIn_Unemployed 10677 non-null bool \n", - " 9 Hous_Institutional 10677 non-null bool \n", - " 10 Hous_Stable 10677 non-null bool \n", - " 11 Hous_Unstable 10677 non-null bool \n", - " 12 Alterations_early_childhood_develop_REDEF 10677 non-null int64 \n", - " 13 SocInc_Live with families or friends 10677 non-null bool \n", - " 14 SocInc_live alone 10677 non-null bool \n", - " 15 SocInc_live in institutions 10677 non-null bool \n", - " 16 Risk_stigma_REDEF 10677 non-null category\n", - " 17 Structural_conflic 10677 non-null float64 \n", - " 18 Age 10677 non-null float64 \n", - " 19 Sex_REDEF 10677 non-null int64 \n", - " 20 NumHijos 10677 non-null float64 \n", - " 21 Smoking_REDEF 10677 non-null int64 \n", - " 22 Biological_vulnerability_REDEF 10677 non-null int64 \n", - " 23 Opiaceos_DxCIE_REDEF 10677 non-null int64 \n", - " 24 Cannabis_DXCIE_REDEF 10677 non-null int64 \n", - " 25 BZD_DxCIE_REDEF 10677 non-null int64 \n", - " 26 Cocaina_DxCIE_REDEF 10677 non-null int64 \n", - " 27 Alucinogenos_DXCIE_REDEF 10677 non-null int64 \n", - " 28 Tabaco_DXCIE_REDEF 10677 non-null int64 \n", - " 29 Frec30_1 día/semana 10677 non-null bool \n", - " 30 Frec30_2-3 días‎/semana 10677 non-null bool \n", - " 31 Frec30_4-6 días/semana 10677 non-null bool \n", - " 32 Frec30_Menos de 1 día‎/semana 10677 non-null bool \n", - " 33 Frec30_No consumio 10677 non-null bool \n", - " 34 Frec30_Todos los días 10677 non-null bool \n", - " 35 Años_consumo_droga 10677 non-null float64 \n", - " 36 OtrosDx_Psiquiatrico_REDEF 10677 non-null int64 \n", - " 37 Tx_previos_REDEF 10677 non-null int64 \n", - " 38 Adherencia_tto_recalc 10677 non-null float64 \n", - " 39 Situacion_tratamiento_REDEF 10677 non-null int64 \n", - "dtypes: bool(20), category(1), float64(5), int64(14)\n", - "memory usage: 1.8 MB\n", - "None\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(conj_post.info())" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1793,52 +1166,18 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Ed_Not Complete primary school' 'Ed_Primary education'\n", - " 'Ed_Secondary Education' 'Ed_Secondary more technical education'\n", - " 'Ed_Tertiary' 'Social_protection_REDEF' 'JobIn_Non-stable' 'JobIn_Stable'\n", - " 'JobIn_Unemployed' 'Hous_Institutional' 'Hous_Stable' 'Hous_Unstable'\n", - " 'Alterations_early_childhood_develop_REDEF'\n", - " 'SocInc_Live with families or friends' 'SocInc_live alone'\n", - " 'SocInc_live in institutions' 'Risk_stigma_REDEF' 'Structural_conflic'\n", - " 'Age' 'Sex_REDEF' 'NumHijos' 'Smoking_REDEF'\n", - " 'Biological_vulnerability_REDEF' 'Opiaceos_DxCIE_REDEF'\n", - " 'Cannabis_DXCIE_REDEF' 'BZD_DxCIE_REDEF' 'Cocaina_DxCIE_REDEF'\n", - " 'Alucinogenos_DXCIE_REDEF' 'Tabaco_DXCIE_REDEF' 'Frec30_1 día/semana'\n", - " 'Frec30_2-3 días\\u200e/semana' 'Frec30_4-6 días/semana'\n", - " 'Frec30_Menos de 1 día\\u200e/semana' 'Frec30_No consumio'\n", - " 'Frec30_Todos los días' 'Años_consumo_droga' 'OtrosDx_Psiquiatrico_REDEF'\n", - " 'Tx_previos_REDEF' 'Adherencia_tto_recalc']\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(feat)" ] }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(22861, 39)\n", - "(10677, 39)\n", - "(22861,)\n", - "(10677,)\n", - "39\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "print(X_pre.shape)\n", "print(X_post.shape)\n", @@ -1887,7 +1226,7 @@ "axes[1].set_title(\"POST\")\n", "\n", "plt.tight_layout()\n", - "plt.savefig('EDA_plots/features/mutual_info.svg', format='svg', dpi=1200)\n", + "plt.savefig('./output/plots/feature_importance/mutual_info.svg', format='svg', dpi=1200)\n", "plt.show()" ] }, @@ -1926,7 +1265,7 @@ "axes[1].set_title(\"POST\")\n", "\n", "plt.tight_layout()\n", - "plt.savefig('EDA_plots/features/ANOVA.svg', format='svg', dpi=1200)\n", + "plt.savefig('./output/plots/feature_importance/ANOVA.svg', format='svg', dpi=1200)\n", "plt.show()" ] }, @@ -1958,7 +1297,7 @@ "axes[1].set_title(\"POST\")\n", "\n", "plt.tight_layout()\n", - "plt.savefig('EDA_plots/features/var_threshold.svg', format='svg', dpi=1200)\n", + "plt.savefig('./output/plots/feature_importance/var_threshold.svg', format='svg', dpi=1200)\n", "plt.show()" ] }, @@ -1971,7 +1310,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/EDA/ind_vars_names.npy b/EDA/output/ind_vars_names.npy similarity index 100% rename from EDA/ind_vars_names.npy rename to EDA/output/ind_vars_names.npy diff --git a/EDA/plots/distributions/heatmaps_one_hot.svg b/EDA/output/plots/correlations/heatmaps_one_hot.svg similarity index 100% rename from EDA/plots/distributions/heatmaps_one_hot.svg rename to EDA/output/plots/correlations/heatmaps_one_hot.svg diff --git a/EDA/plots/distributions/boxplots.svg b/EDA/output/plots/distributions/boxplots.svg similarity index 100% rename from EDA/plots/distributions/boxplots.svg rename to EDA/output/plots/distributions/boxplots.svg diff --git a/EDA/plots/distributions/countplots.svg b/EDA/output/plots/distributions/countplots.svg similarity index 100% rename from EDA/plots/distributions/countplots.svg rename to EDA/output/plots/distributions/countplots.svg diff --git a/EDA/plots/distributions/histograms.svg b/EDA/output/plots/distributions/histograms.svg similarity index 100% rename from EDA/plots/distributions/histograms.svg rename to EDA/output/plots/distributions/histograms.svg diff --git a/EDA/plots/distributions/norm_countplots.svg b/EDA/output/plots/distributions/norm_countplots.svg similarity index 100% rename from EDA/plots/distributions/norm_countplots.svg rename to EDA/output/plots/distributions/norm_countplots.svg diff --git a/EDA/plots/feature_importance/ANOVA.svg b/EDA/output/plots/feature_importance/ANOVA.svg similarity index 100% rename from EDA/plots/feature_importance/ANOVA.svg rename to EDA/output/plots/feature_importance/ANOVA.svg diff --git a/EDA/plots/feature_importance/mutual_info.svg b/EDA/output/plots/feature_importance/mutual_info.svg similarity index 100% rename from EDA/plots/feature_importance/mutual_info.svg rename to EDA/output/plots/feature_importance/mutual_info.svg diff --git a/EDA/plots/feature_importance/var_threshold.svg b/EDA/output/plots/feature_importance/var_threshold.svg similarity index 100% rename from EDA/plots/feature_importance/var_threshold.svg rename to EDA/output/plots/feature_importance/var_threshold.svg diff --git a/EDA/soc_vars_names.npy b/EDA/output/soc_vars_names.npy similarity index 100% rename from EDA/soc_vars_names.npy rename to EDA/output/soc_vars_names.npy -- 2.24.1