From 7df4155b2efd92cd832b296783a0759ead3c896f Mon Sep 17 00:00:00 2001 From: joaquintb Date: Fri, 28 Jun 2024 19:08:51 +0200 Subject: [PATCH] Minor fixes --- EDA/EDA.ipynb | 456 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 424 insertions(+), 32 deletions(-) diff --git a/EDA/EDA.ipynb b/EDA/EDA.ipynb index 6b1c81c..67ef1f8 100644 --- a/EDA/EDA.ipynb +++ b/EDA/EDA.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -41,11 +41,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "bd_all = pd.read_spss('./input/17_abril.sav')\n", + "bd_all = pd.read_spss('./input/data.sav')\n", "\n", "# Filter the dataset to work only with alcohol patients\n", "bd = bd_all[bd_all['Alcohol_DxCIE'] == 'Sí']\n", @@ -56,9 +56,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\2495984927.py:18: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " conj_post['Group'] = 'Post'\n", + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\2495984927.py:19: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " conj_pre['Group'] = 'Pre'\n" + ] + } + ], "source": [ "# Pre-pandemic\n", "conj_pre = bd[bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia']\n", @@ -84,9 +103,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PRE: 22861\n", + "\tALTA: 2792\n", + "\tABANDONO: 20069\n", + "POST: 10677\n", + "\tALTA: 1882\n", + "\tABANDONO: 8795\n" + ] + } + ], "source": [ "# Printing size of different datasets\n", "print(f\"PRE: {len(conj_pre)}\")\n", @@ -100,9 +132,286 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PRE\n", + "\n", + "Index: 22861 entries, 0 to 85164\n", + "Data columns (total 35 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CODPROYECTO 22861 non-null float64 \n", + " 1 Education 22861 non-null object \n", + " 2 Social_protection 22861 non-null object \n", + " 3 Job_insecurity 22861 non-null object \n", + " 4 Housing 22861 non-null object \n", + " 5 Alterations_early_childhood_develop 22861 non-null object \n", + " 6 Social_inclusion 22861 non-null object \n", + " 7 Risk_stigma 21606 non-null category\n", + " 8 Structural_conflic 22861 non-null float64 \n", + " 9 Age 22852 non-null float64 \n", + " 10 Sex 22861 non-null object \n", + " 11 NumHijos 21647 non-null float64 \n", + " 12 Smoking 22861 non-null object \n", + " 13 Biological_vulnerability 22861 non-null object \n", + " 14 Alcohol_DxCIE 22861 non-null object \n", + " 15 Opiaceos_DxCIE 22861 non-null object \n", + " 16 Cannabis_DXCIE 22861 non-null object \n", + " 17 BZD_DxCIE 22861 non-null object \n", + " 18 Cocaina_DxCIE 22861 non-null object \n", + " 19 Alucinogenos_DXCIE 22861 non-null object \n", + " 20 Tabaco_DXCIE 22861 non-null object \n", + " 21 FrecuenciaConsumo30Dias 22861 non-null object \n", + " 22 Años_consumo_droga 22342 non-null float64 \n", + " 23 OtrosDx_Psiquiatrico 22861 non-null object \n", + " 24 Tx_previos 22861 non-null object \n", + " 25 Adherencia_tto_recalc 22861 non-null float64 \n", + " 26 Tiempo_tx 22861 non-null float64 \n", + " 27 Readmisiones_estudios 22861 non-null object \n", + " 28 Situacion_tratamiento 22861 non-null object \n", + " 29 Periodos_COVID 22861 non-null object \n", + " 30 Pandemia_inicio_fin_tratamiento 22861 non-null object \n", + " 31 Nreadmision 22861 non-null float64 \n", + " 32 Readmisiones_PRECOVID 22861 non-null float64 \n", + " 33 Readmisiones_COVID 22861 non-null float64 \n", + " 34 Group 22861 non-null object \n", + "dtypes: category(1), float64(10), object(24)\n", + "memory usage: 6.1+ MB\n", + "None\n", + "-------------------------------\n", + "PRE-ABANDONO\n", + "\n", + "Index: 20069 entries, 0 to 85164\n", + "Data columns (total 34 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CODPROYECTO 20069 non-null float64 \n", + " 1 Education 20069 non-null object \n", + " 2 Social_protection 20069 non-null object \n", + " 3 Job_insecurity 20069 non-null object \n", + " 4 Housing 20069 non-null object \n", + " 5 Alterations_early_childhood_develop 20069 non-null object \n", + " 6 Social_inclusion 20069 non-null object \n", + " 7 Risk_stigma 18919 non-null category\n", + " 8 Structural_conflic 20069 non-null float64 \n", + " 9 Age 20061 non-null float64 \n", + " 10 Sex 20069 non-null object \n", + " 11 NumHijos 18958 non-null float64 \n", + " 12 Smoking 20069 non-null object \n", + " 13 Biological_vulnerability 20069 non-null object \n", + " 14 Alcohol_DxCIE 20069 non-null object \n", + " 15 Opiaceos_DxCIE 20069 non-null object \n", + " 16 Cannabis_DXCIE 20069 non-null object \n", + " 17 BZD_DxCIE 20069 non-null object \n", + " 18 Cocaina_DxCIE 20069 non-null object \n", + " 19 Alucinogenos_DXCIE 20069 non-null object \n", + " 20 Tabaco_DXCIE 20069 non-null object \n", + " 21 FrecuenciaConsumo30Dias 20069 non-null object \n", + " 22 Años_consumo_droga 19609 non-null float64 \n", + " 23 OtrosDx_Psiquiatrico 20069 non-null object \n", + " 24 Tx_previos 20069 non-null object \n", + " 25 Adherencia_tto_recalc 20069 non-null float64 \n", + " 26 Tiempo_tx 20069 non-null float64 \n", + " 27 Readmisiones_estudios 20069 non-null object \n", + " 28 Situacion_tratamiento 20069 non-null object \n", + " 29 Periodos_COVID 20069 non-null object \n", + " 30 Pandemia_inicio_fin_tratamiento 20069 non-null object \n", + " 31 Nreadmision 20069 non-null float64 \n", + " 32 Readmisiones_PRECOVID 20069 non-null float64 \n", + " 33 Readmisiones_COVID 20069 non-null float64 \n", + "dtypes: category(1), float64(10), object(23)\n", + "memory usage: 5.2+ MB\n", + "None\n", + "-------------------------------\n", + "PRE-ALTA\n", + "\n", + "Index: 2792 entries, 23 to 85159\n", + "Data columns (total 34 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CODPROYECTO 2792 non-null float64 \n", + " 1 Education 2792 non-null object \n", + " 2 Social_protection 2792 non-null object \n", + " 3 Job_insecurity 2792 non-null object \n", + " 4 Housing 2792 non-null object \n", + " 5 Alterations_early_childhood_develop 2792 non-null object \n", + " 6 Social_inclusion 2792 non-null object \n", + " 7 Risk_stigma 2687 non-null category\n", + " 8 Structural_conflic 2792 non-null float64 \n", + " 9 Age 2791 non-null float64 \n", + " 10 Sex 2792 non-null object \n", + " 11 NumHijos 2689 non-null float64 \n", + " 12 Smoking 2792 non-null object \n", + " 13 Biological_vulnerability 2792 non-null object \n", + " 14 Alcohol_DxCIE 2792 non-null object \n", + " 15 Opiaceos_DxCIE 2792 non-null object \n", + " 16 Cannabis_DXCIE 2792 non-null object \n", + " 17 BZD_DxCIE 2792 non-null object \n", + " 18 Cocaina_DxCIE 2792 non-null object \n", + " 19 Alucinogenos_DXCIE 2792 non-null object \n", + " 20 Tabaco_DXCIE 2792 non-null object \n", + " 21 FrecuenciaConsumo30Dias 2792 non-null object \n", + " 22 Años_consumo_droga 2733 non-null float64 \n", + " 23 OtrosDx_Psiquiatrico 2792 non-null object \n", + " 24 Tx_previos 2792 non-null object \n", + " 25 Adherencia_tto_recalc 2792 non-null float64 \n", + " 26 Tiempo_tx 2792 non-null float64 \n", + " 27 Readmisiones_estudios 2792 non-null object \n", + " 28 Situacion_tratamiento 2792 non-null object \n", + " 29 Periodos_COVID 2792 non-null object \n", + " 30 Pandemia_inicio_fin_tratamiento 2792 non-null object \n", + " 31 Nreadmision 2792 non-null float64 \n", + " 32 Readmisiones_PRECOVID 2792 non-null float64 \n", + " 33 Readmisiones_COVID 2792 non-null float64 \n", + "dtypes: category(1), float64(10), object(23)\n", + "memory usage: 744.5+ KB\n", + "None\n", + "-------------------------------\n", + "\n", + "\n", + "\n", + "\n", + "POST\n", + "\n", + "Index: 10677 entries, 11 to 85156\n", + "Data columns (total 35 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CODPROYECTO 10677 non-null float64 \n", + " 1 Education 10677 non-null object \n", + " 2 Social_protection 10677 non-null object \n", + " 3 Job_insecurity 10677 non-null object \n", + " 4 Housing 10677 non-null object \n", + " 5 Alterations_early_childhood_develop 10677 non-null object \n", + " 6 Social_inclusion 10677 non-null object \n", + " 7 Risk_stigma 10085 non-null category\n", + " 8 Structural_conflic 10677 non-null float64 \n", + " 9 Age 10676 non-null float64 \n", + " 10 Sex 10677 non-null object \n", + " 11 NumHijos 10103 non-null float64 \n", + " 12 Smoking 10677 non-null object \n", + " 13 Biological_vulnerability 10677 non-null object \n", + " 14 Alcohol_DxCIE 10677 non-null object \n", + " 15 Opiaceos_DxCIE 10677 non-null object \n", + " 16 Cannabis_DXCIE 10677 non-null object \n", + " 17 BZD_DxCIE 10677 non-null object \n", + " 18 Cocaina_DxCIE 10677 non-null object \n", + " 19 Alucinogenos_DXCIE 10677 non-null object \n", + " 20 Tabaco_DXCIE 10677 non-null object \n", + " 21 FrecuenciaConsumo30Dias 10677 non-null object \n", + " 22 Años_consumo_droga 10478 non-null float64 \n", + " 23 OtrosDx_Psiquiatrico 10677 non-null object \n", + " 24 Tx_previos 10677 non-null object \n", + " 25 Adherencia_tto_recalc 10677 non-null float64 \n", + " 26 Tiempo_tx 10677 non-null float64 \n", + " 27 Readmisiones_estudios 10677 non-null object \n", + " 28 Situacion_tratamiento 10677 non-null object \n", + " 29 Periodos_COVID 10677 non-null object \n", + " 30 Pandemia_inicio_fin_tratamiento 10677 non-null object \n", + " 31 Nreadmision 10677 non-null float64 \n", + " 32 Readmisiones_PRECOVID 10677 non-null float64 \n", + " 33 Readmisiones_COVID 10677 non-null float64 \n", + " 34 Group 10677 non-null object \n", + "dtypes: category(1), float64(10), object(24)\n", + "memory usage: 2.9+ MB\n", + "None\n", + "-------------------------------\n", + "POST-ABANDONO\n", + "\n", + "Index: 8795 entries, 11 to 85156\n", + "Data columns (total 34 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CODPROYECTO 8795 non-null float64 \n", + " 1 Education 8795 non-null object \n", + " 2 Social_protection 8795 non-null object \n", + " 3 Job_insecurity 8795 non-null object \n", + " 4 Housing 8795 non-null object \n", + " 5 Alterations_early_childhood_develop 8795 non-null object \n", + " 6 Social_inclusion 8795 non-null object \n", + " 7 Risk_stigma 8308 non-null category\n", + " 8 Structural_conflic 8795 non-null float64 \n", + " 9 Age 8794 non-null float64 \n", + " 10 Sex 8795 non-null object \n", + " 11 NumHijos 8325 non-null float64 \n", + " 12 Smoking 8795 non-null object \n", + " 13 Biological_vulnerability 8795 non-null object \n", + " 14 Alcohol_DxCIE 8795 non-null object \n", + " 15 Opiaceos_DxCIE 8795 non-null object \n", + " 16 Cannabis_DXCIE 8795 non-null object \n", + " 17 BZD_DxCIE 8795 non-null object \n", + " 18 Cocaina_DxCIE 8795 non-null object \n", + " 19 Alucinogenos_DXCIE 8795 non-null object \n", + " 20 Tabaco_DXCIE 8795 non-null object \n", + " 21 FrecuenciaConsumo30Dias 8795 non-null object \n", + " 22 Años_consumo_droga 8627 non-null float64 \n", + " 23 OtrosDx_Psiquiatrico 8795 non-null object \n", + " 24 Tx_previos 8795 non-null object \n", + " 25 Adherencia_tto_recalc 8795 non-null float64 \n", + " 26 Tiempo_tx 8795 non-null float64 \n", + " 27 Readmisiones_estudios 8795 non-null object \n", + " 28 Situacion_tratamiento 8795 non-null object \n", + " 29 Periodos_COVID 8795 non-null object \n", + " 30 Pandemia_inicio_fin_tratamiento 8795 non-null object \n", + " 31 Nreadmision 8795 non-null float64 \n", + " 32 Readmisiones_PRECOVID 8795 non-null float64 \n", + " 33 Readmisiones_COVID 8795 non-null float64 \n", + "dtypes: category(1), float64(10), object(23)\n", + "memory usage: 2.3+ MB\n", + "None\n", + "-------------------------------\n", + "POST-ALTA\n", + "\n", + "Index: 1882 entries, 258 to 85149\n", + "Data columns (total 34 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CODPROYECTO 1882 non-null float64 \n", + " 1 Education 1882 non-null object \n", + " 2 Social_protection 1882 non-null object \n", + " 3 Job_insecurity 1882 non-null object \n", + " 4 Housing 1882 non-null object \n", + " 5 Alterations_early_childhood_develop 1882 non-null object \n", + " 6 Social_inclusion 1882 non-null object \n", + " 7 Risk_stigma 1777 non-null category\n", + " 8 Structural_conflic 1882 non-null float64 \n", + " 9 Age 1882 non-null float64 \n", + " 10 Sex 1882 non-null object \n", + " 11 NumHijos 1778 non-null float64 \n", + " 12 Smoking 1882 non-null object \n", + " 13 Biological_vulnerability 1882 non-null object \n", + " 14 Alcohol_DxCIE 1882 non-null object \n", + " 15 Opiaceos_DxCIE 1882 non-null object \n", + " 16 Cannabis_DXCIE 1882 non-null object \n", + " 17 BZD_DxCIE 1882 non-null object \n", + " 18 Cocaina_DxCIE 1882 non-null object \n", + " 19 Alucinogenos_DXCIE 1882 non-null object \n", + " 20 Tabaco_DXCIE 1882 non-null object \n", + " 21 FrecuenciaConsumo30Dias 1882 non-null object \n", + " 22 Años_consumo_droga 1851 non-null float64 \n", + " 23 OtrosDx_Psiquiatrico 1882 non-null object \n", + " 24 Tx_previos 1882 non-null object \n", + " 25 Adherencia_tto_recalc 1882 non-null float64 \n", + " 26 Tiempo_tx 1882 non-null float64 \n", + " 27 Readmisiones_estudios 1882 non-null object \n", + " 28 Situacion_tratamiento 1882 non-null object \n", + " 29 Periodos_COVID 1882 non-null object \n", + " 30 Pandemia_inicio_fin_tratamiento 1882 non-null object \n", + " 31 Nreadmision 1882 non-null float64 \n", + " 32 Readmisiones_PRECOVID 1882 non-null float64 \n", + " 33 Readmisiones_COVID 1882 non-null float64 \n", + "dtypes: category(1), float64(10), object(23)\n", + "memory usage: 501.9+ KB\n", + "None\n", + "-------------------------------\n" + ] + } + ], "source": [ "print(\"PRE\")\n", "print(conj_pre.info())\n", @@ -136,9 +445,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Live with families or friends' 'live alone' 'live in institutions' '9.0']\n", + "['Live with families or friends' 'live alone' 'live in institutions']\n", + "['No alterations (first exposure at 11 or more years)'\n", + " 'Alterations (first exposure before 11 years old)' '9']\n", + "['No alterations (first exposure at 11 or more years)'\n", + " 'Alterations (first exposure before 11 years old)']\n", + "[NaN, 'Yes', 'No']\n", + "Categories (3, object): [99.0, 'No', 'Yes']\n", + "[NaN, 'Yes', 'No']\n", + "Categories (2, object): ['No', 'Yes']\n", + "[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 99. 14. 15.]\n", + "[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 14. 15.]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\1003504044.py:14: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n", + " bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n" + ] + } + ], "source": [ "# 9.0 represents unknown according to Variables.docx \n", "print(bd['Social_inclusion'].unique())\n", @@ -164,9 +500,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total missing values Age: 10\n", + "Total missing values Años_consumo_droga: 718\n", + "Total missing values Risk_stigma: 1847\n", + "Total missing values NumHijos: 1788\n", + "\tCONJUNTO PREPANDEMIA\n", + "\t\tMissing values Age: 9\n", + "\t\tMissing values Años_consumo_droga: 519\n", + "\t\tMissing values Risk_stigma: 1255\n", + "\t\tMissing values NumHijos: 1214\n", + "\tCONJUNTO POSTPANDEMIA\n", + "\t\tMissing values Age: 1\n", + "\t\tMissing values Años_consumo_droga: 199\n", + "\t\tMissing values Risk_stigma: 592\n", + "\t\tMissing values NumHijos: 574\n" + ] + } + ], "source": [ "print(f\"Total missing values Age: {bd['Age'].isnull().sum()}\")\n", "print(f\"Total missing values Años_consumo_droga: {bd['Años_consumo_droga'].isnull().sum()}\")\n", @@ -188,9 +545,44 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", + "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", + "\n", + "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", + "\n", + "\n", + " bd['Age'].fillna(age_mode, inplace=True)\n", + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", + "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", + "\n", + "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", + "\n", + "\n", + " bd['Años_consumo_droga'].fillna(años_consumo_mode, inplace=True)\n", + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", + "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", + "\n", + "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", + "\n", + "\n", + " bd['Risk_stigma'].fillna(risk_stigma_mode, inplace=True)\n", + "C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:11: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", + "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", + "\n", + "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", + "\n", + "\n", + " bd['NumHijos'].fillna(num_hijos_mode, inplace=True)\n" + ] + } + ], "source": [ "age_mode = bd['Age'].mode()[0]\n", "bd['Age'].fillna(age_mode, inplace=True)\n", @@ -481,7 +873,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -512,7 +904,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -589,7 +981,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -622,7 +1014,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -662,7 +1054,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -681,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -691,7 +1083,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -748,14 +1140,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Export feature names\n", - "np.save('./output/feature_names/feature_names.npy', corr_cols)\n", - "np.save('./output/feature_names/soc_vars_names.npy', soc_vars_enc)\n", - "np.save('./output/feature_names/ind_vars_names.npy', ind_vars_enc)" + "np.save('./output/feature_names/all_features.npy', corr_cols)\n", + "np.save('./output/feature_names/social_factors.npy', soc_vars_enc)\n", + "np.save('./output/feature_names/individual_factors.npy', ind_vars_enc)" ] }, { -- 2.24.1