Commit 7df4155b authored by Joaquin Torres

Minor fixes

parent 2a1e1b03
......@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -41,11 +41,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"bd_all = pd.read_spss('./input/17_abril.sav')\n",
"bd_all = pd.read_spss('./input/data.sav')\n",
"\n",
"# Filter the dataset to work only with alcohol patients\n",
"bd = bd_all[bd_all['Alcohol_DxCIE'] == 'Sí']\n",
......@@ -56,9 +56,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\2495984927.py:18: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" conj_post['Group'] = 'Post'\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\2495984927.py:19: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" conj_pre['Group'] = 'Pre'\n"
]
}
],
"source": [
"# Pre-pandemic\n",
"conj_pre = bd[bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia']\n",
......@@ -84,9 +103,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PRE: 22861\n",
"\tALTA: 2792\n",
"\tABANDONO: 20069\n",
"POST: 10677\n",
"\tALTA: 1882\n",
"\tABANDONO: 8795\n"
]
}
],
"source": [
"# Printing size of different datasets\n",
"print(f\"PRE: {len(conj_pre)}\")\n",
......@@ -100,9 +132,286 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PRE\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 22861 entries, 0 to 85164\n",
"Data columns (total 35 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODPROYECTO 22861 non-null float64 \n",
" 1 Education 22861 non-null object \n",
" 2 Social_protection 22861 non-null object \n",
" 3 Job_insecurity 22861 non-null object \n",
" 4 Housing 22861 non-null object \n",
" 5 Alterations_early_childhood_develop 22861 non-null object \n",
" 6 Social_inclusion 22861 non-null object \n",
" 7 Risk_stigma 21606 non-null category\n",
" 8 Structural_conflic 22861 non-null float64 \n",
" 9 Age 22852 non-null float64 \n",
" 10 Sex 22861 non-null object \n",
" 11 NumHijos 21647 non-null float64 \n",
" 12 Smoking 22861 non-null object \n",
" 13 Biological_vulnerability 22861 non-null object \n",
" 14 Alcohol_DxCIE 22861 non-null object \n",
" 15 Opiaceos_DxCIE 22861 non-null object \n",
" 16 Cannabis_DXCIE 22861 non-null object \n",
" 17 BZD_DxCIE 22861 non-null object \n",
" 18 Cocaina_DxCIE 22861 non-null object \n",
" 19 Alucinogenos_DXCIE 22861 non-null object \n",
" 20 Tabaco_DXCIE 22861 non-null object \n",
" 21 FrecuenciaConsumo30Dias 22861 non-null object \n",
" 22 Años_consumo_droga 22342 non-null float64 \n",
" 23 OtrosDx_Psiquiatrico 22861 non-null object \n",
" 24 Tx_previos 22861 non-null object \n",
" 25 Adherencia_tto_recalc 22861 non-null float64 \n",
" 26 Tiempo_tx 22861 non-null float64 \n",
" 27 Readmisiones_estudios 22861 non-null object \n",
" 28 Situacion_tratamiento 22861 non-null object \n",
" 29 Periodos_COVID 22861 non-null object \n",
" 30 Pandemia_inicio_fin_tratamiento 22861 non-null object \n",
" 31 Nreadmision 22861 non-null float64 \n",
" 32 Readmisiones_PRECOVID 22861 non-null float64 \n",
" 33 Readmisiones_COVID 22861 non-null float64 \n",
" 34 Group 22861 non-null object \n",
"dtypes: category(1), float64(10), object(24)\n",
"memory usage: 6.1+ MB\n",
"None\n",
"-------------------------------\n",
"PRE-ABANDONO\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 20069 entries, 0 to 85164\n",
"Data columns (total 34 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODPROYECTO 20069 non-null float64 \n",
" 1 Education 20069 non-null object \n",
" 2 Social_protection 20069 non-null object \n",
" 3 Job_insecurity 20069 non-null object \n",
" 4 Housing 20069 non-null object \n",
" 5 Alterations_early_childhood_develop 20069 non-null object \n",
" 6 Social_inclusion 20069 non-null object \n",
" 7 Risk_stigma 18919 non-null category\n",
" 8 Structural_conflic 20069 non-null float64 \n",
" 9 Age 20061 non-null float64 \n",
" 10 Sex 20069 non-null object \n",
" 11 NumHijos 18958 non-null float64 \n",
" 12 Smoking 20069 non-null object \n",
" 13 Biological_vulnerability 20069 non-null object \n",
" 14 Alcohol_DxCIE 20069 non-null object \n",
" 15 Opiaceos_DxCIE 20069 non-null object \n",
" 16 Cannabis_DXCIE 20069 non-null object \n",
" 17 BZD_DxCIE 20069 non-null object \n",
" 18 Cocaina_DxCIE 20069 non-null object \n",
" 19 Alucinogenos_DXCIE 20069 non-null object \n",
" 20 Tabaco_DXCIE 20069 non-null object \n",
" 21 FrecuenciaConsumo30Dias 20069 non-null object \n",
" 22 Años_consumo_droga 19609 non-null float64 \n",
" 23 OtrosDx_Psiquiatrico 20069 non-null object \n",
" 24 Tx_previos 20069 non-null object \n",
" 25 Adherencia_tto_recalc 20069 non-null float64 \n",
" 26 Tiempo_tx 20069 non-null float64 \n",
" 27 Readmisiones_estudios 20069 non-null object \n",
" 28 Situacion_tratamiento 20069 non-null object \n",
" 29 Periodos_COVID 20069 non-null object \n",
" 30 Pandemia_inicio_fin_tratamiento 20069 non-null object \n",
" 31 Nreadmision 20069 non-null float64 \n",
" 32 Readmisiones_PRECOVID 20069 non-null float64 \n",
" 33 Readmisiones_COVID 20069 non-null float64 \n",
"dtypes: category(1), float64(10), object(23)\n",
"memory usage: 5.2+ MB\n",
"None\n",
"-------------------------------\n",
"PRE-ALTA\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 2792 entries, 23 to 85159\n",
"Data columns (total 34 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODPROYECTO 2792 non-null float64 \n",
" 1 Education 2792 non-null object \n",
" 2 Social_protection 2792 non-null object \n",
" 3 Job_insecurity 2792 non-null object \n",
" 4 Housing 2792 non-null object \n",
" 5 Alterations_early_childhood_develop 2792 non-null object \n",
" 6 Social_inclusion 2792 non-null object \n",
" 7 Risk_stigma 2687 non-null category\n",
" 8 Structural_conflic 2792 non-null float64 \n",
" 9 Age 2791 non-null float64 \n",
" 10 Sex 2792 non-null object \n",
" 11 NumHijos 2689 non-null float64 \n",
" 12 Smoking 2792 non-null object \n",
" 13 Biological_vulnerability 2792 non-null object \n",
" 14 Alcohol_DxCIE 2792 non-null object \n",
" 15 Opiaceos_DxCIE 2792 non-null object \n",
" 16 Cannabis_DXCIE 2792 non-null object \n",
" 17 BZD_DxCIE 2792 non-null object \n",
" 18 Cocaina_DxCIE 2792 non-null object \n",
" 19 Alucinogenos_DXCIE 2792 non-null object \n",
" 20 Tabaco_DXCIE 2792 non-null object \n",
" 21 FrecuenciaConsumo30Dias 2792 non-null object \n",
" 22 Años_consumo_droga 2733 non-null float64 \n",
" 23 OtrosDx_Psiquiatrico 2792 non-null object \n",
" 24 Tx_previos 2792 non-null object \n",
" 25 Adherencia_tto_recalc 2792 non-null float64 \n",
" 26 Tiempo_tx 2792 non-null float64 \n",
" 27 Readmisiones_estudios 2792 non-null object \n",
" 28 Situacion_tratamiento 2792 non-null object \n",
" 29 Periodos_COVID 2792 non-null object \n",
" 30 Pandemia_inicio_fin_tratamiento 2792 non-null object \n",
" 31 Nreadmision 2792 non-null float64 \n",
" 32 Readmisiones_PRECOVID 2792 non-null float64 \n",
" 33 Readmisiones_COVID 2792 non-null float64 \n",
"dtypes: category(1), float64(10), object(23)\n",
"memory usage: 744.5+ KB\n",
"None\n",
"-------------------------------\n",
"\n",
"\n",
"\n",
"\n",
"POST\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 10677 entries, 11 to 85156\n",
"Data columns (total 35 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODPROYECTO 10677 non-null float64 \n",
" 1 Education 10677 non-null object \n",
" 2 Social_protection 10677 non-null object \n",
" 3 Job_insecurity 10677 non-null object \n",
" 4 Housing 10677 non-null object \n",
" 5 Alterations_early_childhood_develop 10677 non-null object \n",
" 6 Social_inclusion 10677 non-null object \n",
" 7 Risk_stigma 10085 non-null category\n",
" 8 Structural_conflic 10677 non-null float64 \n",
" 9 Age 10676 non-null float64 \n",
" 10 Sex 10677 non-null object \n",
" 11 NumHijos 10103 non-null float64 \n",
" 12 Smoking 10677 non-null object \n",
" 13 Biological_vulnerability 10677 non-null object \n",
" 14 Alcohol_DxCIE 10677 non-null object \n",
" 15 Opiaceos_DxCIE 10677 non-null object \n",
" 16 Cannabis_DXCIE 10677 non-null object \n",
" 17 BZD_DxCIE 10677 non-null object \n",
" 18 Cocaina_DxCIE 10677 non-null object \n",
" 19 Alucinogenos_DXCIE 10677 non-null object \n",
" 20 Tabaco_DXCIE 10677 non-null object \n",
" 21 FrecuenciaConsumo30Dias 10677 non-null object \n",
" 22 Años_consumo_droga 10478 non-null float64 \n",
" 23 OtrosDx_Psiquiatrico 10677 non-null object \n",
" 24 Tx_previos 10677 non-null object \n",
" 25 Adherencia_tto_recalc 10677 non-null float64 \n",
" 26 Tiempo_tx 10677 non-null float64 \n",
" 27 Readmisiones_estudios 10677 non-null object \n",
" 28 Situacion_tratamiento 10677 non-null object \n",
" 29 Periodos_COVID 10677 non-null object \n",
" 30 Pandemia_inicio_fin_tratamiento 10677 non-null object \n",
" 31 Nreadmision 10677 non-null float64 \n",
" 32 Readmisiones_PRECOVID 10677 non-null float64 \n",
" 33 Readmisiones_COVID 10677 non-null float64 \n",
" 34 Group 10677 non-null object \n",
"dtypes: category(1), float64(10), object(24)\n",
"memory usage: 2.9+ MB\n",
"None\n",
"-------------------------------\n",
"POST-ABANDONO\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 8795 entries, 11 to 85156\n",
"Data columns (total 34 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODPROYECTO 8795 non-null float64 \n",
" 1 Education 8795 non-null object \n",
" 2 Social_protection 8795 non-null object \n",
" 3 Job_insecurity 8795 non-null object \n",
" 4 Housing 8795 non-null object \n",
" 5 Alterations_early_childhood_develop 8795 non-null object \n",
" 6 Social_inclusion 8795 non-null object \n",
" 7 Risk_stigma 8308 non-null category\n",
" 8 Structural_conflic 8795 non-null float64 \n",
" 9 Age 8794 non-null float64 \n",
" 10 Sex 8795 non-null object \n",
" 11 NumHijos 8325 non-null float64 \n",
" 12 Smoking 8795 non-null object \n",
" 13 Biological_vulnerability 8795 non-null object \n",
" 14 Alcohol_DxCIE 8795 non-null object \n",
" 15 Opiaceos_DxCIE 8795 non-null object \n",
" 16 Cannabis_DXCIE 8795 non-null object \n",
" 17 BZD_DxCIE 8795 non-null object \n",
" 18 Cocaina_DxCIE 8795 non-null object \n",
" 19 Alucinogenos_DXCIE 8795 non-null object \n",
" 20 Tabaco_DXCIE 8795 non-null object \n",
" 21 FrecuenciaConsumo30Dias 8795 non-null object \n",
" 22 Años_consumo_droga 8627 non-null float64 \n",
" 23 OtrosDx_Psiquiatrico 8795 non-null object \n",
" 24 Tx_previos 8795 non-null object \n",
" 25 Adherencia_tto_recalc 8795 non-null float64 \n",
" 26 Tiempo_tx 8795 non-null float64 \n",
" 27 Readmisiones_estudios 8795 non-null object \n",
" 28 Situacion_tratamiento 8795 non-null object \n",
" 29 Periodos_COVID 8795 non-null object \n",
" 30 Pandemia_inicio_fin_tratamiento 8795 non-null object \n",
" 31 Nreadmision 8795 non-null float64 \n",
" 32 Readmisiones_PRECOVID 8795 non-null float64 \n",
" 33 Readmisiones_COVID 8795 non-null float64 \n",
"dtypes: category(1), float64(10), object(23)\n",
"memory usage: 2.3+ MB\n",
"None\n",
"-------------------------------\n",
"POST-ALTA\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 1882 entries, 258 to 85149\n",
"Data columns (total 34 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CODPROYECTO 1882 non-null float64 \n",
" 1 Education 1882 non-null object \n",
" 2 Social_protection 1882 non-null object \n",
" 3 Job_insecurity 1882 non-null object \n",
" 4 Housing 1882 non-null object \n",
" 5 Alterations_early_childhood_develop 1882 non-null object \n",
" 6 Social_inclusion 1882 non-null object \n",
" 7 Risk_stigma 1777 non-null category\n",
" 8 Structural_conflic 1882 non-null float64 \n",
" 9 Age 1882 non-null float64 \n",
" 10 Sex 1882 non-null object \n",
" 11 NumHijos 1778 non-null float64 \n",
" 12 Smoking 1882 non-null object \n",
" 13 Biological_vulnerability 1882 non-null object \n",
" 14 Alcohol_DxCIE 1882 non-null object \n",
" 15 Opiaceos_DxCIE 1882 non-null object \n",
" 16 Cannabis_DXCIE 1882 non-null object \n",
" 17 BZD_DxCIE 1882 non-null object \n",
" 18 Cocaina_DxCIE 1882 non-null object \n",
" 19 Alucinogenos_DXCIE 1882 non-null object \n",
" 20 Tabaco_DXCIE 1882 non-null object \n",
" 21 FrecuenciaConsumo30Dias 1882 non-null object \n",
" 22 Años_consumo_droga 1851 non-null float64 \n",
" 23 OtrosDx_Psiquiatrico 1882 non-null object \n",
" 24 Tx_previos 1882 non-null object \n",
" 25 Adherencia_tto_recalc 1882 non-null float64 \n",
" 26 Tiempo_tx 1882 non-null float64 \n",
" 27 Readmisiones_estudios 1882 non-null object \n",
" 28 Situacion_tratamiento 1882 non-null object \n",
" 29 Periodos_COVID 1882 non-null object \n",
" 30 Pandemia_inicio_fin_tratamiento 1882 non-null object \n",
" 31 Nreadmision 1882 non-null float64 \n",
" 32 Readmisiones_PRECOVID 1882 non-null float64 \n",
" 33 Readmisiones_COVID 1882 non-null float64 \n",
"dtypes: category(1), float64(10), object(23)\n",
"memory usage: 501.9+ KB\n",
"None\n",
"-------------------------------\n"
]
}
],
"source": [
"print(\"PRE\")\n",
"print(conj_pre.info())\n",
......@@ -136,9 +445,36 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Live with families or friends' 'live alone' 'live in institutions' '9.0']\n",
"['Live with families or friends' 'live alone' 'live in institutions']\n",
"['No alterations (first exposure at 11 or more years)'\n",
" 'Alterations (first exposure before 11 years old)' '9']\n",
"['No alterations (first exposure at 11 or more years)'\n",
" 'Alterations (first exposure before 11 years old)']\n",
"[NaN, 'Yes', 'No']\n",
"Categories (3, object): [99.0, 'No', 'Yes']\n",
"[NaN, 'Yes', 'No']\n",
"Categories (2, object): ['No', 'Yes']\n",
"[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 99. 14. 15.]\n",
"[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 14. 15.]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\1003504044.py:14: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
" bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n"
]
}
],
"source": [
"# 9.0 represents unknown according to Variables.docx \n",
"print(bd['Social_inclusion'].unique())\n",
......@@ -164,9 +500,30 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total missing values Age: 10\n",
"Total missing values Años_consumo_droga: 718\n",
"Total missing values Risk_stigma: 1847\n",
"Total missing values NumHijos: 1788\n",
"\tCONJUNTO PREPANDEMIA\n",
"\t\tMissing values Age: 9\n",
"\t\tMissing values Años_consumo_droga: 519\n",
"\t\tMissing values Risk_stigma: 1255\n",
"\t\tMissing values NumHijos: 1214\n",
"\tCONJUNTO POSTPANDEMIA\n",
"\t\tMissing values Age: 1\n",
"\t\tMissing values Años_consumo_droga: 199\n",
"\t\tMissing values Risk_stigma: 592\n",
"\t\tMissing values NumHijos: 574\n"
]
}
],
"source": [
"print(f\"Total missing values Age: {bd['Age'].isnull().sum()}\")\n",
"print(f\"Total missing values Años_consumo_droga: {bd['Años_consumo_droga'].isnull().sum()}\")\n",
......@@ -188,9 +545,44 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['Age'].fillna(age_mode, inplace=True)\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['Años_consumo_droga'].fillna(años_consumo_mode, inplace=True)\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['Risk_stigma'].fillna(risk_stigma_mode, inplace=True)\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_10184\\3303146707.py:11: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['NumHijos'].fillna(num_hijos_mode, inplace=True)\n"
]
}
],
"source": [
"age_mode = bd['Age'].mode()[0]\n",
"bd['Age'].fillna(age_mode, inplace=True)\n",
......@@ -481,7 +873,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
......@@ -512,7 +904,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
......@@ -589,7 +981,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
......@@ -622,7 +1014,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
......@@ -662,7 +1054,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
......@@ -681,7 +1073,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
......@@ -691,7 +1083,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
......@@ -748,14 +1140,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Export feature names\n",
"np.save('./output/feature_names/feature_names.npy', corr_cols)\n",
"np.save('./output/feature_names/soc_vars_names.npy', soc_vars_enc)\n",
"np.save('./output/feature_names/ind_vars_names.npy', ind_vars_enc)"
"np.save('./output/feature_names/all_features.npy', corr_cols)\n",
"np.save('./output/feature_names/social_factors.npy', soc_vars_enc)\n",
"np.save('./output/feature_names/individual_factors.npy', ind_vars_enc)"
]
},
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment