Commit a0bfde29 authored by Joaquin Torres

adapting code to new dataset (waiting to know what to do with unknown values)

parent a74eb5ce
......@@ -302,9 +302,30 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 64,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total missing values Age: 10\n",
"Total missing values Años_consumo_droga: 718\n",
"Total missing values Risk_stigma: 1847\n",
"Total missing values NumHijos: 1788\n",
"\tCONJUNTO PREPANDEMIA\n",
"\t\tMissing values Age: 9\n",
"\t\tMissing values Años_consumo_droga: 519\n",
"\t\tMissing values Risk_stigma: 1255\n",
"\t\tMissing values NumHijos: 1214\n",
"\tCONJUNTO POSTPANDEMIA\n",
"\t\tMissing values Age: 1\n",
"\t\tMissing values Años_consumo_droga: 199\n",
"\t\tMissing values Risk_stigma: 592\n",
"\t\tMissing values NumHijos: 574\n"
]
}
],
"source": [
"print(f\"Total missing values Age: {bd['Age'].isnull().sum()}\")\n",
"print(f\"Total missing values Años_consumo_droga: {bd['Años_consumo_droga'].isnull().sum()}\")\n",
......@@ -608,7 +629,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
......@@ -664,7 +685,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
......@@ -680,7 +701,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
......@@ -702,7 +723,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"##### One-hot encode all categorical variables"
"##### One-hot encode categorical variables"
]
},
{
......@@ -723,34 +744,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"# Original approach\n",
"one_hot_vars = ['Education', 'Social_protection', 'Job_insecurity', 'Housing', 'Alterations_early_childhood_develop']\n",
"\n",
"social_vars = ['Education', 'Social_protection', 'Job_insecurity', 'Housing', 'Alterations_early_childhood_develop', \n",
" 'Social_inclusion', 'Risk_stigma', 'Structural_conflic']\n",
"ind_vars = ['Age', 'Sex', 'NumHijos', 'Smoking', 'Biological_vulnerability', 'Opiaceos_DxCIE', \n",
" 'Cannabis_DXCIE', 'BZD_DxCIE', 'Cocaina_DxCIE', 'Alucinogenos_DXCIE', 'Tabaco_DXCIE', \n",
" 'FrecuenciaConsumo30Dias', 'Años_consumo_droga','OtrosDx_Psiquiatrico', 'Tx_previos', 'Adherencia_tto_recalc'] \n",
"target_var = 'Situacion_tratamiento'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Frec30_1 día/semana', 'Frec30_2-3 días\\u200e/semana', 'Frec30_4-6 días/semana', 'Frec30_Desconocido', 'Frec30_Menos de 1 día\\u200e/semana', 'Frec30_No consumio', 'Frec30_Todos los días']\n"
]
}
],
"source": [
"# Specify columns to one hot encode; empty list otherwise\n",
"one_hot_vars = ['Droga_Ppal_REC', 'Sexo_x_Hijos', 'Education',\n",
" 'Job_insecurity', 'Housing', 'Social_inclusion', 'FrecuenciaConsumo30Dias'] \n",
"one_hot_vars = ['Education', 'Job_insecurity', 'Housing', 'Social_inclusion', 'FrecuenciaConsumo30Dias']\n",
"\n",
"one_hots_vars_prefix = {\n",
" 'Droga_Ppal_REC': 'DrogP',\n",
" 'Sexo_x_Hijos': 'SexHij',\n",
" 'Education': 'Ed',\n",
" 'Job_insecurity': 'JobIn',\n",
" 'Housing': 'Hous', \n",
......@@ -939,35 +948,6 @@
" plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"###### Original approach (all categorical mapped to integers)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axs = plt.subplots(3, 3, figsize=(50, 50))\n",
"plt.subplots_adjust(hspace=0.75, wspace=2)\n",
"\n",
"# Go through possible values for 'Situacion_tratamiento' and 'Group'\n",
"for sit_tto in range(1,4):\n",
" for group in range(1,4):\n",
" plt.subplot(3, 3, 3*(sit_tto-1) + group) # Calculate the subplot position dynamically\n",
" plot_heatmap(sit_tto, group)\n",
" \n",
"# Adjust layout to prevent overlapping titles\n",
"plt.tight_layout()\n",
"\n",
"# Save the figure in SVG format in the \"./EDA_plots\" folder\n",
"plt.savefig('./EDA_plots/heatmaps_original.svg', dpi=550, bbox_inches='tight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment