Commit a0bfde29 authored by Joaquin Torres

Adapting code to the new dataset (waiting to know what to do with unknown values)

parent a74eb5ce
...@@ -302,9 +302,30 @@ ...@@ -302,9 +302,30 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total missing values Age: 10\n",
"Total missing values Años_consumo_droga: 718\n",
"Total missing values Risk_stigma: 1847\n",
"Total missing values NumHijos: 1788\n",
"\tCONJUNTO PREPANDEMIA\n",
"\t\tMissing values Age: 9\n",
"\t\tMissing values Años_consumo_droga: 519\n",
"\t\tMissing values Risk_stigma: 1255\n",
"\t\tMissing values NumHijos: 1214\n",
"\tCONJUNTO POSTPANDEMIA\n",
"\t\tMissing values Age: 1\n",
"\t\tMissing values Años_consumo_droga: 199\n",
"\t\tMissing values Risk_stigma: 592\n",
"\t\tMissing values NumHijos: 574\n"
]
}
],
"source": [ "source": [
"print(f\"Total missing values Age: {bd['Age'].isnull().sum()}\")\n", "print(f\"Total missing values Age: {bd['Age'].isnull().sum()}\")\n",
"print(f\"Total missing values Años_consumo_droga: {bd['Años_consumo_droga'].isnull().sum()}\")\n", "print(f\"Total missing values Años_consumo_droga: {bd['Años_consumo_droga'].isnull().sum()}\")\n",
...@@ -608,7 +629,7 @@ ...@@ -608,7 +629,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 65,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -664,7 +685,7 @@ ...@@ -664,7 +685,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 66,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -680,7 +701,7 @@ ...@@ -680,7 +701,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 58, "execution_count": 67,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -702,7 +723,7 @@ ...@@ -702,7 +723,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"##### One-hot encode all categorical variables" "##### One-hot encode categorical variables"
] ]
}, },
{ {
...@@ -723,34 +744,22 @@ ...@@ -723,34 +744,22 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 68,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [
"# Original approach\n",
"one_hot_vars = ['Education', 'Social_protection', 'Job_insecurity', 'Housing', 'Alterations_early_childhood_develop']\n",
"\n",
"social_vars = ['Education', 'Social_protection', 'Job_insecurity', 'Housing', 'Alterations_early_childhood_develop', \n",
" 'Social_inclusion', 'Risk_stigma', 'Structural_conflic']\n",
"ind_vars = ['Age', 'Sex', 'NumHijos', 'Smoking', 'Biological_vulnerability', 'Opiaceos_DxCIE', \n",
" 'Cannabis_DXCIE', 'BZD_DxCIE', 'Cocaina_DxCIE', 'Alucinogenos_DXCIE', 'Tabaco_DXCIE', \n",
" 'FrecuenciaConsumo30Dias', 'Años_consumo_droga','OtrosDx_Psiquiatrico', 'Tx_previos', 'Adherencia_tto_recalc'] \n",
"target_var = 'Situacion_tratamiento'"
]
},
{ {
"cell_type": "code", "name": "stdout",
"execution_count": null, "output_type": "stream",
"metadata": {}, "text": [
"outputs": [], "['Frec30_1 día/semana', 'Frec30_2-3 días\\u200e/semana', 'Frec30_4-6 días/semana', 'Frec30_Desconocido', 'Frec30_Menos de 1 día\\u200e/semana', 'Frec30_No consumio', 'Frec30_Todos los días']\n"
]
}
],
"source": [ "source": [
"# Specify columns to one hot encode; empty list otherwise\n", "# Specify columns to one hot encode; empty list otherwise\n",
"one_hot_vars = ['Droga_Ppal_REC', 'Sexo_x_Hijos', 'Education',\n", "one_hot_vars = ['Education', 'Job_insecurity', 'Housing', 'Social_inclusion', 'FrecuenciaConsumo30Dias']\n",
" 'Job_insecurity', 'Housing', 'Social_inclusion', 'FrecuenciaConsumo30Dias'] \n",
"\n", "\n",
"one_hots_vars_prefix = {\n", "one_hots_vars_prefix = {\n",
" 'Droga_Ppal_REC': 'DrogP',\n",
" 'Sexo_x_Hijos': 'SexHij',\n",
" 'Education': 'Ed',\n", " 'Education': 'Ed',\n",
" 'Job_insecurity': 'JobIn',\n", " 'Job_insecurity': 'JobIn',\n",
" 'Housing': 'Hous', \n", " 'Housing': 'Hous', \n",
...@@ -939,35 +948,6 @@ ...@@ -939,35 +948,6 @@
" plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})" " plt.title(\"\\n\\n\" + plot_title, fontdict={'fontsize': 30, 'fontweight': 'bold'})"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"###### Original approach (all categorical mapped to integers)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axs = plt.subplots(3, 3, figsize=(50, 50))\n",
"plt.subplots_adjust(hspace=0.75, wspace=2)\n",
"\n",
"# Go through possible values for 'Situacion_tratamiento' and 'Group'\n",
"for sit_tto in range(1,4):\n",
" for group in range(1,4):\n",
" plt.subplot(3, 3, 3*(sit_tto-1) + group) # Calculate the subplot position dynamically\n",
" plot_heatmap(sit_tto, group)\n",
" \n",
"# Adjust layout to prevent overlapping titles\n",
"plt.tight_layout()\n",
"\n",
"# Save the figure in SVG format in the \"./EDA_plots\" folder\n",
"plt.savefig('./EDA_plots/heatmaps_original.svg', dpi=550, bbox_inches='tight')"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment