Commit 94cd0c57 authored by Joaquin Torres

Renaming in process

parent 7a2bcb50
......@@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 139,
"metadata": {},
"outputs": [],
"source": [
......@@ -70,9 +70,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 140,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\2495984927.py:18: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" conj_post['Group'] = 'Post'\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\2495984927.py:19: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" conj_pre['Group'] = 'Pre'\n"
]
}
],
"source": [
"# Pre-pandemic\n",
"conj_pre = bd[bd['Pandemia_inicio_fin_tratamiento'] == 'Inicio y fin prepandemia']\n",
......@@ -98,9 +117,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 100,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PRE: 22861\n",
"\tALTA: 2792\n",
"\tABANDONO: 20069\n",
"POST: 10677\n",
"\tALTA: 1882\n",
"\tABANDONO: 8795\n"
]
}
],
"source": [
"# Printing size of different datasets\n",
"print(f\"PRE: {len(conj_pre)}\")\n",
......@@ -164,9 +196,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 141,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Live with families or friends' 'live alone' 'live in institutions' '9.0']\n",
"['Live with families or friends' 'live alone' 'live in institutions']\n"
]
}
],
"source": [
"# 9.0 represents unknown according to Variables.docx \n",
"print(bd['Social_inclusion'].unique())\n",
......@@ -178,9 +219,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 142,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['No alterations (first exposure at 11 or more years)'\n",
" 'Alterations (first exposure before 11 years old)' '9']\n",
"['No alterations (first exposure at 11 or more years)'\n",
" 'Alterations (first exposure before 11 years old)']\n"
]
}
],
"source": [
"print(bd['Alterations_early_childhood_develop'].unique())\n",
"mode_alt = bd['Alterations_early_childhood_develop'].mode()[0]\n",
......@@ -190,9 +242,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 143,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NaN, 'Yes', 'No']\n",
"Categories (3, object): [99.0, 'No', 'Yes']\n",
"[NaN, 'Yes', 'No']\n",
"Categories (2, object): ['No', 'Yes']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\1073322024.py:3: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
" bd['Risk_stigma'] = bd['Risk_stigma'].replace(99.0, mode_stigma)\n"
]
}
],
"source": [
"print(bd['Risk_stigma'].unique())\n",
"mode_stigma = bd['Risk_stigma'].mode()[0]\n",
......@@ -202,9 +273,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 144,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 99. 14. 15.]\n",
"[nan 0. 1. 2. 3. 4. 5. 8. 10. 6. 11. 12. 9. 7. 14. 15.]\n"
]
}
],
"source": [
"print(bd['NumHijos'].unique())\n",
"mode_hijos = bd['NumHijos'].mode()[0]\n",
......@@ -252,9 +332,44 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 145,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\3303146707.py:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['Age'].fillna(age_mode, inplace=True)\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\3303146707.py:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['Años_consumo_droga'].fillna(años_consumo_mode, inplace=True)\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\3303146707.py:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['Risk_stigma'].fillna(risk_stigma_mode, inplace=True)\n",
"C:\\Users\\Joaquín Torres\\AppData\\Local\\Temp\\ipykernel_19584\\3303146707.py:11: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" bd['NumHijos'].fillna(num_hijos_mode, inplace=True)\n"
]
}
],
"source": [
"age_mode = bd['Age'].mode()[0]\n",
"bd['Age'].fillna(age_mode, inplace=True)\n",
......@@ -552,7 +667,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
......@@ -629,7 +744,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
......@@ -643,7 +758,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
......@@ -660,7 +775,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 147,
"metadata": {},
"outputs": [],
"source": [
......@@ -695,7 +810,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 148,
"metadata": {},
"outputs": [],
"source": [
......@@ -742,7 +857,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 149,
"metadata": {},
"outputs": [],
"source": [
......@@ -754,9 +869,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 150,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Ed_Not_Complete_Primary', 'Ed_Primary', 'Ed_Secondary', 'Ed_Secondary_Technical', 'Ed_Tertiary', 'Social_Protection', 'JobIn_Unstable', 'JobIn_Stable', 'JobIn_Unemployed', 'Hous_Institutional', 'Hous_Stable', 'Hous_Unstable', 'Early_Alterations', 'SocInc_Family_Friends', 'SocInc_Alone', 'SocInc_Instit', 'Risk_Stigma', 'Structural_Conflict', 'age', 'Sex', 'Num_Children', 'Smoking', 'Bio_Vulner', 'Opiods_DXCIE', 'Cannabis_DXCIE', 'BZD_DXCIE', 'Cocaine_DXCIE', 'Hallucin_DXCIE', 'Tobacco_DXCIE', 'Freq_1dpw', 'Freq_2-3dpw', 'Freq_4-6dpw', 'Freq_l1dpw', 'Freq_None', 'Freq_Everyday', 'Years_Drug_Use', 'Other_Psychiatric_DX', 'Previous_Treatments', 'Treatment_Adherence']\n"
]
}
],
"source": [
"name_mapping = {\n",
" 'Ed_Not Complete primary school': 'Ed_Not_Complete_Primary',\n",
......@@ -777,7 +900,7 @@
" 'SocInc_live in institutions': 'SocInc_Instit',\n",
" 'Risk_stigma_REDEF': 'Risk_Stigma',\n",
" 'Structural_conflic': 'Structural_Conflict',\n",
" 'Age': 'Age',\n",
" # 'Age': 'Age',\n",
" 'Sex_REDEF': 'Sex',\n",
" 'NumHijos': 'Num_Children',\n",
" 'Smoking_REDEF': 'Smoking',\n",
......@@ -802,13 +925,26 @@
"\n",
"# Update lists of feature names\n",
"corr_cols = [name_mapping[corr_col] for corr_col in corr_cols]\n",
"print(corr_cols)\n",
"soc_vars_enc = [name_mapping[col] for col in soc_vars_enc]\n",
"ind_vars_enc = [name_mapping[col] for col in ind_vars_enc]"
"ind_vars_enc = [name_mapping[col] for col in ind_vars_enc]\n",
"\n",
"bd = bd.rename(columns=name_mapping)"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the renamed feature columns plus the target/grouping columns.\n",
"target_cols = ['Situacion_tratamiento', 'Situacion_tratamiento_REDEF', 'Pandemia_inicio_fin_tratamiento']\n",
"bd_selected = bd[corr_cols + target_cols]\n",
"\n",
"# Selecting a label that occurs more than once in bd returns every matching column,\n",
"# so duplicated labels produced by the rename would silently propagate downstream\n",
"# (bd[col] then yields a DataFrame instead of a Series). Fail fast with a clear message.\n",
"dup_labels = bd_selected.columns[bd_selected.columns.duplicated()].unique().tolist()\n",
"assert not dup_labels, f\"Duplicated column labels after renaming: {dup_labels}\"\n",
"\n",
"bd = bd_selected"
]
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
......@@ -827,7 +963,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 134,
"metadata": {},
"outputs": [],
"source": [
......@@ -857,12 +993,105 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"binary_vars = [col for col in corr_cols if len(bd[col].unique()) == 2] + ['Situacion_tratamiento_REDEF', 'Risk_stigma_REDEF']\n",
"cont_vars = ['Structural_conflic', 'Age', 'NumHijos', 'Años_consumo_droga', 'Adherencia_tto_recalc']"
"execution_count": 135,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['Ed_Not_Complete_Primary', 'Ed_Primary', 'Ed_Secondary',\n",
" 'Ed_Secondary_Technical', 'Ed_Tertiary', 'Social_Protection',\n",
" 'JobIn_Unstable', 'JobIn_Stable', 'JobIn_Unemployed',\n",
" 'Hous_Institutional', 'Hous_Stable', 'Hous_Unstable',\n",
" 'Early_Alterations', 'SocInc_Family_Friends', 'SocInc_Alone',\n",
" 'SocInc_Instit', 'Risk_Stigma', 'Structural_Conflict', 'age', 'Sex',\n",
" 'Sex', 'Num_Children', 'Smoking', 'Smoking', 'Bio_Vulner',\n",
" 'Opiods_DXCIE', 'Cannabis_DXCIE', 'Cannabis_DXCIE', 'BZD_DXCIE',\n",
" 'Cocaine_DXCIE', 'Hallucin_DXCIE', 'Tobacco_DXCIE', 'Freq_1dpw',\n",
" 'Freq_2-3dpw', 'Freq_4-6dpw', 'Freq_l1dpw', 'Freq_None',\n",
" 'Freq_Everyday', 'Years_Drug_Use', 'Other_Psychiatric_DX',\n",
" 'Previous_Treatments', 'Treatment_Adherence', 'Situacion_tratamiento',\n",
" 'Situacion_tratamiento_REDEF', 'Pandemia_inicio_fin_tratamiento'],\n",
" dtype='object')\n"
]
}
],
"source": [
"print(bd.columns)"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ed_Not_Complete_Primary\n",
"2\n",
"Ed_Primary\n",
"2\n",
"Ed_Secondary\n",
"2\n",
"Ed_Secondary_Technical\n",
"2\n",
"Ed_Tertiary\n",
"2\n",
"Social_Protection\n",
"2\n",
"JobIn_Unstable\n",
"2\n",
"JobIn_Stable\n",
"2\n",
"JobIn_Unemployed\n",
"2\n",
"Hous_Institutional\n",
"2\n",
"Hous_Stable\n",
"2\n",
"Hous_Unstable\n",
"2\n",
"Early_Alterations\n",
"2\n",
"SocInc_Family_Friends\n",
"2\n",
"SocInc_Alone\n",
"2\n",
"SocInc_Instit\n",
"2\n",
"Risk_Stigma\n",
"2\n",
"Structural_Conflict\n",
"107\n",
"age\n",
"74\n",
"Sex\n"
]
},
{
"ename": "AttributeError",
"evalue": "'DataFrame' object has no attribute 'unique'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_19584\\340002156.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# print(len(bd['Cocaine_DXCIE'].unique()) == 2)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcorr_cols\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbd\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munique\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[1;31m#binary_vars = [col for col in corr_cols if len(bd[col].unique()) == 2] + ['Situacion_tratamiento_REDEF', name_mapping['Risk_stigma_REDEF']]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;31m#cont_vars = [name_mapping[col] for col in ['Structural_conflic', 'Age', 'NumHijos', 'Años_consumo_droga', 'Adherencia_tto_recalc']]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\Joaquín Torres\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6292\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6293\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6294\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6295\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6296\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'unique'"
]
}
],
"source": [
"# Print the number of distinct values of every feature column.\n",
"# bd[col] is a DataFrame (which has no .unique()) when the label `col` occurs more than\n",
"# once in bd.columns, so duplicated labels are reported instead of crashing the loop.\n",
"dup_labels = set(bd.columns[bd.columns.duplicated()])\n",
"\n",
"for col in corr_cols:\n",
"    print(col)\n",
"    if col in dup_labels:\n",
"        print(f\"duplicated label ({(bd.columns == col).sum()} columns share this name)\")\n",
"        continue\n",
"    print(len(bd[col].unique()))\n",
"#binary_vars = [col for col in corr_cols if len(bd[col].unique()) == 2] + ['Situacion_tratamiento_REDEF', name_mapping['Risk_stigma_REDEF']]\n",
"#cont_vars = [name_mapping[col] for col in ['Structural_conflic', 'Age', 'NumHijos', 'Años_consumo_droga', 'Adherencia_tto_recalc']]"
]
},
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment