Commit 5b97dbfb authored by Joaquin Torres

Corrected computation of mean and sd for hyperparameter tuning

parent a9a1abac
......@@ -73,13 +73,13 @@ if __name__ == "__main__":
# --------------------------------------------------------------------------------------------------------
# 1. No class weight
models_simple = {"DT" : DecisionTreeClassifier(),
"RF" : RandomForestClassifier(),
"Bagging" : BaggingClassifier(),
"AB" : AdaBoostClassifier(algorithm='SAMME'),
"XGB": XGBClassifier(),
"LR" : LogisticRegression(max_iter=1000),
"SVM" : SVC(probability=True, max_iter=1000),
"MLP" : MLPClassifier(max_iter=500)
# "RF" : RandomForestClassifier(),
# "Bagging" : BaggingClassifier(),
# "AB" : AdaBoostClassifier(algorithm='SAMME'),
# "XGB": XGBClassifier(),
# "LR" : LogisticRegression(max_iter=1000),
# "SVM" : SVC(probability=True, max_iter=1000),
# "MLP" : MLPClassifier(max_iter=500)
# "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet')
}
......@@ -141,15 +141,15 @@ if __name__ == "__main__":
# --------------------------------------------------------------------------------------------------------
# Store each df as a sheet in an excel file
sheets_dict = {}
for i, group in enumerate(['pre', 'post']):
for j, method in enumerate(['', '', 'over_', 'under_']):
for i, group in enumerate(['pre']): #['pre', 'post']
for j, method in enumerate(['under_']): #['', '', 'over_', 'under_']
# Get dataset based on group and method
X = data_dic['X_train_' + method + group]
y = data_dic['y_train_' + method + group]
# Use group of models with class weight if needed
models = models_CS if j == 1 else models_simple
# Save results: params and best score for each of the models of this method and group
hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Best Precision', 'Mean Precision', 'SD'])
# Save results: set of optimal hyperparameters -> mean precision and sd for those parameters across folds
hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Mean Precision', 'SD'])
for model_name, model in models.items():
print(f"{group}-{method_names[j]}-{model_name}")
# Find optimal hyperparams for curr model
......@@ -158,18 +158,15 @@ if __name__ == "__main__":
search.fit(X,y)
# Access the results
results = search.cv_results_
# Best parameters and best score directly accessible
hyperparam_df.at[model_name, 'Best Parameters'] = search.best_params_
hyperparam_df.at[model_name, 'Best Precision'] = round(search.best_score_, 4)
# Finding the index for the best set of parameters
best_index = search.best_index_
# Accessing the mean and std of the test score specifically for the best parameters
# Get sd and mean across folds for best set of hyperparameters
best_params = search.best_params_
mean_precision_best = results['mean_test_score'][best_index]
std_precision_best = results['std_test_score'][best_index]
# Storing these values
hyperparam_df.at[model_name, 'Mean Precision'] = mean_precision_best
hyperparam_df.at[model_name, 'SD'] = std_precision_best
hyperparam_df.at[model_name, 'Best Parameters'] = best_params
hyperparam_df.at[model_name, 'Mean Precision'] = round(mean_precision_best, 4)
hyperparam_df.at[model_name, 'SD'] = round(std_precision_best, 4)
# Store the DataFrame in the dictionary with a unique key for each sheet
sheet_name = f"{group}_{method_names[j]}"
sheets_dict[sheet_name] = hyperparam_df
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment