Commit 5b97dbfb authored by Joaquin Torres

Corrected computation of mean and sd for hyperparameter tuning

parent a9a1abac
...@@ -73,13 +73,13 @@ if __name__ == "__main__": ...@@ -73,13 +73,13 @@ if __name__ == "__main__":
# -------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------
# 1. No class weight # 1. No class weight
models_simple = {"DT" : DecisionTreeClassifier(), models_simple = {"DT" : DecisionTreeClassifier(),
"RF" : RandomForestClassifier(), # "RF" : RandomForestClassifier(),
"Bagging" : BaggingClassifier(), # "Bagging" : BaggingClassifier(),
"AB" : AdaBoostClassifier(algorithm='SAMME'), # "AB" : AdaBoostClassifier(algorithm='SAMME'),
"XGB": XGBClassifier(), # "XGB": XGBClassifier(),
"LR" : LogisticRegression(max_iter=1000), # "LR" : LogisticRegression(max_iter=1000),
"SVM" : SVC(probability=True, max_iter=1000), # "SVM" : SVC(probability=True, max_iter=1000),
"MLP" : MLPClassifier(max_iter=500) # "MLP" : MLPClassifier(max_iter=500)
# "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet') # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet')
} }
...@@ -141,15 +141,15 @@ if __name__ == "__main__": ...@@ -141,15 +141,15 @@ if __name__ == "__main__":
# -------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------
# Store each df as a sheet in an excel file # Store each df as a sheet in an excel file
sheets_dict = {} sheets_dict = {}
for i, group in enumerate(['pre', 'post']): for i, group in enumerate(['pre']): #['pre', 'post']
for j, method in enumerate(['', '', 'over_', 'under_']): for j, method in enumerate(['under_']): #['', '', 'over_', 'under_']
# Get dataset based on group and method # Get dataset based on group and method
X = data_dic['X_train_' + method + group] X = data_dic['X_train_' + method + group]
y = data_dic['y_train_' + method + group] y = data_dic['y_train_' + method + group]
# Use group of models with class weight if needed # Use group of models with class weight if needed
models = models_CS if j == 1 else models_simple models = models_CS if j == 1 else models_simple
# Save results: params and best score for each of the models of this method and group # Save results: set of optimal hyperparameters -> mean precision and sd for those parameters across folds
hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Best Precision', 'Mean Precision', 'SD']) hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Mean Precision', 'SD'])
for model_name, model in models.items(): for model_name, model in models.items():
print(f"{group}-{method_names[j]}-{model_name}") print(f"{group}-{method_names[j]}-{model_name}")
# Find optimal hyperparams for curr model # Find optimal hyperparams for curr model
...@@ -158,18 +158,15 @@ if __name__ == "__main__": ...@@ -158,18 +158,15 @@ if __name__ == "__main__":
search.fit(X,y) search.fit(X,y)
# Access the results # Access the results
results = search.cv_results_ results = search.cv_results_
# Best parameters and best score directly accessible
hyperparam_df.at[model_name, 'Best Parameters'] = search.best_params_
hyperparam_df.at[model_name, 'Best Precision'] = round(search.best_score_, 4)
# Finding the index for the best set of parameters
best_index = search.best_index_ best_index = search.best_index_
# Accessing the mean and std of the test score specifically for the best parameters # Get sd and mean across folds for best set of hyperparameters
best_params = search.best_params_
mean_precision_best = results['mean_test_score'][best_index] mean_precision_best = results['mean_test_score'][best_index]
std_precision_best = results['std_test_score'][best_index] std_precision_best = results['std_test_score'][best_index]
# Storing these values # Storing these values
hyperparam_df.at[model_name, 'Mean Precision'] = mean_precision_best hyperparam_df.at[model_name, 'Best Parameters'] = best_params
hyperparam_df.at[model_name, 'SD'] = std_precision_best hyperparam_df.at[model_name, 'Mean Precision'] = round(mean_precision_best, 4)
hyperparam_df.at[model_name, 'SD'] = round(std_precision_best, 4)
# Store the DataFrame in the dictionary with a unique key for each sheet # Store the DataFrame in the dictionary with a unique key for each sheet
sheet_name = f"{group}_{method_names[j]}" sheet_name = f"{group}_{method_names[j]}"
sheets_dict[sheet_name] = hyperparam_df sheets_dict[sheet_name] = hyperparam_df
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment