diff --git a/model_selection/hyperparam_tuning.py b/model_selection/hyperparam_tuning.py index ca31f54755e5da977c5d2f20e2cf1d133de94057..fc97f49892f1921ea2b9a00aa0eb8edc1039c9df 100644 --- a/model_selection/hyperparam_tuning.py +++ b/model_selection/hyperparam_tuning.py @@ -73,13 +73,13 @@ if __name__ == "__main__": # -------------------------------------------------------------------------------------------------------- # 1. No class weight models_simple = {"DT" : DecisionTreeClassifier(), - "RF" : RandomForestClassifier(), - "Bagging" : BaggingClassifier(), - "AB" : AdaBoostClassifier(algorithm='SAMME'), - "XGB": XGBClassifier(), - "LR" : LogisticRegression(max_iter=1000), - "SVM" : SVC(probability=True, max_iter=1000), - "MLP" : MLPClassifier(max_iter=500) + # "RF" : RandomForestClassifier(), + # "Bagging" : BaggingClassifier(), + # "AB" : AdaBoostClassifier(algorithm='SAMME'), + # "XGB": XGBClassifier(), + # "LR" : LogisticRegression(max_iter=1000), + # "SVM" : SVC(probability=True, max_iter=1000), + # "MLP" : MLPClassifier(max_iter=500) # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet') } @@ -141,15 +141,15 @@ if __name__ == "__main__": # -------------------------------------------------------------------------------------------------------- # Store each df as a sheet in an excel file sheets_dict = {} - for i, group in enumerate(['pre', 'post']): - for j, method in enumerate(['', '', 'over_', 'under_']): + for i, group in enumerate(['pre']): #['pre', 'post'] + for j, method in enumerate(['under_']): #['', '', 'over_', 'under_'] # Get dataset based on group and method X = data_dic['X_train_' + method + group] y = data_dic['y_train_' + method + group] # Use group of models with class weight if needed models = models_CS if j == 1 else models_simple - # Save results: params and best score for each of the mdodels of this method and group - hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Best Precision', 'Mean 
Precision', 'SD']) + # Save results: set of optimal hyperparameters -> mean precision and sd for those parameters across folds + hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Mean Precision', 'SD']) for model_name, model in models.items(): print(f"{group}-{method_names[j]}-{model_name}") # Find optimal hyperparams for curr model @@ -158,18 +158,15 @@ if __name__ == "__main__": search.fit(X,y) # Access the results results = search.cv_results_ - # Best parameters and best score directly accessible - hyperparam_df.at[model_name, 'Best Parameters'] = search.best_params_ - hyperparam_df.at[model_name, 'Best Precision'] = round(search.best_score_, 4) - # Finding the index for the best set of parameters best_index = search.best_index_ - # Accessing the mean and std of the test score specifically for the best parameters + # Get sd and mean across folds for best set of hyperparameters + best_params = search.best_params_ mean_precision_best = results['mean_test_score'][best_index] std_precision_best = results['std_test_score'][best_index] # Storing these values - hyperparam_df.at[model_name, 'Mean Precision'] = mean_precision_best - hyperparam_df.at[model_name, 'SD'] = std_precision_best - + hyperparam_df.at[model_name, 'Best Parameters'] = best_params + hyperparam_df.at[model_name, 'Mean Precision'] = round(mean_precision_best, 4) + hyperparam_df.at[model_name, 'SD'] = round(std_precision_best, 4) # Store the DataFrame in the dictionary with a unique key for each sheet sheet_name = f"{group}_{method_names[j]}" sheets_dict[sheet_name] = hyperparam_df diff --git a/model_selection/output_hyperparam/hyperparamers.xlsx b/model_selection/output_hyperparam/hyperparamers.xlsx index 598578e577cf2b89b361f358a0fcc87f12b68552..0f4ba7ea4cbbfa8585dcc2d47f8faffc61ec8ab8 100644 Binary files a/model_selection/output_hyperparam/hyperparamers.xlsx and b/model_selection/output_hyperparam/hyperparamers.xlsx differ