Corrected computation of mean and sd for hyperparameter tuning

5b97dbfb · Joaquin Torres · a9a1abac · 5b97dbfb · 5b97dbfb
Commit 5b97dbfb authored May 22, 2024 by Joaquin Torres
Showing with 16 additions and 19 deletions

model_selection/hyperparam_tuning.py model_selection/hyperparam_tuning.py +16 -19

model_selection/output_hyperparam/hyperparamers.xlsx model_selection/output_hyperparam/hyperparamers.xlsx +0 -0

No files found.
--- a/model_selection/hyperparam_tuning.py
+++ b/model_selection/hyperparam_tuning.py
@@ -73,13 +73,13 @@ if __name__ == "__main__":
    # --------------------------------------------------------------------------------------------------------
    # 1. No class weight
    models_simple = {"DT" : DecisionTreeClassifier(), 
-            "RF" : RandomForestClassifier(), 
+            # "RF" : RandomForestClassifier(), 
-            "Bagging" : BaggingClassifier(),
+            # "Bagging" : BaggingClassifier(),
-            "AB" : AdaBoostClassifier(algorithm='SAMME'), 
+            # "AB" : AdaBoostClassifier(algorithm='SAMME'), 
-            "XGB": XGBClassifier(),
+            # "XGB": XGBClassifier(),
-            "LR" : LogisticRegression(max_iter=1000), 
+            # "LR" : LogisticRegression(max_iter=1000), 
-            "SVM" : SVC(probability=True, max_iter=1000), 
+            # "SVM" : SVC(probability=True, max_iter=1000), 
-            "MLP" : MLPClassifier(max_iter=500)
+            # "MLP" : MLPClassifier(max_iter=500)
            # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet')
            }
@@ -141,15 +141,15 @@ if __name__ == "__main__":
    # --------------------------------------------------------------------------------------------------------
    # Store each df as a sheet in an excel file
    sheets_dict = {}
-    for i, group in enumerate(['pre', 'post']):
+    for i, group in enumerate(['pre']): #['pre', 'post']
-        for j, method in enumerate(['', '', 'over_', 'under_']):
+        for j, method in enumerate(['under_']): #['', '', 'over_', 'under_']
            # Get dataset based on group and method
            X = data_dic['X_train_' + method + group]
            y = data_dic['y_train_' + method + group]
            # Use group of models with class weight if needed
            models = models_CS if j == 1 else models_simple 
-            # Save results: params and best score for each of the mdodels of this method and group
+            # Save results: set of optimal hyperparameters -> mean precision and sd for those parameters across folds
-            hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Best Precision', 'Mean Precision', 'SD'])
+            hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Best Parameters','Mean Precision', 'SD'])
            for model_name, model in models.items():
                print(f"{group}-{method_names[j]}-{model_name}")
                # Find optimal hyperparams for curr model
@@ -158,18 +158,15 @@ if __name__ == "__main__":
                search.fit(X,y)
                # Access the results
                results = search.cv_results_
-                # Best parameters and best score directly accessible
-                hyperparam_df.at[model_name, 'Best Parameters'] = search.best_params_
-                hyperparam_df.at[model_name, 'Best Precision'] = round(search.best_score_, 4)
-                # Finding the index for the best set of parameters
                best_index = search.best_index_
-                # Accessing the mean and std of the test score specifically for the best parameters
+                # Get sd and mean across folds for best set of hyperparameters
+                best_params = search.best_params_
                mean_precision_best = results['mean_test_score'][best_index]
                std_precision_best = results['std_test_score'][best_index]
                # Storing these values
-                hyperparam_df.at[model_name, 'Mean Precision'] = mean_precision_best
+                hyperparam_df.at[model_name, 'Best Parameters'] = best_params
-                hyperparam_df.at[model_name, 'SD'] = std_precision_best
+                hyperparam_df.at[model_name, 'Mean Precision'] = round(mean_precision_best, 4)
+                hyperparam_df.at[model_name, 'SD'] = round(std_precision_best, 4)
            # Store the DataFrame in the dictionary with a unique key for each sheet
            sheet_name = f"{group}_{method_names[j]}"
            sheets_dict[sheet_name] = hyperparam_df

--- a/model_selection/output_hyperparam/hyperparamers.xlsx
+++ b/model_selection/output_hyperparam/hyperparamers.xlsx