diff --git a/model_selection/hyperparam_tuning.py b/model_selection/hyperparam_tuning.py index e688b7cb4f9d0e3789dcb91374c1faa20e67e281..22d004afff2dada2d85b9a4973fd5bbb97ce50c9 100644 --- a/model_selection/hyperparam_tuning.py +++ b/model_selection/hyperparam_tuning.py @@ -142,18 +142,17 @@ if __name__ == "__main__": # -------------------------------------------------------------------------------------------------------- # Store each df as a sheet in an excel file sheets_dict = {} - for i, group in enumerate(['pre']): - for j, method in enumerate(['under_']): #['', '', 'over_', 'under_'] + for i, group in enumerate(['post']): + for j, method in enumerate(['', '', 'over_', 'under_']): # Get dataset based on group and method X = data_dic['X_train_' + method + group] y = data_dic['y_train_' + method + group] # Use group of models with class weight if needed - # models = models_CS if j == 2 else models_simple - models = models_simple + models = models_CS if j == 2 else models_simple # Save results: params and best score for each of the mdodels of this method and group hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Parameters','Score']) for model_name, model in models.items(): - print(f"{group}-{method_names[3]}-{model_name}") + print(f"{group}-{method_names[j]}-{model_name}") # Find optimal hyperparams for curr model params = hyperparameters[model_name] search = RandomizedSearchCV(model, param_distributions=params, cv=cv, n_jobs=8, scoring='precision') @@ -162,11 +161,11 @@ if __name__ == "__main__": hyperparam_df.at[model_name,'Score']=round(search.best_score_,4) # Store the DataFrame in the dictionary with a unique key for each sheet - sheet_name = f"{group}_{method_names[3]}" + sheet_name = f"{group}_{method_names[j]}" sheets_dict[sheet_name] = hyperparam_df # Write results to Excel file - with pd.ExcelWriter('./output/hyperparam_pre_UNDER.xlsx') as writer: + with pd.ExcelWriter('./output/hyperparam_post.xlsx') as writer: for sheet_name, data in sheets_dict.items(): data.to_excel(writer, sheet_name=sheet_name) diff --git a/model_selection/output/hyperparam_post.xlsx b/model_selection/output/hyperparam_post.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..848361b289ff6bd08da83976326ce8e72a6ad4b8 Binary files /dev/null and b/model_selection/output/hyperparam_post.xlsx differ