diff --git a/model_selection/cv_metric_gen.py b/model_selection/cv_metric_gen.py
index 1ba32c8fc27c6d90b5d970b89ea54522ca4ebb3a..cffd6f88bfd920b5221122f53e46f8d550341ab2 100644
--- a/model_selection/cv_metric_gen.py
+++ b/model_selection/cv_metric_gen.py
@@ -81,9 +81,9 @@ def get_tuned_models(group_str, method_str):
     tuned_models = {}
     # Iterate through each row of the DataFrame
     for _, row in tuned_models_df.iterrows():
-        model_name = row[0]
+        model_name = row.iloc[0]
         # Read dictionary
-        parameters = ast.literal_eval(row['Parameters'])
+        parameters = ast.literal_eval(row['Best Parameters'])
         # Add extra parameters
         if model_name == 'AB':
             parameters['algorithm'] = 'SAMME'
@@ -177,7 +177,7 @@ if __name__ == "__main__":
     scores_sheets = {} # To store score dfs as sheets in the same excel file
     for i, group in enumerate(['pre', 'post']):
         for j, method in enumerate(['', '', 'over_', 'under_']):
-            print(f"{group}-{method_names[j]}")
+            # print(f"{group}-{method_names[j]}")
            # Get train dataset based on group and method
            X_train = data_dic['X_train_' + method + group]
            y_train = data_dic['y_train_' + method + group]
@@ -188,12 +188,13 @@ if __name__ == "__main__":
            scores_df = pd.DataFrame(columns=range(1,11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()])
            # Metric generation for each model
            for model_name, model in models.items():
-                # Retrieve cv scores for our metrics of interest
-                scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
-                # Save results of each fold
-                scores_df.loc[model_name +'_F1']=list(np.around(np.array(scores["test_f1"]),4))
-                scores_df.loc[model_name +'_PREC']=list(np.around(np.array(scores["test_precision"]),4))
-                scores_df.loc[model_name +'_REC']=list(np.around(np.array(scores["test_recall"]),4))
+                if model_name == 'DT':
+                    print(f"{group}-{method_names[j]}-{model_name}")
+                    # Retrieve cv scores for our metrics of interest
+                    scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
+                    # Save results of each fold
+                    for metric_name in scorings.keys():
+                        scores_df.loc[model_name + f'_{metric_name}']=list(np.around(np.array(scores[f"test_{metric_name}"]),4))
            # Store the DataFrame in the dictionary with a unique key for each sheet
            sheet_name = f"{group}_{method_names[j]}"
            scores_sheets[sheet_name] = scores_df
diff --git a/model_selection/output_cv_metrics.xlsx b/model_selection/output_cv_metrics.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..41849ebdca3a33114e62b3092daf8ca9f3f42c05
Binary files /dev/null and b/model_selection/output_cv_metrics.xlsx differ