Testing metric generation

44116618 · Joaquin Torres · 97658355 · 44116618 · 44116618
Commit 44116618 authored May 22, 2024 by Joaquin Torres
Show whitespace changes
Inline Side-by-side

Showing with 10 additions and 9 deletions

model_selection/cv_metric_gen.py model_selection/cv_metric_gen.py +10 -9

model_selection/output_cv_metrics.xlsx model_selection/output_cv_metrics.xlsx +0 -0

No files found.
--- a/model_selection/cv_metric_gen.py
+++ b/model_selection/cv_metric_gen.py
@@ -81,9 +81,9 @@ def get_tuned_models(group_str, method_str):
    tuned_models = {}
    # Iterate through each row of the DataFrame
    for _, row in tuned_models_df.iterrows():
-        model_name = row[0]
+        model_name = row.iloc[0]
        # Read dictionary
-        parameters = ast.literal_eval(row['Parameters'])
+        parameters = ast.literal_eval(row['Best Parameters'])
        # Add extra parameters 
        if model_name == 'AB':
            parameters['algorithm'] = 'SAMME'
@@ -177,7 +177,7 @@ if __name__ == "__main__":
    scores_sheets = {} # To store score dfs as sheets in the same excel file
    for i, group in enumerate(['pre', 'post']):
        for j, method in enumerate(['', '', 'over_', 'under_']):
-            print(f"{group}-{method_names[j]}")
+            # print(f"{group}-{method_names[j]}")
            # Get train dataset based on group and method
            X_train = data_dic['X_train_' + method + group]
            y_train = data_dic['y_train_' + method + group]
@@ -188,12 +188,13 @@ if __name__ == "__main__":
            scores_df = pd.DataFrame(columns=range(1,11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()])
            # Metric generation for each model
            for model_name, model in models.items():
+                if model_name == 'DT':
+                    print(f"{group}-{method_names[j]}-{model_name}")
                    # Retrieve cv scores for our metrics of interest
                    scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
                    # Save results of each fold
-                scores_df.loc[model_name +'_F1']=list(np.around(np.array(scores["test_f1"]),4)) 
-                scores_df.loc[model_name +'_PREC']=list(np.around(np.array(scores["test_precision"]),4))
-                scores_df.loc[model_name +'_REC']=list(np.around(np.array(scores["test_recall"]),4))
+                    for metric_name in scorings.keys():
+                        scores_df.loc[model_name + f'_{metric_name}']=list(np.around(np.array(scores[f"test_{metric_name}"]),4)) 
            # Store the DataFrame in the dictionary with a unique key for each sheet
            sheet_name = f"{group}_{method_names[j]}"
            scores_sheets[sheet_name] = scores_df

--- a/model_selection/output_cv_metrics.xlsx
+++ b/model_selection/output_cv_metrics.xlsx