Ready to implement the precision-recall (PR) curves

9a51e5c3 · Joaquin Torres · 9fa990e0 · 9a51e5c3
Commit 9a51e5c3 authored May 23, 2024 by Joaquin Torres
Show whitespace changes
Inline Side-by-side

Showing with 38 additions and 39 deletions

model_selection/cv_metric_gen.py model_selection/cv_metric_gen.py +38 -39

No files found.
--- a/model_selection/cv_metric_gen.py
+++ b/model_selection/cv_metric_gen.py
@@ -177,7 +177,6 @@ if __name__ == "__main__":
    scores_sheets = {} # To store score dfs as sheets in the same excel file
    for i, group in enumerate(['pre']): # 'post'
        for j, method in enumerate(['']): # '', 'over_', 'under_'
-            # print(f"{group}-{method_names[j]}")
            # Get train dataset based on group and method
            X_train = data_dic['X_train_' + method + group]
            y_train = data_dic['y_train_' + method + group]
@@ -191,18 +190,17 @@ if __name__ == "__main__":
                axes = [axes]
            # Metric generation for each model
            for model_idx, (model_name, model) in enumerate(models.items()):
-                if model_name == 'DT':
                print(f"{group}-{method_names[j]}-{model_name}")
                # Retrieve cv scores for our metrics of interest
                scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
                # Save results of each fold
                for metric_name in scorings.keys():
                    scores_df.loc[model_name + f'_{metric_name}']=list(np.around(np.array(scores[f"test_{metric_name}"]),4)) 
-                    # Generate ROC curves
+                # ---------- Generate ROC curves ----------
                mean_fpr = np.linspace(0, 1, 100) 
                tprs, aucs = [], []
-                    cmap = plt.get_cmap('tab10')  # Colormap for stronger colors
-                    # Loop through each fold in the cross-validation
+                cmap = plt.get_cmap('tab10')  # Colormap
+                # Loop through each fold in the cross-validation (redoing cv for simplicity)
                for fold_idx, (train, test) in enumerate(cv.split(X_train, y_train)):
                    # Fit the model on the training data
                    model.fit(X_train[train], y_train[train])
@@ -229,6 +227,7 @@ if __name__ == "__main__":
                axes[model_idx].set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
                                    title=f"ROC Curve - {model_name} ({group}-{method_names[j]})")
                axes[model_idx].legend(loc="lower right")
+                # ---------- END ROC curves Generation ----------
            # Store the DataFrame in the dictionary with a unique key for each sheet
            sheet_name = f"{group}_{method_names[j]}"
            scores_sheets[sheet_name] = scores_df