Completed comments

28db5fbd · Joaquin Torres · 6d590283 · 28db5fbd
Commit 28db5fbd authored Jul 08, 2024 by Joaquin Torres
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 5 deletions

model_selection/cv_metric_distr.py model_selection/cv_metric_distr.py +8 -5

No files found.
--- a/model_selection/cv_metric_distr.py
+++ b/model_selection/cv_metric_distr.py
+# CV Metrics Distribution
+# Author: Joaquín Torres Bravo
 """
-    Plotting the distribution of the metrics obtained from cv via boxplots.
+    Plotting the distribution of the metrics obtained from CV via boxplots.
 """

 # Libraries
 # --------------------------------------------------------------------------------------------------------
 import pandas as pd
-import matplotlib.pyplot as plt  # Corrected import
+import matplotlib.pyplot as plt
 # --------------------------------------------------------------------------------------------------------

 if __name__ == "__main__":

    metric_names = ['F1', 'PREC', 'REC', 'ACC', 'NREC', 'TN', 'FN', 'FP', 'TP', 'AUROC', 'AUPRC']
    model_names_simple = ['DT', 'RF', 'Bagging', 'AB', 'XGB', 'LR', 'SVM', 'MLP']
-    model_names_cs = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM']
+    model_names_cs = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM'] # Cost-sensitive learning

    # Distribution of CV metrics
    # --------------------------------------------------------------------------------------------------------
    for group in ['pre', 'post']:
        for method in ['_ORIG', '_ORIG_CW', '_OVER', '_UNDER']:
-            # Read current sheet as df
+            # Read CV metrics sheet for current group and method
            df = pd.read_excel('./output_cv_metrics/metrics.xlsx', sheet_name=group+method)
            # Model names based on cost-sensitive training or not
            if method == '_ORIG_CW':
@@ -27,6 +29,7 @@ if __name__ == "__main__":
                model_names = model_names_simple
            # Create figure for current sheet, one row per metric
            fig, axes = plt.subplots(len(metric_names), 1, figsize=(10, 10 * len(metric_names)))
+            # For each metric
            for metric_id, metric_name in enumerate(metric_names):
                # Get the axis for the current metric
                ax = axes[metric_id]
@@ -45,7 +48,7 @@ if __name__ == "__main__":
                    ax.set_ylim(0, 1)
            plt.tight_layout()
            fig.savefig(f'./output/cv_metrics/distributions/{group}{method}.svg', format='svg', dpi=600)
-            plt.close(fig)  # Close the figure to free up memory
+            plt.close(fig)

    print("Succesful distribution plots generation")
    # --------------------------------------------------------------------------------------------------------
\ No newline at end of file