From 28db5fbd85a92d962d5dcd048594fa988a8466a8 Mon Sep 17 00:00:00 2001
From: joaquintb <joaquintobrw@gmail.com>
Date: Mon, 8 Jul 2024 12:18:06 +0200
Subject: [PATCH] Completed comments

---
 model_selection/cv_metric_distr.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/model_selection/cv_metric_distr.py b/model_selection/cv_metric_distr.py
index 845eb0f..0dfe30d 100644
--- a/model_selection/cv_metric_distr.py
+++ b/model_selection/cv_metric_distr.py
@@ -1,24 +1,26 @@
+# CV Metrics Distribution
+# Author: Joaquín Torres Bravo
 """
-    Plotting the distribution of the metrics obtained from cv via boxplots.
+    Plot the distribution of the cross-validation (CV) metrics as boxplots.
 """
 
 # Libraries
 # --------------------------------------------------------------------------------------------------------
 import pandas as pd
-import matplotlib.pyplot as plt  # Corrected import
+import matplotlib.pyplot as plt
 # --------------------------------------------------------------------------------------------------------
 
 if __name__ == "__main__":
 
     metric_names = ['F1', 'PREC', 'REC', 'ACC', 'NREC', 'TN', 'FN', 'FP', 'TP', 'AUROC', 'AUPRC']
     model_names_simple = ['DT', 'RF', 'Bagging', 'AB', 'XGB', 'LR', 'SVM', 'MLP']
-    model_names_cs = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM']
+    model_names_cs = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM']  # Models used with cost-sensitive learning
 
     # Distribution of cv metrics
     # --------------------------------------------------------------------------------------------------------
     for group in ['pre', 'post']:
         for method in ['_ORIG', '_ORIG_CW', '_OVER', '_UNDER']:
-            # Read current sheet as df
+            # Read CV metrics sheet for current group and method
             df = pd.read_excel('./output_cv_metrics/metrics.xlsx', sheet_name=group+method)
             # Model names based on cost-senstive training or not
             if method == '_ORIG_CW':
@@ -27,6 +29,7 @@ if __name__ == "__main__":
                 model_names = model_names_simple
             # Create figure for current sheet, one row per metric
             fig, axes = plt.subplots(len(metric_names), 1, figsize=(10, 10 * len(metric_names)))
+            # Fill one subplot row per metric
             for metric_id, metric_name in enumerate(metric_names):
                 # Get the axis for the current metric
                 ax = axes[metric_id]
@@ -45,7 +48,7 @@ if __name__ == "__main__":
                     ax.set_ylim(0, 1)
             plt.tight_layout()
             fig.savefig(f'./output/cv_metrics/distributions/{group}{method}.svg', format='svg', dpi=600)
-            plt.close(fig)  # Close the figure to free up memory
+            plt.close(fig)
 
     print("Succesful distribution plots generation")
     # --------------------------------------------------------------------------------------------------------
\ No newline at end of file
-- 
2.24.1