Commit 28db5fbd authored by Joaquin Torres

Completed comments

parent 6d590283
# CV Metrics Distribution
# Author: Joaquín Torres Bravo
""" """
Plotting the distribution of the metrics obtained from cv via boxplots. Plotting the distribution of the metrics obtained from CV via boxplots.
""" """
# Libraries # Libraries
# -------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt # Corrected import import matplotlib.pyplot as plt
# -------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------
if __name__ == "__main__": if __name__ == "__main__":
metric_names = ['F1', 'PREC', 'REC', 'ACC', 'NREC', 'TN', 'FN', 'FP', 'TP', 'AUROC', 'AUPRC'] metric_names = ['F1', 'PREC', 'REC', 'ACC', 'NREC', 'TN', 'FN', 'FP', 'TP', 'AUROC', 'AUPRC']
model_names_simple = ['DT', 'RF', 'Bagging', 'AB', 'XGB', 'LR', 'SVM', 'MLP'] model_names_simple = ['DT', 'RF', 'Bagging', 'AB', 'XGB', 'LR', 'SVM', 'MLP']
model_names_cs = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM'] model_names_cs = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM'] # Cost-sensitive learning
# Distribution of cv metrics # Distribution of cv metrics
# -------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------
for group in ['pre', 'post']: for group in ['pre', 'post']:
for method in ['_ORIG', '_ORIG_CW', '_OVER', '_UNDER']: for method in ['_ORIG', '_ORIG_CW', '_OVER', '_UNDER']:
# Read current sheet as df # Read CV metrics sheet for current group and method
df = pd.read_excel('./output_cv_metrics/metrics.xlsx', sheet_name=group+method) df = pd.read_excel('./output_cv_metrics/metrics.xlsx', sheet_name=group+method)
# Model names based on cost-sensitive training or not # Model names based on cost-sensitive training or not
if method == '_ORIG_CW': if method == '_ORIG_CW':
...@@ -27,6 +29,7 @@ if __name__ == "__main__": ...@@ -27,6 +29,7 @@ if __name__ == "__main__":
model_names = model_names_simple model_names = model_names_simple
# Create figure for current sheet, one row per metric # Create figure for current sheet, one row per metric
fig, axes = plt.subplots(len(metric_names), 1, figsize=(10, 10 * len(metric_names))) fig, axes = plt.subplots(len(metric_names), 1, figsize=(10, 10 * len(metric_names)))
# For each metric
for metric_id, metric_name in enumerate(metric_names): for metric_id, metric_name in enumerate(metric_names):
# Get the axis for the current metric # Get the axis for the current metric
ax = axes[metric_id] ax = axes[metric_id]
...@@ -45,7 +48,7 @@ if __name__ == "__main__": ...@@ -45,7 +48,7 @@ if __name__ == "__main__":
ax.set_ylim(0, 1) ax.set_ylim(0, 1)
plt.tight_layout() plt.tight_layout()
fig.savefig(f'./output/cv_metrics/distributions/{group}{method}.svg', format='svg', dpi=600) fig.savefig(f'./output/cv_metrics/distributions/{group}{method}.svg', format='svg', dpi=600)
plt.close(fig) # Close the figure to free up memory plt.close(fig)
print("Succesful distribution plots generation") print("Succesful distribution plots generation")
# -------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment