# CV Metrics Distribution
# Author: Joaquín Torres Bravo
"""
Plotting the distribution of the metrics obtained from CV via boxplots.

For every (group, method) sheet of the CV metrics workbook, one SVG figure is
written containing one boxplot panel per metric, with one box per model.
"""
# Libraries
# --------------------------------------------------------------------------------------------------------
import os

import pandas as pd
import matplotlib.pyplot as plt
# --------------------------------------------------------------------------------------------------------

# Constants
# --------------------------------------------------------------------------------------------------------
METRIC_NAMES = ['F1', 'PREC', 'REC', 'ACC', 'NREC', 'TN', 'FN', 'FP', 'TP', 'AUROC', 'AUPRC']
MODEL_NAMES_SIMPLE = ['DT', 'RF', 'Bagging', 'AB', 'XGB', 'LR', 'SVM', 'MLP']
MODEL_NAMES_CS = ['DT', 'RF', 'Bagging', 'AB', 'LR', 'SVM']  # Cost-sensitive learning

# Metrics whose y-axis is pinned to [0, 1].
# NOTE(review): 'NREC' is not in this list in the original script; confirm
# whether that omission is intentional before adding it.
UNIT_INTERVAL_METRICS = ('F1', 'PREC', 'REC', 'ACC', 'AUROC', 'AUPRC')

GROUPS = ['pre', 'post']
METHODS = ['_ORIG', '_ORIG_CW', '_OVER', '_UNDER']
COST_SENSITIVE_METHOD = '_ORIG_CW'

METRICS_PATH = './results/cv_metrics/metrics.xlsx'
OUTPUT_DIR = './results/cv_metrics/distributions'
ROW_LABEL_COLUMN = 'Unnamed: 0'  # Column holding the '<model>_<metric>' row names
# --------------------------------------------------------------------------------------------------------


def _metric_values(df, row_name, sheet_name):
    """Return the values of the row labelled ``row_name`` (label column excluded).

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet contents; row labels are looked up in the 'Unnamed: 0' column.
    row_name : str
        Label of the form '<model>_<metric>'.
    sheet_name : str
        Name of the sheet, used only for the error message.

    Raises
    ------
    KeyError
        If no row carries the requested label.
    """
    matches = df.loc[df[ROW_LABEL_COLUMN] == row_name]
    if matches.empty:
        # Fail with the offending row/sheet instead of an opaque
        # "single positional indexer is out-of-bounds" IndexError.
        raise KeyError(f"Row '{row_name}' not found in sheet '{sheet_name}' of {METRICS_PATH}")
    # First matching row, every column after the label column
    return matches.iloc[0, 1:].values


def _plot_sheet(df, sheet_name, model_names):
    """Draw one boxplot panel per metric for a sheet and save the figure as SVG.

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet contents as read from the metrics workbook.
    sheet_name : str
        '<group><method>' identifier; used in panel titles and the file name.
    model_names : list of str
        Models to show, one box each, in this order.
    """
    # One row per metric; squeeze=False keeps `axes` 2-D even for a single metric
    fig, axes = plt.subplots(len(METRIC_NAMES), 1,
                             figsize=(10, 10 * len(METRIC_NAMES)),
                             squeeze=False)
    try:
        for ax, metric_name in zip(axes[:, 0], METRIC_NAMES):
            # One array of CV values per model
            metric_data = [
                _metric_values(df, f'{model_name}_{metric_name}', sheet_name)
                for model_name in model_names
            ]
            # Plot boxplot for the current metric across all models.
            # Tick labels are set separately because boxplot's `labels=`
            # keyword is deprecated since Matplotlib 3.9 (renamed `tick_labels`).
            ax.boxplot(metric_data)
            ax.set_xticklabels(model_names)
            ax.set_title(f'{metric_name} for {sheet_name}')
            # Set y-axis limits for metrics that take values in the interval [0, 1]
            if metric_name in UNIT_INTERVAL_METRICS:
                ax.set_ylim(0, 1)
        fig.tight_layout()
        fig.savefig(f'{OUTPUT_DIR}/{sheet_name}.svg', format='svg', dpi=600)
    finally:
        # Always release the (very tall) figure, even if plotting failed
        plt.close(fig)


def main():
    """Generate the CV metric distribution plots for every group and method."""
    # Open the workbook once instead of re-reading the file for each sheet
    with pd.ExcelFile(METRICS_PATH) as workbook:
        # Make sure the destination exists so savefig does not fail
        os.makedirs(OUTPUT_DIR, exist_ok=True)

        # Distribution of cv metrics
        # ------------------------------------------------------------------------------------------------
        for group in GROUPS:
            for method in METHODS:
                sheet_name = group + method
                # Read CV metrics sheet for current group and method
                df = pd.read_excel(workbook, sheet_name=sheet_name)
                # Model names based on cost-sensitive training or not
                if method == COST_SENSITIVE_METHOD:
                    model_names = MODEL_NAMES_CS
                else:
                    model_names = MODEL_NAMES_SIMPLE
                _plot_sheet(df, sheet_name, model_names)

    print("Succesful distribution plots generation")


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------------------------------