"""
    Evaluating optimized models with test data
"""

# Libraries
# --------------------------------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score, accuracy_score, roc_auc_score, average_precision_score
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import RocCurveDisplay, roc_curve
from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
import ast # String to dictionary
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable 
# --------------------------------------------------------------------------------------------------------

# Reading data
# --------------------------------------------------------------------------------------------------------
def read_data():
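    """Load the test and training splits (original, oversampled, undersampled)
    for the 'pre' and 'post' groups from .npy files and return them in a
    single dictionary keyed by array name."""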
    # Load test data
    X_test_pre = np.load('../gen_train_data/data/output/pre/X_test_pre.npy', allow_pickle=True)
    y_test_pre = np.load('../gen_train_data/data/output/pre/y_test_pre.npy', allow_pickle=True)
    X_test_post = np.load('../gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True)
    y_test_post = np.load('../gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True)

    # Load ORIGINAL training data
    X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
    y_train_pre = np.load('../gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
    X_train_post = np.load('../gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
    y_train_post = np.load('../gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)

    # Load oversampled training data
    X_train_over_pre = np.load('../gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
    y_train_over_pre = np.load('../gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
    X_train_over_post = np.load('../gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
    y_train_over_post = np.load('../gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)

    # Load undersampled training data
    X_train_under_pre = np.load('../gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
    y_train_under_pre = np.load('../gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
    X_train_under_post = np.load('../gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
    y_train_under_post = np.load('../gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)

    data_dic = {
        "X_test_pre": X_test_pre,
        "y_test_pre": y_test_pre,
        "X_test_post": X_test_post,
        "y_test_post": y_test_post,
        "X_train_pre": X_train_pre,
        "y_train_pre": y_train_pre,
        "X_train_post": X_train_post,
        "y_train_post": y_train_post,
        "X_train_over_pre": X_train_over_pre,
        "y_train_over_pre": y_train_over_pre,
        "X_train_over_post": X_train_over_post,
        "y_train_over_post": y_train_over_post,
        "X_train_under_pre": X_train_under_pre,
        "y_train_under_pre": y_train_under_pre,
        "X_train_under_post": X_train_under_post,
        "y_train_under_post": y_train_under_post,
    }

    return data_dic
# --------------------------------------------------------------------------------------------------------

# Returning tuned models for each situation
# --------------------------------------------------------------------------------------------------------
def get_tuned_models(group_str, method_str):
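    """Rebuild the tuned models for the given group ('pre'/'post') and method
    ('ORIG', 'ORIG_CW', 'OVER', 'UNDER') from the hyperparameters stored in
    the corresponding sheet of the tuning results workbook."""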

    # Read the sheet corresponding to this group and method, containing the tuned models and their hyperparameters
    tuned_models_df = pd.read_excel("./output_hyperparam/hyperparamers.xlsx", sheet_name=f"{group_str}_{method_str}")
    # Mapping from model abbreviations to sklearn model classes
    model_mapping = {
        'DT': DecisionTreeClassifier,
        'RF': RandomForestClassifier,
        'Bagging': BaggingClassifier,
        'AB': AdaBoostClassifier,
        'XGB': XGBClassifier,
        'LR': LogisticRegression,
        'SVM': SVC,
        'MLP': MLPClassifier
    }
    tuned_models = {}
    # Iterate through each row of the DataFrame
    for _, row in tuned_models_df.iterrows():
        model_name = row.iloc[0]
        # Parse the stored hyperparameter string into a dictionary
        parameters = ast.literal_eval(row['Best Parameters'])
        # Add extra parameters 
        if model_name == 'AB':
            parameters['algorithm'] = 'SAMME'
        elif model_name == 'LR':
            parameters['max_iter'] = 1000
        elif model_name == 'SVM':
            parameters['max_iter'] = 1000
            parameters['probability'] = True
        elif model_name == "MLP":
            parameters['max_iter'] = 500
        # Add class_weight argument for cost-sensitive learning method
        if 'CW' in method_str:
            if model_name == 'Bagging' or model_name == 'AB':
                parameters['estimator'] = DecisionTreeClassifier(class_weight='balanced')
            else:
                parameters['class_weight'] = 'balanced'
        # Fetch class
        model_class = model_mapping[model_name]
        # Initialize model
        tuned_models[model_name] = model_class(**parameters)
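        # Note: models are only instantiated here; each one is fitted later on
        # the matching training set before evaluation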
    return tuned_models
# --------------------------------------------------------------------------------------------------------

# Scorers
# --------------------------------------------------------------------------------------------------------
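# Note: scikit-learn's confusion_matrix uses rows = true labels and
# columns = predicted labels, so cm[0,0]=TN, cm[0,1]=FP, cm[1,0]=FN, cm[1,1]=TP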
def TN_scorer(clf, X, y):
    """Gives the number of true negatives (negative samples correctly predicted as negative)"""
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    TN = cm[0,0]
    return TN
def FN_scorer(clf, X, y):
    """Gives the number of false negatives (positive samples wrongly predicted as negative)"""
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    FN = cm[1,0]
    return FN
def FP_scorer(clf, X, y):
    """Gives the number of false positives (negative samples wrongly predicted as positive)"""
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    FP = cm[0,1]
    return FP
def TP_scorer(clf, X, y):
    """Gives the number of true positives (positive samples correctly predicted as positive)"""
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    TP = cm[1,1]
    return TP

def negative_recall_scorer(clf, X, y):
    """Gives the negative recall defined as the (number of true_negative_samples)/(total number of negative samples)"""
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    TN_prop = cm[0,0]/(cm[0,1]+cm[0,0])
    return TN_prop

# Custom scorers for AUROC and AUPRC
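# Both prefer the margin given by decision_function when available (e.g. SVC),
# falling back to the positive-class probability from predict_proba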
def AUROC_scorer(clf, X, y):
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X)
    else:
        y_score = clf.predict_proba(X)[:, 1]
    return roc_auc_score(y, y_score)

def AUPRC_scorer(clf, X, y):
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X)
    else:
        y_score = clf.predict_proba(X)[:, 1]
    return average_precision_score(y, y_score)
# --------------------------------------------------------------------------------------------------------

if __name__ == "__main__":
    # Reading data
    data_dic = read_data()

    # Setup
    # --------------------------------------------------------------------------------------------------------
    # Scorings to use for model evaluation
    scorings = {
        'F1': make_scorer(f1_score),
        'NREC': negative_recall_scorer,
        'REC': make_scorer(recall_score),
        'PREC': make_scorer(precision_score),
        'ACC': make_scorer(accuracy_score),
        'TN': TN_scorer,
        'FN': FN_scorer,
        'FP': FP_scorer,
        'TP': TP_scorer,
        'AUROC': AUROC_scorer,
        'AUPRC': AUPRC_scorer
    }
    method_names = {
        0: "ORIG",
        1: "ORIG_CW",
        2: "OVER",
        3: "UNDER"
    }
    # --------------------------------------------------------------------------------------------------------

    # Evaluating performance using test dataset
    # --------------------------------------------------------------------------------------------------------
    scores_sheets = {} # To store score dfs as sheets in the same excel file
    for i, group in enumerate(['pre', 'post']):
        # Get test dataset based on group
        X_test = data_dic['X_test_' + group]
        y_test = data_dic['y_test_' + group]
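        # '' appears twice below because ORIG and ORIG_CW are both fitted on the
        # original training split; OVER/UNDER use the resampled variants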
        for j, method in enumerate(['', '', 'over_', 'under_']): 
            # Get train dataset based on group and method
            X_train = data_dic['X_train_' + method + group]
            y_train = data_dic['y_train_' + method + group]
            # Get tuned models for this group and method
            models = get_tuned_models(group, method_names[j])
            # Scores df
            scores_df = pd.DataFrame(index=models.keys(), columns=scorings.keys())
            # Create a figure for all models in this group-method
            fig, axes = plt.subplots(len(models), 3, figsize=(10, 8 * len(models)))
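            # One row per model; columns: ROC curve, PR curve, confusion matrix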
            # Evaluate each model with test dataset
            for model_idx, (model_name, model) in enumerate(models.items()):
                print(f"{group}-{method_names[j]}-{model_name}")
                # Fit the model on the training data
                model.fit(X_train, y_train)
                # --------------------- SCORINGS ---------------------------
                # Calculate and store the scores for each metric
                for metric_name, scorer in scorings.items():
                    score = scorer(model, X_test, y_test)
                    scores_df.at[model_name, metric_name] = round(score, 4)
                # -----------------------------------------------------------
                # --------------------- PLOTS ---------------------------
                # Check if the model has a decision_function method
                if hasattr(model, "decision_function"):
                    # Use the decision function to get scores
                    y_score = model.decision_function(X_test)
                else:
                    # Otherwise, use the probability estimates and take the probability of the positive class
                    y_score = model.predict_proba(X_test)[:, 1]
                # Calculate the ROC curve for the positive class
                fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=model.classes_[1])
                # Plot the ROC curve with thicker line
                roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr)
                roc_display.plot(ax=axes[model_idx][0], lw=2)
                # Plot the diagonal line for the ROC curve
                axes[model_idx][0].plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
                axes[model_idx][0].set_title(f'ROC Curve for {group}-{method_names[j]}-{model_name}')
                axes[model_idx][0].set_xlabel('False Positive Rate')
                axes[model_idx][0].set_ylabel('True Positive Rate')
                axes[model_idx][0].legend(loc='lower right')
                # Calculate precision-recall curve
                precision, recall, _ = precision_recall_curve(y_test, y_score, pos_label=model.classes_[1])
                # Plot the precision-recall curve with thicker line
                pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
                pr_display.plot(ax=axes[model_idx][1], lw=2)
                # Plot the baseline for the PR curve
                no_skill = len(y_test[y_test == 1]) / len(y_test)
                axes[model_idx][1].plot([0, 1], [no_skill, no_skill], 'k--', lw=2, label='No Skill')
                axes[model_idx][1].set_title(f'PR Curve for {group}-{method_names[j]}-{model_name}')
                axes[model_idx][1].set_xlabel('Recall')
                axes[model_idx][1].set_ylabel('Precision')
                axes[model_idx][1].legend(loc='lower left')
                # Predict on the test data to get the confusion matrix
                y_pred = model.predict(X_test)
                # Compute confusion matrix
                cm = confusion_matrix(y_test, y_pred)
                # Plot the confusion matrix
                cmp = ConfusionMatrixDisplay(cm)
                # Deactivate default colorbar
                cmp.plot(ax=axes[model_idx][2], colorbar=False, cmap=sns.color_palette("light:b", as_cmap=True))

                # Adding custom colorbar using make_axes_locatable
                divider = make_axes_locatable(axes[model_idx][2])
                cax = divider.append_axes("right", size="5%", pad=0.05)
                plt.colorbar(cmp.im_, cax=cax)

                axes[model_idx][2].set_title(f'CM for {group}-{method_names[j]}-{model_name}')
                axes[model_idx][2].set_xlabel('Predicted label')
                axes[model_idx][2].set_ylabel('True label')
                # ----------------------------------------------------------
            # Adjust layout and save the figure
            plt.tight_layout()
            plt.savefig(f'./output_test/plots/{group}_{method_names[j]}.svg', format='svg', dpi=500)
            plt.close(fig)
            # Store the DataFrame in the dictionary with a unique key for each sheet
            sheet_name = f"{group}_{method_names[j]}"
            scores_sheets[sheet_name] = scores_df
    # Write results to Excel file
    with pd.ExcelWriter('./output_test/testing_tuned_models.xlsx') as writer:
        for sheet_name, data in scores_sheets.items():
            data.to_excel(writer, sheet_name=sheet_name)
    print("Successful evaluation with test dataset")
# --------------------------------------------------------------------------------------------------------