diff --git a/model_selection/cv_metric_gen.py b/model_selection/cv_metric_gen.py
index b5f4a08ce77b52554ca4739657d7fb95dfd4ff69..e91a790c2614b1fcee93dca465e73647e7127acf 100644
--- a/model_selection/cv_metric_gen.py
+++ b/model_selection/cv_metric_gen.py
@@ -15,9 +15,9 @@ from sklearn.neural_network import MLPClassifier
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.model_selection import StratifiedKFold, cross_validate
+from sklearn.model_selection import StratifiedKFold
 from sklearn.metrics import RocCurveDisplay, auc
-from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve
+from sklearn.metrics import PrecisionRecallDisplay
 import matplotlib.pyplot as plt
 import ast # String to dictionary
 # --------------------------------------------------------------------------------------------------------
@@ -140,6 +140,21 @@ def negative_recall_scorer(clf, X, y):
     cm = confusion_matrix(y, y_pred)
     TN_prop = cm[0,0]/(cm[0,1]+cm[0,0])
     return TN_prop
+
+# Custom scorers for AUROC and AUPRC
+def AUROC_scorer(clf, X, y):
+    if hasattr(clf, "decision_function"):
+        y_score = clf.decision_function(X)
+    else:
+        y_score = clf.predict_proba(X)[:, 1]
+    return roc_auc_score(y, y_score)
+
+def AUPRC_scorer(clf, X, y):
+    if hasattr(clf, "decision_function"):
+        y_score = clf.decision_function(X)
+    else:
+        y_score = clf.predict_proba(X)[:, 1]
+    return average_precision_score(y, y_score)
 # --------------------------------------------------------------------------------------------------------
 
 if __name__ == "__main__":
@@ -159,8 +174,8 @@ if __name__ == "__main__":
         'FN':FN_scorer,
         'FP':FP_scorer,
         'TP':TP_scorer,
-        'AUROC': make_scorer(roc_auc_score),
-        'AUPRC': make_scorer(average_precision_score)
+        'AUROC': AUROC_scorer,
+        'AUPRC': AUPRC_scorer
         }
     method_names = {
         0: "ORIG",
@@ -207,27 +222,12 @@ if __name__ == "__main__":
             # Fit the model on the training data
             model.fit(X_train_fold, y_train_fold)
             # --------------------- SCORINGS ---------------------------
-            # Predict on the test data
-            # Check if the model has a decision_function method
-            if hasattr(model, "decision_function"):
-                # Use decision_function to get the continuous scores for each test sample
-                y_score = model.decision_function(X_test_fold)
-            else:
-                # If decision_function is not available, use predict_proba to get probabilities
-                # predict_proba returns an array with probabilities for all classes
-                # [:, 1] extracts the probability for the positive class (class 1)
-                y_score = model.predict_proba(X_test_fold)[:, 1]
-            # Get the predicted class labels for the test data
-            y_pred = model.predict(X_test_fold)
             # Calculate and store the scores for each metric
             for metric_name, scorer in scorings.items():
-                if metric_name in ['AUROC', 'AUPRC']:
-                    score = scorer._score_func(y_test_fold, y_score)
-                else:
-                    score = scorer._score_func(y_test_fold, y_pred)
+                score = scorer(model, X_test_fold, y_test_fold)
                 fold_scores[metric_name].append(score)
             # --------------------- END SCORINGS ---------------------------
-            # --------------------- CURVES ---------------------------
+            # --------------------- CURVES ---------------------------
             # Generate ROC curve for the fold
             roc_display = RocCurveDisplay.from_estimator(model, X_test_fold, y_test_fold,
                                                          name=f"ROC fold {fold_idx}", alpha=0.6, lw=2,
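
For context on how the new scorers plug in: `AUROC_scorer` and `AUPRC_scorer` follow scikit-learn's `scorer(estimator, X, y)` callable convention, so besides the manual per-fold loop in this file they can also be handed straight to `cross_validate`. The snippet below is a minimal standalone sketch, not code from this repo; the toy dataset and the `LogisticRegression` model are placeholders chosen only for illustration.

```python
# Standalone sketch (illustrative only): using the new scorer callables with cross_validate.
from sklearn.datasets import make_classification          # toy data, not the project's dataset
from sklearn.linear_model import LogisticRegression       # placeholder estimator
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.model_selection import StratifiedKFold, cross_validate

def AUROC_scorer(clf, X, y):
    # Prefer decision_function scores; fall back to positive-class probabilities
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X)
    else:
        y_score = clf.predict_proba(X)[:, 1]
    return roc_auc_score(y, y_score)

def AUPRC_scorer(clf, X, y):
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X)
    else:
        y_score = clf.predict_proba(X)[:, 1]
    return average_precision_score(y, y_score)

X, y = make_classification(n_samples=200, random_state=0)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
results = cross_validate(LogisticRegression(max_iter=1000), X, y,
                         scoring={"AUROC": AUROC_scorer, "AUPRC": AUPRC_scorer}, cv=cv)
print(results["test_AUROC"].mean(), results["test_AUPRC"].mean())
```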