Commit 9a41f53a authored by Joaquin Torres

Simplified scorings computations

parent 283ca8df
......@@ -15,9 +15,9 @@ from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay, auc
from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve
from sklearn.metrics import PrecisionRecallDisplay
import matplotlib.pyplot as plt
import ast # String to dictionary
# --------------------------------------------------------------------------------------------------------
......@@ -140,6 +140,21 @@ def negative_recall_scorer(clf, X, y):
    cm = confusion_matrix(y, y_pred)
    TN_prop = cm[0,0]/(cm[0,1]+cm[0,0])
    return TN_prop
# Custom scorers for AUROC and AUPRC
def AUROC_scorer(clf, X, y):
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X)
    else:
        y_score = clf.predict_proba(X)[:, 1]
    return roc_auc_score(y, y_score)
def AUPRC_scorer(clf, X, y):
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X)
    else:
        y_score = clf.predict_proba(X)[:, 1]
    return average_precision_score(y, y_score)
# --------------------------------------------------------------------------------------------------------
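# Illustrative sketch (an assumed example, not part of this commit): the custom
# scorers above follow scikit-learn's callable-scorer signature
# scorer(estimator, X, y), so they can be called directly on a fitted model or
# collected in a scoring dict, as done in the main block below. The demo dataset
# and classifier here are placeholders chosen only to exercise the scorers.
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
clf_demo = LogisticRegression(max_iter=1000).fit(X_demo, y_demo)
for demo_name, demo_scorer in {'AUROC': AUROC_scorer, 'AUPRC': AUPRC_scorer}.items():
    # LogisticRegression exposes decision_function, so its margin scores are used;
    # estimators without it fall back to predict_proba[:, 1] inside each scorer
    print(demo_name, demo_scorer(clf_demo, X_demo, y_demo))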
if __name__ == "__main__":
......@@ -159,8 +174,8 @@ if __name__ == "__main__":
    'FN':FN_scorer,
    'FP':FP_scorer,
    'TP':TP_scorer,
    'AUROC': make_scorer(roc_auc_score),
    'AUPRC': make_scorer(average_precision_score)
    'AUROC': AUROC_scorer,
    'AUPRC': AUPRC_scorer
}
method_names = {
0: "ORIG",
......@@ -207,27 +222,12 @@ if __name__ == "__main__":
# Fit the model on the training data
model.fit(X_train_fold, y_train_fold)
# --------------------- SCORINGS ---------------------------
# Predict on the test data
# Check if the model has a decision_function method
if hasattr(model, "decision_function"):
    # Use decision_function to get the continuous scores for each test sample
    y_score = model.decision_function(X_test_fold)
else:
    # If decision_function is not available, use predict_proba to get probabilities
    # predict_proba returns an array with probabilities for all classes
    # [:, 1] extracts the probability for the positive class (class 1)
    y_score = model.predict_proba(X_test_fold)[:, 1]
# Get the predicted class labels for the test data
y_pred = model.predict(X_test_fold)
# Calculate and store the scores for each metric
for metric_name, scorer in scorings.items():
    if metric_name in ['AUROC', 'AUPRC']:
        score = scorer._score_func(y_test_fold, y_score)
    else:
        score = scorer._score_func(y_test_fold, y_pred)
    score = scorer(model, X_test_fold, y_test_fold)
    fold_scores[metric_name].append(score)
# --------------------- END SCORINGS ---------------------------
# --------------------- CURVES ---------------------------
# Generate ROC curve for the fold
roc_display = RocCurveDisplay.from_estimator(model, X_test_fold, y_test_fold,
                                             name=f"ROC fold {fold_idx}", alpha=0.6, lw=2,
......
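With the scorer callables in place, the per-fold logic reduces to a single fit followed by one scorer call per metric. A minimal, self-contained sketch of that pattern, reusing AUROC_scorer and AUPRC_scorer from above but with assumed synthetic data and a stand-in classifier rather than the pipeline's actual models:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=300, weights=[0.8, 0.2], random_state=0)
scorings = {'AUROC': AUROC_scorer, 'AUPRC': AUPRC_scorer}
fold_scores = {name: [] for name in scorings}
fig, ax = plt.subplots()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for fold_idx, (train_index, test_index) in enumerate(skf.split(X, y)):
    model = LogisticRegression(max_iter=1000)
    model.fit(X[train_index], y[train_index])
    # One call per metric; each scorer handles decision_function / predict_proba internally
    for metric_name, scorer in scorings.items():
        fold_scores[metric_name].append(scorer(model, X[test_index], y[test_index]))
    # Per-fold ROC curve drawn on a shared axis, mirroring the loop above
    RocCurveDisplay.from_estimator(model, X[test_index], y[test_index],
                                   name=f"ROC fold {fold_idx}", alpha=0.6, lw=2, ax=ax)
print({name: float(np.mean(scores)) for name, scores in fold_scores.items()})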