import numpy as np
import pandas as pd
from sklearn.metrics import (
    f1_score,
    precision_score,
    roc_auc_score,
    recall_score,
    confusion_matrix,
    classification_report,
    precision_recall_curve,
)


def recall_precision_at_k(y_true, y_proba, step=50, name_model=""):
    """Compute recall and precision when the top-k scored samples are predicted positive.

    Samples are ranked by ``y_proba`` in descending order. For every cutoff
    ``k`` in ``range(1, n + 1, step)`` the ``k`` best-ranked samples are
    treated as predicted positives and the rest as negatives.

    Args:
        y_true: Binary ground-truth labels; the positive class is ``1``
            (the same convention as scikit-learn's default ``pos_label``).
        y_proba: Scores or probabilities for the positive class, aligned
            with ``y_true``.
        step: Increment between consecutive cutoffs ``k``.
        name_model: Label copied into the ``modelo`` column of every row.

    Returns:
        A ``pandas.DataFrame`` with columns ``K``, ``recall``, ``precision``
        and ``modelo``, one row per cutoff. Recall is reported as 0 when
        ``y_true`` contains no positives. Empty input yields an empty frame
        that still has those four columns.
    """
    order = np.argsort(y_proba)[::-1]
    hits = np.asarray(y_true)[order] == 1

    # Build the cutoffs from ``range`` so an invalid ``step`` (e.g. 0) raises
    # the same ValueError a plain ``for k in range(...)`` loop would.
    ks = np.array(range(1, len(hits) + 1, step), dtype=np.intp)

    # One cumulative sum gives the true positives inside every top-k prefix,
    # instead of re-scoring the full label vector once per cutoff.
    tp_at_k = np.cumsum(hits)[ks - 1]
    total_pos = hits.sum()

    precision = tp_at_k / ks  # k >= 1, so there is no division by zero here
    if total_pos > 0:
        recall = tp_at_k / total_pos
    else:
        recall = np.zeros(len(ks))

    return pd.DataFrame(
        {
            "K": ks,
            "recall": recall,
            "precision": precision,
            "modelo": [name_model] * len(ks),
        }
    )


def summary_metrics(y_test, y_pred, y_proba):
    """Return a dictionary summarising binary-classification performance.

    Args:
        y_test: Ground-truth binary labels (0 / 1).
        y_pred: Hard predictions (0 / 1) aligned with ``y_test``.
        y_proba: Scores or probabilities for the positive class, aligned
            with ``y_test``.

    Returns:
        A dict with the keys ``f1_class1``, ``f1_class0``, ``precision``,
        ``recall``, ``recall_at_100`` (share of all positives found among
        the 100 highest-scored samples), ``auc_roc``, ``TP``, ``TN``, ``FP``
        and ``FN``. Undefined ratios are reported as 0 and ``auc_roc`` is
        NaN when ``y_test`` contains a single class.
    """
    y_true_arr = np.asarray(y_test)

    top_100_idx = np.argsort(y_proba)[::-1][:100]
    n_pos = y_true_arr.sum()
    # Guard the division: with no positives recall@100 is undefined, and 0 is
    # consistent with the ``zero_division=0`` policy used for the metrics below.
    recall_at_100 = y_true_arr[top_100_idx].sum() / n_pos if n_pos > 0 else 0.0

    f1_class1 = f1_score(y_test, y_pred, pos_label=1, zero_division=0)  # class 1 only
    f1_class0 = f1_score(y_test, y_pred, pos_label=0, zero_division=0)  # class 0 only
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)

    # ROC AUC is undefined when only one class is present; report NaN rather
    # than losing the rest of the summary.
    if np.unique(y_true_arr).size > 1:
        auc = roc_auc_score(y_test, y_proba)
    else:
        auc = float("nan")

    # Fix the label set so the matrix is always 2x2; otherwise a single
    # observed class produces a 1x1 matrix and the unpacking below fails.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    return dict(
        f1_class1=f1_class1,
        f1_class0=f1_class0,
        precision=precision,
        recall=recall,
        recall_at_100=recall_at_100,
        auc_roc=auc,
        TP=tp,
        TN=tn,
        FP=fp,
        FN=fn,
    )


def conf_mat(y_test, y_pred):
    """Return the confusion matrix of ``y_pred`` against ``y_test``.

    The result is the 2-D array produced by
    ``sklearn.metrics.confusion_matrix`` (rows are true labels, columns are
    predicted labels). For 0/1 labels, ``.ravel()`` on it yields
    ``(TN, FP, FN, TP)``.
    """
    return confusion_matrix(y_test, y_pred)


def clasif_report(y_test, y_pred):
    """Return scikit-learn's text classification report.

    Undefined metrics are reported as 0 (``zero_division=0``).
    """
    return classification_report(y_test, y_pred, zero_division=0)


def find_best_threshold(y_true, y_proba, metric=f1_score):
    """Find the decision threshold that maximises the F1 score.

    Args:
        y_true: Ground-truth binary labels.
        y_proba: Scores or probabilities for the positive class.
        metric: Accepted for backward compatibility; currently ignored.
            F1 is always the optimised quantity.

    Returns:
        A ``(threshold, f1)`` tuple: the best threshold found on the
        precision-recall curve and the F1 value reached there. If F1 is
        undefined at every threshold, the lowest threshold is returned with
        an F1 of 0.0.
    """
    prec, rec, thr = precision_recall_curve(y_true, y_proba)
    # The small epsilon avoids 0/0 where precision and recall are both 0.
    f1 = 2 * prec * rec / (prec + rec + 1e-9)

    # ``prec`` and ``rec`` carry one trailing point (precision=1, recall=0)
    # that has no matching threshold; drop it so indices line up with ``thr``.
    f1 = f1[:-1]

    if np.all(np.isnan(f1)):
        return thr[0], 0.0

    best_idx = np.nanargmax(f1)
    return thr[best_idx], f1[best_idx]