Commit 9aee207f authored by Joaquin Torres

Tested tuned models and metrics-plots generated

parent c703ac3d
@@ -89,10 +89,10 @@ def get_tuned_models(group_str, method_str):
     }
     tuned_models = {}
     # Iterate through each row of the DataFrame
-    for index, row in tuned_models_df.iterrows():
-        model_name = row[0]
+    for _, row in tuned_models_df.iterrows():
+        model_name = row.iloc[0]
         # Read dictionary
-        parameters = ast.literal_eval(row['Parameters'])
+        parameters = ast.literal_eval(row['Best Parameters'])
         # Add extra parameters
         if model_name == 'AB':
             parameters['algorithm'] = 'SAMME'
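Side note on the get_tuned_models() change above: the 'Best Parameters' column stores each parameter dict as a string, and ast.literal_eval turns it back into keyword arguments for the model constructor. A minimal, self-contained sketch follows; the toy table, the 'Model' column name, and the mapping of 'AB' to AdaBoostClassifier are assumptions for illustration and not part of this commit.

    import ast
    import pandas as pd
    from sklearn.ensemble import AdaBoostClassifier

    # Toy stand-in for the tuning results table; the real file and columns
    # come from the hyperparameter-tuning step and are not shown in this diff.
    tuned_models_df = pd.DataFrame({
        'Model': ['AB'],
        'Best Parameters': ["{'n_estimators': 150, 'learning_rate': 0.5}"],
    })

    tuned_models = {}
    for _, row in tuned_models_df.iterrows():
        model_name = row.iloc[0]  # first column holds the model identifier
        # The stored string is a dict literal, so ast.literal_eval safely rebuilds it
        parameters = ast.literal_eval(row['Best Parameters'])
        if model_name == 'AB':
            parameters['algorithm'] = 'SAMME'  # extra parameter, as in the diff
            # Assumption for illustration: 'AB' maps to AdaBoostClassifier
            tuned_models[model_name] = AdaBoostClassifier(**parameters)
    print(tuned_models)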
@@ -202,7 +202,6 @@ if __name__ == "__main__":
         X_test = data_dic['X_test_' + group]
         y_test = data_dic['y_test_' + group]
         for j, method in enumerate(['', '', 'over_', 'under_']):
-            print(f"{group}-{method_names[j]}")
             # Get train dataset based on group and method
             X_train = data_dic['X_train_' + method + group]
             y_train = data_dic['y_train_' + method + group]
@@ -214,44 +213,66 @@ if __name__ == "__main__":
             fig, axes = plt.subplots(len(models), 3, figsize=(10, 8 * len(models)))
             if len(models) == 1: # Adjustment if there's only one model (axes indexing issue)
                 axes = [axes]
-            # Evaluate each model
+            # Evaluate each model with test dataset
             for model_idx, (model_name, model) in enumerate(models.items()):
-                # ----------- TEMPORAL -------------
-                # Train the model (it was just initialized above)
+                print(f"{group}-{method_names[j]}-{model_name}")
+                # Fit the model on the training data
                 model.fit(X_train, y_train)
-                # --------------------- SCORINGS ---------------------------
-                # Calculate and store the scores for each metric
-                for metric_name, scorer in scorings.items():
-                    score = scorer(model, X_test, y_test)
-                    scores_df.at[model_name, metric_name] = round(score, 4)
-                # -----------------------------------------------------------
-                # --------------------- PLOTS ---------------------------
-                # Check if the model has a decision_function method
                 if hasattr(model, "decision_function"):
+                    # Use the decision function to get scores
                     y_score = model.decision_function(X_test)
                 else:
-                    y_score = model.predict_proba(X_test)[:, 1] # Use probability of positive class
+                    # Otherwise, use the probability estimates and take the probability of the positive class
+                    y_score = model.predict_proba(X_test)[:, 1]
                 # Calculate ROC curve and ROC area for each class
                 fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=model.classes_[1])
-                roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot(ax=axes[model_idx][0])
+                # Plot the ROC curve with thicker line
+                roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr)
+                roc_display.plot(ax=axes[model_idx][0], lw=2)
+                # Plot the diagonal line for the ROC curve
+                axes[model_idx][0].plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
+                axes[model_idx][0].set_title(f'ROC Curve for {group}-{method}-{model_name}')
+                axes[model_idx][0].set_xlabel('False Positive Rate')
+                axes[model_idx][0].set_ylabel('True Positive Rate')
+                axes[model_idx][0].legend(loc='lower right')
                 # Calculate precision-recall curve
                 precision, recall, _ = precision_recall_curve(y_test, y_score, pos_label=model.classes_[1])
-                pr_display = PrecisionRecallDisplay(precision=precision, recall=recall).plot(ax=axes[model_idx][1])
-                # Get confusion matrix plot
+                # Plot the precision-recall curve with thicker line
+                pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
+                pr_display.plot(ax=axes[model_idx][1], lw=2)
+                # Plot the baseline for the PR curve
+                no_skill = len(y_test[y_test == 1]) / len(y_test)
+                axes[model_idx][1].plot([0, 1], [no_skill, no_skill], 'k--', lw=2, label='No Skill')
+                axes[model_idx][1].set_title(f'PR Curve for {group}-{method}-{model_name}')
+                axes[model_idx][1].set_xlabel('Recall')
+                axes[model_idx][1].set_ylabel('Precision')
+                axes[model_idx][1].legend(loc='lower left')
+                # Predict the test data to get confusion matrix
                 y_pred = model.predict(X_test)
+                # Compute confusion matrix
                 cm = confusion_matrix(y_test, y_pred)
+                # Plot the confusion matrix
                 ConfusionMatrixDisplay(cm).plot(ax=axes[model_idx][2])
-                # Give name to plots
-                axes[model_idx][0].set_title(f'ROC Curve for {model_name}')
-                axes[model_idx][1].set_title(f'PR Curve for {model_name}')
-                axes[model_idx][2].set_title(f'CM for {model_name}')
+                axes[model_idx][2].set_title(f'CM for {group}-{method}-{model_name}')
+                # ----------------------------------------------------------
+                # Evaluate at each of the scores of interest
+                for score_name, scorer in scorings.items():
+                    score_value = scorer(model, X_test, y_test)
+                    scores_df.at[model_name, score_name] = score_value
             # Adjust layout and save/show figure
             plt.tight_layout()
-            plt.savefig(f'./test_results/aux_plots/{group}_{method_names[j]}.svg', format='svg', dpi=500)
+            plt.savefig(f'./output_test/plots/{group}_{method_names[j]}.svg', format='svg', dpi=500)
            plt.close(fig)
             # Store the DataFrame in the dictionary with a unique key for each sheet
             sheet_name = f"{group}_{method_names[j]}"
             scores_sheets[sheet_name] = scores_df
     # Write results to Excel file
-    with pd.ExcelWriter('./test_results/testing_tuned_models.xlsx') as writer:
+    with pd.ExcelWriter('./output_test/testing_tuned_models.xlsx') as writer:
         for sheet_name, data in scores_sheets.items():
             data.to_excel(writer, sheet_name=sheet_name)
+    print("Successful evaluation with test dataset")
     # --------------------------------------------------------------------------------------------------------
\ No newline at end of file
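Side note on the scoring loop that now runs after the plots: scorer(model, X_test, y_test) works because each value in scorings is an sklearn scorer callable with the (estimator, X, y) signature. A minimal, self-contained sketch, assuming scorings is built with make_scorer/get_scorer; its actual construction lies outside the hunks shown, and the toy data and model here are purely illustrative.

    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import f1_score, recall_score, make_scorer, get_scorer

    # Toy data and model purely for illustration.
    X_test, y_test = make_classification(n_samples=200, random_state=0)
    model = LogisticRegression(max_iter=1000).fit(X_test, y_test)

    # Assumed construction of the 'scorings' dict; the real one is defined
    # elsewhere in the script and is not part of this diff.
    scorings = {
        'F1': make_scorer(f1_score),
        'Recall': make_scorer(recall_score),
        'AUROC': get_scorer('roc_auc'),
    }

    # Each scorer is called with (estimator, X, y), matching
    # scorer(model, X_test, y_test) in the evaluation loop above.
    scores_df = pd.DataFrame(index=['LR'], columns=list(scorings.keys()))
    for score_name, scorer in scorings.items():
        scores_df.at['LR', score_name] = scorer(model, X_test, y_test)
    print(scores_df)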