COMPARA / covid_analysis · Commits

Commit 72e7890d
Authored May 23, 2024 by Joaquin Torres
Preparing for ROC curve generation

Parent: 61193933
Showing 1 changed file with 20 additions and 11 deletions.
model_selection/cv_metric_gen.py  +20 -11  (view file @ 72e7890d)
@@ -16,7 +16,7 @@ from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.model_selection import StratifiedKFold, cross_validate
-from sklearn.metrics import RocCurveDisplay, roc_curve
+from sklearn.metrics import RocCurveDisplay, roc_curve, auc
 from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve
 import matplotlib.pyplot as plt
 import ast  # String to dictionary
@@ -186,7 +186,7 @@ if __name__ == "__main__":
     # Scores df -> one column per cv split, one row for each model-metric
     scores_df = pd.DataFrame(columns=range(1, 11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()])
     # Create a figure for all models in this group-method
-    fig, axes = plt.subplots(len(models), 2, figsize=(10, 8*len(models)))
+    fig, axes = plt.subplots(len(models), 1, figsize=(10, 8*len(models)))
     if len(models) == 1:  # Adjustment if there's only one model (axes indexing issue)
         axes = [axes]
     # Metric generation for each model
@@ -200,31 +200,40 @@ if __name__ == "__main__":
         # Generate ROC curves
         mean_fpr = np.linspace(0, 1, 100)
         tprs, aucs = [], []
         # Loop through each fold in the cross-validation
         for fold_idx, (train, test) in enumerate(cv.split(X_train, y_train)):
             # Fit the model on the training data
             model.fit(X_train[train], y_train[train])
-            viz = RocCurveDisplay.from_estimator(model, X_train[test], y_train[test], name=f"ROC fold {fold_idx}", alpha=0.3, lw=1, ax=axes[model_idx])
-            interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
+            # Use RocCurveDisplay to generate the ROC curve
+            roc_display = RocCurveDisplay.from_estimator(model, X_train[test], y_train[test], name=f"ROC fold {fold_idx}", alpha=0.3, lw=1, ax=axes[model_idx])
+            # Interpolate the true positive rates to get a smooth curve
+            interp_tpr = np.interp(mean_fpr, roc_display.fpr, roc_display.tpr)
             interp_tpr[0] = 0.0
             # Append the interpolated TPR and AUC for this fold
             tprs.append(interp_tpr)
-            aucs.append(viz.roc_auc)
+            aucs.append(roc_display.roc_auc)
         # Plot the diagonal line representing random guessing
         axes[model_idx].plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
         # Compute the mean and standard deviation of the TPRs
         mean_tpr = np.mean(tprs, axis=0)
         mean_tpr[-1] = 1.0
-        mean_auc = auc(mean_fpr, mean_tpr)
+        mean_auc = auc(mean_fpr, mean_tpr)  # Calculate the mean AUC
         std_auc = np.std(aucs)
         # Plot the mean ROC curve
         axes[model_idx].plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc), lw=2, alpha=.8)
         # Plot the standard deviation of the TPRs
         std_tpr = np.std(tprs, axis=0)
         tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
         tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
         axes[model_idx].fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.')
         # Set plot limits and title
         axes[model_idx].set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title=f"ROC Curve - {model_name} ({group}-{method_names[j]})")
         axes[model_idx].legend(loc="lower right")
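
For context, the block below is a minimal, self-contained sketch of the same per-fold ROC averaging pattern that the diff prepares, runnable outside the repository. The synthetic dataset (make_classification) and the single LogisticRegression estimator plotted on one axis are placeholder assumptions, not part of cv_metric_gen.py, which iterates over several models and scoring configurations.

# Hedged sketch of the mean-ROC-over-CV-folds pattern; data and model are assumptions.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay, auc

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
model = LogisticRegression(max_iter=1000)
cv = StratifiedKFold(n_splits=10)

mean_fpr = np.linspace(0, 1, 100)   # common FPR grid for averaging folds
tprs, aucs = [], []
fig, ax = plt.subplots(figsize=(8, 8))

for fold_idx, (train, test) in enumerate(cv.split(X, y)):
    model.fit(X[train], y[train])
    # One ROC curve per fold, drawn on the shared axis
    roc_display = RocCurveDisplay.from_estimator(
        model, X[test], y[test],
        name=f"ROC fold {fold_idx}", alpha=0.3, lw=1, ax=ax,
    )
    # Interpolate the fold's TPR onto the common grid so curves can be averaged
    interp_tpr = np.interp(mean_fpr, roc_display.fpr, roc_display.tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(roc_display.roc_auc)

# Mean curve, mean AUC, and the +/- 1 std. dev. band across folds
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc, std_auc = auc(mean_fpr, mean_tpr), np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b', lw=2, alpha=.8,
        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc))
std_tpr = np.std(tprs, axis=0)
ax.fill_between(mean_fpr, np.maximum(mean_tpr - std_tpr, 0),
                np.minimum(mean_tpr + std_tpr, 1), color='grey', alpha=.2,
                label=r'$\pm$ 1 std. dev.')
ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
ax.legend(loc="lower right")
plt.show()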