COMPARA / covid_analysis / Commits / 61193933

Commit 61193933 authored May 23, 2024 by Joaquin Torres
parent b497f37d

Generating ROC curves for each fold in progress

Showing 1 changed file with 47 additions and 5 deletions

model_selection/cv_metric_gen.py (+47, -5)
@@ -183,25 +183,67 @@ if __name__ == "__main__":
             y_train = data_dic['y_train_' + method + group]
             # Get tuned models for this group and method
             models = get_tuned_models(group, method_names[j])
-            # Scores df
-            # One column per cv split, one row for each model-metric
+            # Scores df -> one column per cv split, one row for each model-metric
             scores_df = pd.DataFrame(columns=range(1, 11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()])
+            # Create a figure for all models in this group-method
+            fig, axes = plt.subplots(len(models), 2, figsize=(10, 8 * len(models)))
+            if len(models) == 1:  # Adjustment if there's only one model (axes indexing issue)
+                axes = [axes]
             # Metric generation for each model
-            for model_name, model in models.items():
+            for model_idx, (model_name, model) in enumerate(models.items()):
                 print(f"{group}-{method_names[j]}-{model_name}")
                 # Retrieve cv scores for our metrics of interest
                 scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
                 # Save results of each fold
                 for metric_name in scorings.keys():
                     scores_df.loc[model_name + f'_{metric_name}'] = list(np.around(np.array(scores[f"test_{metric_name}"]), 4))
+                # Generate ROC curves
+                mean_fpr = np.linspace(0, 1, 100)
+                tprs, aucs = [], []
+                for fold_idx, (train, test) in enumerate(cv.split(X_train, y_train)):
+                    model.fit(X_train[train], y_train[train])
+                    viz = RocCurveDisplay.from_estimator(model, X_train[test], y_train[test], name=f"ROC fold {fold_idx}", alpha=0.3, lw=1, ax=axes[model_idx])
+                    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
+                    interp_tpr[0] = 0.0
+                    tprs.append(interp_tpr)
+                    aucs.append(viz.roc_auc)
+                axes[model_idx].plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', alpha=.8)
+                mean_tpr = np.mean(tprs, axis=0)
+                mean_tpr[-1] = 1.0
+                mean_auc = auc(mean_fpr, mean_tpr)
+                std_auc = np.std(aucs)
+                axes[model_idx].plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc), lw=2, alpha=.8)
+                std_tpr = np.std(tprs, axis=0)
+                tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
+                tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
+                axes[model_idx].fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.')
+                axes[model_idx].set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title=f"ROC Curve - {model_name} ({group}-{method_names[j]})")
+                axes[model_idx].legend(loc="lower right")
+            # Saving curves plots
+            plt.tight_layout()
+            plt.savefig(f'./output_cv_metrics/curves/{group}_{method_names[j]}.svg', format='svg', dpi=500)
+            plt.close(fig)
             # Store the DataFrame in the dictionary with a unique key for each sheet
             sheet_name = f"{group}_{method_names[j]}"
             scores_sheets[sheet_name] = scores_df
     # Write results to Excel file
-    with pd.ExcelWriter('./output_cv_metrics.xlsx') as writer:
+    with pd.ExcelWriter('./output_cv_metrics/metrics.xlsx') as writer:
         for sheet_name, data in scores_sheets.items():
             data.to_excel(writer, sheet_name=sheet_name)
-    print("Successful metric generation for tuned models")
+    print("Successful cv metric generation for tuned models")
\ No newline at end of file
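Background on the metrics table this hunk fills in: scikit-learn's cross_validate, when given a dict of scorers, returns one array per scorer keyed as test_<name>, with one value per CV split; those arrays are what the script rounds and writes into the rows of scores_df (one row per model-metric pair, one column per fold). A minimal, self-contained sketch of that pattern, using synthetic data and a stand-in classifier that are not part of this repository:

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate

# Synthetic stand-ins for X_train / y_train (illustration only)
X, y = make_classification(n_samples=300, n_features=10, random_state=0)
model = RandomForestClassifier(random_state=0)
scorings = {"roc_auc": "roc_auc", "accuracy": "accuracy"}  # assumed scorer dict
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

# One array of 10 test scores per scorer, keyed 'test_roc_auc', 'test_accuracy', ...
scores = cross_validate(model, X, y, scoring=scorings, cv=cv,
                        return_train_score=True, n_jobs=1)

# One row per model-metric pair, one column per CV split (1..10)
scores_df = pd.DataFrame(columns=range(1, 11),
                         index=[f"rf_{metric}" for metric in scorings.keys()])
for metric_name in scorings.keys():
    scores_df.loc[f"rf_{metric_name}"] = list(np.around(np.array(scores[f"test_{metric_name}"]), 4))
print(scores_df)

With n_splits=10, each test_<metric> array has exactly ten entries, matching the fold columns 1-10 of scores_df.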
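For the ROC part of the commit, the added code follows the usual per-fold ROC recipe (as in scikit-learn's cross-validation ROC example): plot each fold's curve, interpolate its TPR onto a common FPR grid so the curves can be averaged point-wise, then draw the mean curve with a ±1 standard deviation band. A minimal standalone sketch of that recipe, again with synthetic data and a placeholder model rather than anything from this repository:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import RocCurveDisplay, auc

# Synthetic data and model purely for illustration
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
model = LogisticRegression(max_iter=1000)

cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid shared by all folds
tprs, aucs = [], []

fig, ax = plt.subplots(figsize=(6, 6))
for fold_idx, (train, test) in enumerate(cv.split(X, y)):
    model.fit(X[train], y[train])
    viz = RocCurveDisplay.from_estimator(
        model, X[test], y[test],
        name=f"ROC fold {fold_idx}", alpha=0.3, lw=1, ax=ax,
    )
    # Interpolate this fold's curve onto the shared grid so curves can be averaged
    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(viz.roc_auc)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
ax.plot(mean_fpr, mean_tpr, color="b", lw=2,
        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (auc(mean_fpr, mean_tpr), np.std(aucs)))
std_tpr = np.std(tprs, axis=0)
ax.fill_between(mean_fpr,
                np.maximum(mean_tpr - std_tpr, 0),
                np.minimum(mean_tpr + std_tpr, 1),
                color="grey", alpha=0.2, label=r"$\pm$ 1 std. dev.")
ax.plot([0, 1], [0, 1], linestyle="--", lw=2, color="r", alpha=0.8)  # chance line
ax.legend(loc="lower right")
plt.show()

Pinning interp_tpr[0] = 0.0 and mean_tpr[-1] = 1.0 anchors the averaged curve to the (0, 0) and (1, 1) endpoints, which the diff above does as well.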