COMPARA / covid_analysis

Commit 44116618
Authored May 22, 2024 by Joaquin Torres
Testing metric generation

Parent commit: 97658355
Showing 2 changed files with 10 additions and 9 deletions (+10 / -9):

    model_selection/cv_metric_gen.py        +10  -9
    model_selection/output_cv_metrics.xlsx   +0  -0
model_selection/cv_metric_gen.py
@@ -81,9 +81,9 @@ def get_tuned_models(group_str, method_str):
     tuned_models = {}
     # Iterate through each row of the DataFrame
     for _, row in tuned_models_df.iterrows():
-        model_name = row[0]
+        model_name = row.iloc[0]
         # Read dictionary
-        parameters = ast.literal_eval(row['Parameters'])
+        parameters = ast.literal_eval(row['Best Parameters'])
         # Add extra parameters
         if model_name == 'AB':
             parameters['algorithm'] = 'SAMME'
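
For reference, a minimal runnable sketch of what the two changed lines do; the DataFrame contents and column layout below are made up for illustration and are not the repository's data:

    import ast
    import pandas as pd

    # Hypothetical stand-in for the real tuned_models_df read from disk.
    tuned_models_df = pd.DataFrame({
        'Model': ['AB', 'DT'],
        'Best Parameters': ["{'n_estimators': 50, 'learning_rate': 1.0}",
                            "{'max_depth': 5}"],
    })

    tuned_models = {}
    for _, row in tuned_models_df.iterrows():
        # row.iloc[0] is explicit positional access; plain row[0] relies on
        # integer-key lookup on a Series, which pandas has deprecated.
        model_name = row.iloc[0]
        # The hyperparameters are stored as a string, so parse them safely
        # with ast.literal_eval instead of eval().
        parameters = ast.literal_eval(row['Best Parameters'])
        if model_name == 'AB':
            parameters['algorithm'] = 'SAMME'
        tuned_models[model_name] = parameters

    print(tuned_models)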

@@ -177,7 +177,7 @@ if __name__ == "__main__":
     scores_sheets = {} # To store score dfs as sheets in the same excel file
     for i, group in enumerate(['pre', 'post']):
         for j, method in enumerate(['', '', 'over_', 'under_']):
-            print(f"{group}-{method_names[j]}")
+            # print(f"{group}-{method_names[j]}")
             # Get train dataset based on group and method
             X_train = data_dic['X_train_' + method + group]
             y_train = data_dic['y_train_' + method + group]
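
For readers unfamiliar with the data layout, a tiny sketch of the key scheme the loop above assumes: the training splits live in one dictionary whose keys concatenate a resampling prefix with the group name. The placeholder arrays are illustrative only, not the project's data:

    import numpy as np

    # Hypothetical data_dic with the same key layout the loop expects.
    data_dic = {}
    for group in ['pre', 'post']:
        for method in ['', 'over_', 'under_']:
            data_dic['X_train_' + method + group] = np.zeros((5, 3))
            data_dic['y_train_' + method + group] = np.zeros(5)

    # 'X_train_' + 'over_' + 'pre' -> the oversampled training set for the 'pre' group.
    X_train = data_dic['X_train_over_pre']
    y_train = data_dic['y_train_over_pre']
    print(X_train.shape, y_train.shape)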

@@ -188,12 +188,13 @@ if __name__ == "__main__":
             scores_df = pd.DataFrame(columns=range(1,11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()])
             # Metric generation for each model
             for model_name, model in models.items():
-                # Retrieve cv scores for our metrics of interest
-                scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
-                # Save results of each fold
-                scores_df.loc[model_name + '_F1'] = list(np.around(np.array(scores["test_f1"]), 4))
-                scores_df.loc[model_name + '_PREC'] = list(np.around(np.array(scores["test_precision"]), 4))
-                scores_df.loc[model_name + '_REC'] = list(np.around(np.array(scores["test_recall"]), 4))
+                if model_name == 'DT':
+                    print(f"{group}-{method_names[j]}-{model_name}")
+                    # Retrieve cv scores for our metrics of interest
+                    scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
+                    # Save results of each fold
+                    for metric_name in scorings.keys():
+                        scores_df.loc[model_name + f'_{metric_name}'] = list(np.around(np.array(scores[f"test_{metric_name}"]), 4))
             # Store the DataFrame in the dictionary with a unique key for each sheet
             sheet_name = f"{group}_{method_names[j]}"
             scores_sheets[sheet_name] = scores_df
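
To see the reworked metric loop end to end, here is a self-contained sketch with toy data; the models, scorers, and fold count are assumptions for illustration, and the temporary 'DT'-only filter from this commit is left out so every model runs:

    import numpy as np
    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.model_selection import StratifiedKFold, cross_validate
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import AdaBoostClassifier

    # Toy training split standing in for one (group, method) dataset.
    X_train, y_train = make_classification(n_samples=300, random_state=0)

    models = {'DT': DecisionTreeClassifier(random_state=0),
              'AB': AdaBoostClassifier(random_state=0)}
    scorings = {'F1': 'f1', 'PREC': 'precision', 'REC': 'recall'}
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

    # One row per (model, metric), one column per fold, as in the commit.
    scores_df = pd.DataFrame(columns=range(1, 11),
                             index=[f"{m}_{s}" for m in models for s in scorings])

    for model_name, model in models.items():
        scores = cross_validate(model, X_train, y_train, scoring=scorings,
                                cv=cv, return_train_score=True, n_jobs=1)
        # cross_validate names its outputs "test_<scorer key>", so one loop
        # fills every metric row without hard-coding '_F1', '_PREC', '_REC'.
        for metric_name in scorings.keys():
            scores_df.loc[model_name + f'_{metric_name}'] = list(
                np.around(np.array(scores[f"test_{metric_name}"]), 4))

    print(scores_df)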

model_selection/output_cv_metrics.xlsx
New file (mode 0 → 100644): file added.