Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
C
covid_analysis
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
COMPARA
covid_analysis
Commits
7847bcd0
Commit
7847bcd0
authored
May 13, 2024
by
Joaquin Torres
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
testing the script with DT
parent
5ca4f7ae
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
8 deletions
+44
-8
model_selection/hyperparam_tuning.py
model_selection/hyperparam_tuning.py
+0
-1
model_selection/test_models.py
model_selection/test_models.py
+44
-7
No files found.
model_selection/hyperparam_tuning.py
View file @
7847bcd0
...
@@ -27,7 +27,6 @@ import os
...
@@ -27,7 +27,6 @@ import os
# Function to read training datasets
# Function to read training datasets
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
def
read_data
():
def
read_data
():
import
numpy
as
np
# Load ORIGINAL training data
# Load ORIGINAL training data
X_train_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/X_train_pre.npy'
,
allow_pickle
=
True
)
X_train_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/X_train_pre.npy'
,
allow_pickle
=
True
)
...
...
model_selection/test_models.py
View file @
7847bcd0
...
@@ -25,11 +25,41 @@ def read_test_data():
...
@@ -25,11 +25,41 @@ def read_test_data():
X_test_post
=
np
.
load
(
'../gen_train_data/data/output/post/X_test_post.npy'
,
allow_pickle
=
True
)
X_test_post
=
np
.
load
(
'../gen_train_data/data/output/post/X_test_post.npy'
,
allow_pickle
=
True
)
y_test_post
=
np
.
load
(
'../gen_train_data/data/output/post/y_test_post.npy'
,
allow_pickle
=
True
)
y_test_post
=
np
.
load
(
'../gen_train_data/data/output/post/y_test_post.npy'
,
allow_pickle
=
True
)
# Load ORIGINAL training data
X_train_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/X_train_pre.npy'
,
allow_pickle
=
True
)
y_train_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/y_train_pre.npy'
,
allow_pickle
=
True
)
X_train_post
=
np
.
load
(
'../gen_train_data/data/output/post/X_train_post.npy'
,
allow_pickle
=
True
)
y_train_post
=
np
.
load
(
'../gen_train_data/data/output/post/y_train_post.npy'
,
allow_pickle
=
True
)
# Load oversampled training data
X_train_over_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/X_train_over_pre.npy'
,
allow_pickle
=
True
)
y_train_over_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/y_train_over_pre.npy'
,
allow_pickle
=
True
)
X_train_over_post
=
np
.
load
(
'../gen_train_data/data/output/post/X_train_over_post.npy'
,
allow_pickle
=
True
)
y_train_over_post
=
np
.
load
(
'../gen_train_data/data/output/post/y_train_over_post.npy'
,
allow_pickle
=
True
)
# Load undersampled training data
X_train_under_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/X_train_under_pre.npy'
,
allow_pickle
=
True
)
y_train_under_pre
=
np
.
load
(
'../gen_train_data/data/output/pre/y_train_under_pre.npy'
,
allow_pickle
=
True
)
X_train_under_post
=
np
.
load
(
'../gen_train_data/data/output/post/X_train_under_post.npy'
,
allow_pickle
=
True
)
y_train_under_post
=
np
.
load
(
'../gen_train_data/data/output/post/y_train_under_post.npy'
,
allow_pickle
=
True
)
data_dic
=
{
data_dic
=
{
"X_test_pre"
:
X_test_pre
,
"X_test_pre"
:
X_test_pre
,
"y_test_pre"
:
y_test_pre
,
"y_test_pre"
:
y_test_pre
,
"X_test_post"
:
X_test_post
,
"X_test_post"
:
X_test_post
,
"y_test_post"
:
y_test_post
,
"y_test_post"
:
y_test_post
,
"X_train_pre"
:
X_train_pre
,
"y_train_pre"
:
y_train_pre
,
"X_train_post"
:
X_train_post
,
"y_train_post"
:
y_train_post
,
"X_train_over_pre"
:
X_train_over_pre
,
"y_train_over_pre"
:
y_train_over_pre
,
"X_train_over_post"
:
X_train_over_post
,
"y_train_over_post"
:
y_train_over_post
,
"X_train_under_pre"
:
X_train_under_pre
,
"y_train_under_pre"
:
y_train_under_pre
,
"X_train_under_post"
:
X_train_under_post
,
"y_train_under_post"
:
y_train_under_post
,
}
}
return
data_dic
return
data_dic
...
@@ -205,24 +235,31 @@ if __name__ == "__main__":
...
@@ -205,24 +235,31 @@ if __name__ == "__main__":
scores_sheets
=
{}
# To store score dfs as sheets in the same excel file
scores_sheets
=
{}
# To store score dfs as sheets in the same excel file
for
i
,
group
in
enumerate
([
'pre'
,
'post'
]):
for
i
,
group
in
enumerate
([
'pre'
,
'post'
]):
# Get test dataset based on group
# Get test dataset based on group
X
=
data_dic
[
'X_test'
+
group
]
X
_test
=
data_dic
[
'X_test'
+
group
]
y
=
data_dic
[
'y_test'
+
group
]
y
_test
=
data_dic
[
'y_test'
+
group
]
for
j
,
method
in
enumerate
([
''
,
''
,
'over_'
,
'under_'
]):
for
j
,
method
in
enumerate
([
''
,
''
,
'over_'
,
'under_'
]):
# Get train dataset based on group and method
X_train
=
data_dic
[
'X_train_'
+
method
+
group
]
y_train
=
data_dic
[
'y_train_'
+
method
+
group
]
# Get tuned models for this group and method
# Get tuned models for this group and method
models
=
get_tuned_models
(
group_id
=
i
,
method_id
=
j
)
models
=
get_tuned_models
(
group_id
=
i
,
method_id
=
j
)
# Scores df
# Scores df
scores_df
=
pd
.
DataFrame
(
index
=
models
.
keys
(),
columns
=
scorings
.
keys
())
scores_df
=
pd
.
DataFrame
(
index
=
models
.
keys
(),
columns
=
scorings
.
keys
())
# Evaluate each model
# Evaluate each model
for
model_name
,
model
in
models
.
items
():
for
model_name
,
model
in
models
.
items
():
# At each of the scores of interest
# ----------- TEMPORAL -------------
for
score_name
,
scorer
in
scorings
.
items
():
if
model_name
==
"DT"
:
score_value
=
scorer
(
model
,
X
,
y
)
# Train the model (it was just initialized above)
scores_df
.
at
[
model_name
,
score_name
]
=
score_value
model
.
fit
(
X_train
,
y_train
)
# Evaluate at each of the scores of interest
for
score_name
,
scorer
in
scorings
.
items
():
score_value
=
scorer
(
model
,
X_test
,
y_test
)
scores_df
.
at
[
model_name
,
score_name
]
=
score_value
# Store the DataFrame in the dictionary with a unique key for each sheet
# Store the DataFrame in the dictionary with a unique key for each sheet
sheet_name
=
f
"{group}_{method_names[j]}"
sheet_name
=
f
"{group}_{method_names[j]}"
scores_sheets
[
sheet_name
]
=
scores_df
scores_sheets
[
sheet_name
]
=
scores_df
# Write results to Excel file
# Write results to Excel file
with
pd
.
ExcelWriter
(
'./
training_models/output
/testing_tuned_models.xlsx'
)
as
writer
:
with
pd
.
ExcelWriter
(
'./
model_selection/test_results
/testing_tuned_models.xlsx'
)
as
writer
:
for
sheet_name
,
data
in
scores_sheets
.
items
():
for
sheet_name
,
data
in
scores_sheets
.
items
():
data
.
to_excel
(
writer
,
sheet_name
=
sheet_name
)
data
.
to_excel
(
writer
,
sheet_name
=
sheet_name
)
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment