COMPARA / covid_analysis / Commits

Commit 51eae1a7, authored May 08, 2024 by Joaquin Torres (parent 604fc0eb)

    fixing small details, getting ready to run script

Showing 1 changed file with 36 additions and 34 deletions.

training_models/hyperparam_tuning.py  +36 -34
...
@@ -30,28 +30,28 @@ def read_data():
     import numpy as np
     # Load test data
-    X_test_pre = np.load('./gen_train_data/data/output/pre/X_test_pre.npy', allow_pickle=True)
+    X_test_pre = np.load('../gen_train_data/data/output/pre/X_test_pre.npy', allow_pickle=True)
-    y_test_pre = np.load('./gen_train_data/data/output/pre/y_test_pre.npy', allow_pickle=True)
+    y_test_pre = np.load('../gen_train_data/data/output/pre/y_test_pre.npy', allow_pickle=True)
-    X_test_post = np.load('./gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True)
+    X_test_post = np.load('../gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True)
-    y_test_post = np.load('./gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True)
+    y_test_post = np.load('../gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True)
     # Load ORIGINAL training data
-    X_train_pre = np.load('./gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
+    X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
-    y_train_pre = np.load('./gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
+    y_train_pre = np.load('../gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
-    X_train_post = np.load('./gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
+    X_train_post = np.load('../gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
-    y_train_post = np.load('./gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)
+    y_train_post = np.load('../gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)
     # Load oversampled training data
-    X_train_over_pre = np.load('./gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
+    X_train_over_pre = np.load('../gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
-    y_train_over_pre = np.load('./gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
+    y_train_over_pre = np.load('../gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
-    X_train_over_post = np.load('./gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
+    X_train_over_post = np.load('../gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
-    y_train_over_post = np.load('./gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)
+    y_train_over_post = np.load('../gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)
     # Load undersampled training data
-    X_train_under_pre = np.load('./gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
+    X_train_under_pre = np.load('../gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
-    y_train_under_pre = np.load('./gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
+    y_train_under_pre = np.load('../gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
-    X_train_under_post = np.load('./gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
+    X_train_under_post = np.load('../gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
-    y_train_under_post = np.load('./gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
+    y_train_under_post = np.load('../gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
     data_dic = {
         "X_test_pre": X_test_pre,
...
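A note on the hunk above: the sixteen near-identical np.load calls could be generated in a loop. A minimal sketch, assuming the same directory layout and naming scheme as the corrected paths (the load_splits helper is hypothetical, not part of the repository):

    import numpy as np

    def load_splits(base='../gen_train_data/data/output'):
        # Rebuild the same keys as the hand-written block: X/y, test or train,
        # optional over_/under_ sampling prefix, and the pre/post group suffix.
        data_dic = {}
        for group in ('pre', 'post'):
            for stem in ('X_test_', 'y_test_', 'X_train_', 'y_train_',
                         'X_train_over_', 'y_train_over_',
                         'X_train_under_', 'y_train_under_'):
                name = stem + group
                data_dic[name] = np.load(f'{base}/{group}/{name}.npy', allow_pickle=True)
        return data_dic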
@@ -84,26 +84,26 @@ if __name__ == "__main__":
     # --------------------------------------------------------------------------------------------------------
     # 1. No class weight
     models_1 = {"DT" : DecisionTreeClassifier(),
-                # "RF" : RandomForestClassifier(n_estimators=50),
+                "RF" : RandomForestClassifier(),
-                # "Bagging" : BaggingClassifier(),
+                "Bagging" : BaggingClassifier(),
-                # "AB" : AdaBoostClassifier(),
+                "AB" : AdaBoostClassifier(),
-                # "XGB": XGBClassifier(),
+                "XGB": XGBClassifier(),
-                # "LR" : LogisticRegression(max_iter=1000),
+                "LR" : LogisticRegression(max_iter=1000),
-                # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet'),
+                "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet'),
-                # "SVM" : SVC(probability=True),
+                "SVM" : SVC(probability=True),
-                # "MLP" : MLPClassifier(max_iter=500),
+                "MLP" : MLPClassifier(max_iter=500)
                 }
-    # 2. Class weight
+    # 2. Class weight: cost-sensitive learning
     models_2 = {"DT" : DecisionTreeClassifier(class_weight='balanced'),
-                # "RF" : RandomForestClassifier(n_estimators=50, class_weight='balanced'),
+                "RF" : RandomForestClassifier(class_weight='balanced'),
-                # "Bagging" : BaggingClassifier(), # <-
+                "Bagging" : BaggingClassifier(estimator=DecisionTreeClassifier(class_weight='balanced')),
-                # "AB" : AdaBoostClassifier(), # <-
+                "AB" : AdaBoostClassifier(estimator=DecisionTreeClassifier(class_weight='balanced')),
                 # "XGB": XGBClassifier(), # <-
-                # "LR" : LogisticRegression(max_iter=1000, class_weight='balanced'),
+                "LR" : LogisticRegression(max_iter=1000, class_weight='balanced'),
-                # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet', class_weight='balanced'),
+                "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet', class_weight='balanced'),
-                # "SVM" : SVC(probability=True, class_weight='balanced'),
+                "SVM" : SVC(probability=True, class_weight='balanced'),
-                # "MLP" : MLPClassifier(max_iter=500), # <-
+                # "MLP" : MLPClassifier(max_iter=500)
                 }
     # Hyperparameter tuning setup
...
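For reference on the cost-sensitive variants in models_2: class_weight='balanced' weights each class inversely to its frequency, w_c = n_samples / (n_classes * count_c). A standalone illustration of that heuristic (the balanced_weights helper is a sketch mirroring sklearn.utils.class_weight.compute_class_weight, not code from the repository):

    import numpy as np

    def balanced_weights(y):
        # w_c = n_samples / (n_classes * count_c), sklearn's 'balanced' rule
        classes, counts = np.unique(y, return_counts=True)
        return dict(zip(classes, len(y) / (len(classes) * counts)))

    print(balanced_weights(np.array([0, 0, 0, 1])))  # -> weights roughly {0: 0.67, 1: 2.0}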
@@ -152,8 +152,9 @@ if __name__ == "__main__":
     # Store each df as a sheet in an excel file
     sheets_dict = {}
     for i, group in enumerate(['pre', 'post']):
         print(group, end=' ')
         for j, method in enumerate(['', '', 'over_', 'under_']):
+            print(f"ITERATION {i+j}")
             print(method, end=' ')
             # Get dataset based on group and method
             X = data_dic['X_train_' + method + group]
             y = data_dic['y_train_' + method + group]
...
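A small caveat on the added print: i+j does not uniquely label the iteration (for example, i=0, j=1 and i=1, j=0 both print "ITERATION 1"). A hypothetical unambiguous alternative would combine the loop values themselves:

    print(f"ITERATION {group}/{method or 'orig'}")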
@@ -162,6 +163,7 @@ if __name__ == "__main__":
             # Save results: params and best score for each of the models of this method and group
             hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Parameters', 'Score'])
             for model_name, model in models.items():
+                print(model_name + "\n\n")
                 # Find optimal hyperparams for curr model
                 params = hyperparameters[model_name]
                 search = RandomizedSearchCV(model, param_distributions=params, cv=cv, n_jobs=1, scoring='precision')
...
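The cv object and the hyperparameters dictionary used above are defined in collapsed parts of the script. For context, a self-contained sketch of the same RandomizedSearchCV pattern; the data, CV scheme, and parameter distributions here are illustrative assumptions, not the script's actual values:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_classification(n_samples=200, random_state=0)  # toy binary data
    cv = StratifiedKFold(n_splits=5)                            # assumed CV scheme
    params = {'max_depth': [3, 5, 10, None],                    # hypothetical distributions
              'min_samples_split': list(range(2, 20))}
    search = RandomizedSearchCV(DecisionTreeClassifier(), param_distributions=params,
                                cv=cv, n_jobs=1, scoring='precision')
    search.fit(X, y)
    print(search.best_params_, search.best_score_)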
@@ -174,7 +176,7 @@ if __name__ == "__main__":
             sheets_dict[sheet_name] = hyperparam_df
     # Write results to Excel file
-    with pd.ExcelWriter('./training_models/output/hyperparam.xlsx') as writer:
+    with pd.ExcelWriter('./output/hyperparam.xlsx') as writer:
         for sheet_name, data in sheets_dict.items():
             data.to_excel(writer, sheet_name=sheet_name)
     # --------------------------------------------------------------------------------------------------------
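One practical caveat about the corrected path: pd.ExcelWriter does not create missing directories, so ./output must already exist relative to the working directory (presumably training_models/). A defensive variant of the write block (a sketch using pathlib, with sheets_dict as built above):

    from pathlib import Path

    out = Path('./output/hyperparam.xlsx')
    out.parent.mkdir(parents=True, exist_ok=True)  # make sure ./output exists
    with pd.ExcelWriter(out) as writer:
        for sheet_name, data in sheets_dict.items():
            data.to_excel(writer, sheet_name=sheet_name)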
...