COMPARA / covid_analysis · Commits

Commit 05aa2417
authored May 10, 2024 by Joaquin Torres
Parent: d3a20982

    ready to retune

Showing 2 changed files with 16 additions and 17 deletions (+16 -17)
model_selection/hyperparam_tuning.py   +8 -9
model_selection/test_models.py         +8 -8
model_selection/hyperparam_tuning.py
@@ -79,7 +79,7 @@ if __name__ == "__main__":
         "AB": AdaBoostClassifier(algorithm='SAMME'),
         "XGB": XGBClassifier(),
         "LR": LogisticRegression(max_iter=1000),
-        "SVM": SVC(max_iter=1000),
+        "SVM": SVC(probability=True, max_iter=1000),
         "MLP": MLPClassifier(max_iter=500)
         # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet')
     }
@@ -90,7 +90,7 @@ if __name__ == "__main__":
         "Bagging": BaggingClassifier(estimator=DecisionTreeClassifier(class_weight='balanced')),
         "AB": AdaBoostClassifier(estimator=DecisionTreeClassifier(class_weight='balanced'), algorithm='SAMME'),
         "LR": LogisticRegression(max_iter=1000, class_weight='balanced'),
-        "SVM": SVC(max_iter=1000, class_weight='balanced'),
+        "SVM": SVC(probability=True, max_iter=1000, class_weight='balanced'),
         # "ElNet" : LogisticRegression(max_iter=1000, penalty='elasticnet', class_weight='balanced'),
         # "XGB": XGBClassifier(), # <-
         # "MLP" : MLPClassifier(max_iter=500) # <-
@@ -142,18 +142,17 @@ if __name__ == "__main__":
     # --------------------------------------------------------------------------------------------------------
     # Store each df as a sheet in an excel file
     sheets_dict = {}
-    for i, group in enumerate(['post']):
-        for j, method in enumerate(['']):  # ['', '', 'over_', 'under_']
+    for i, group in enumerate(['pre', 'post']):
+        for j, method in enumerate(['', '', 'over_', 'under_']):
             # Get dataset based on group and method
             X = data_dic['X_train_' + method + group]
             y = data_dic['y_train_' + method + group]
             # Use group of models with class weight if needed
-            # models = models_CS if j == 1 else models_simple
-            models = models_CS
+            models = models_CS if j == 1 else models_simple
             # Save results: params and best score for each of the models of this method and group
             hyperparam_df = pd.DataFrame(index=list(models.keys()), columns=['Parameters', 'Score'])
             for model_name, model in models.items():
-                print(f"{group}-{method_names[1]}-{model_name}")
+                print(f"{group}-{method_names[j]}-{model_name}")
                 # Find optimal hyperparams for curr model
                 params = hyperparameters[model_name]
                 search = RandomizedSearchCV(model, param_distributions=params, cv=cv, n_jobs=8, scoring='precision')
@@ -162,11 +161,11 @@ if __name__ == "__main__":
                 hyperparam_df.at[model_name, 'Score'] = round(search.best_score_, 4)
             # Store the DataFrame in the dictionary with a unique key for each sheet
-            sheet_name = f"{group}_{method_names[1]}"
+            sheet_name = f"{group}_{method_names[j]}"
             sheets_dict[sheet_name] = hyperparam_df
     # Write results to Excel file
-    with pd.ExcelWriter('./output/hyperparam_post_ORIG_CS.xlsx') as writer:
+    with pd.ExcelWriter('./output/hyperparamers_pre_and_post') as writer:
         for sheet_name, data in sheets_dict.items():
             data.to_excel(writer, sheet_name=sheet_name)
...
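Taken together, the hyperparam_tuning.py edits restore the full sweep: both patient groups ('pre' and 'post'), all four sampling variants, the per-method choice between models_simple and models_CS, and sheet names keyed by j instead of a hard-coded index. Below is a minimal, self-contained sketch of that loop pattern; the toy dataset, single-entry model dict, parameter grid, and output filename are hypothetical stand-ins for the script's real data_dic, model sets, hyperparameters, and ./output path (writing .xlsx assumes openpyxl is installed).

    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
    from sklearn.svm import SVC

    # Hypothetical stand-in for the script's train splits in data_dic.
    X, y = make_classification(n_samples=200, n_features=10, random_state=0)

    models = {"SVM": SVC(probability=True, max_iter=1000)}  # stand-in model set
    hyperparameters = {"SVM": {"C": [0.1, 1.0, 10.0], "kernel": ["linear", "rbf"]}}
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

    sheets_dict = {}
    for group in ["pre", "post"]:                                # patient group
        for j, method in enumerate(["", "", "over_", "under_"]):  # sampling variant
            hyperparam_df = pd.DataFrame(index=list(models.keys()),
                                         columns=["Parameters", "Score"])
            for model_name, model in models.items():
                # Randomized search over the model's parameter space,
                # scored on precision as in the original script.
                search = RandomizedSearchCV(model,
                                            param_distributions=hyperparameters[model_name],
                                            n_iter=5, cv=cv, scoring="precision",
                                            random_state=0)
                search.fit(X, y)
                hyperparam_df.at[model_name, "Parameters"] = str(search.best_params_)
                hyperparam_df.at[model_name, "Score"] = round(search.best_score_, 4)
            sheets_dict[f"{group}_{j}{method}"] = hyperparam_df  # one sheet per combo

    # One workbook, one sheet per group/method combination.
    with pd.ExcelWriter("hyperparams_sketch.xlsx") as writer:
        for sheet_name, data in sheets_dict.items():
            data.to_excel(writer, sheet_name=sheet_name)

In the real script the duplicated '' entry distinguishes the plain models from the cost-sensitive ones via j (models_CS when j == 1), which is why the sheet key needs j and not just the method prefix.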
model_selection/test_models.py
@@ -49,7 +49,7 @@ def get_tuned_models(group_id, method_id):
             "AB" : AdaBoostClassifier(**{'learning_rate': 1.9189147333140566, 'n_estimators': 131, 'algorithm': 'SAMME'}),
             "XGB": XGBClassifier(**{'learning_rate': 0.22870029177880222, 'max_depth': 8, 'n_estimators': 909}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': None, 'max_iter': 1000}),
-            #"SVM" : SVC(**{'C': 0.9872682949695772, 'kernel': 'linear', 'max_iter':1000}),
+            #"SVM" : SVC(**{'C': 0.9872682949695772, 'kernel': 'linear', 'max_iter':1000, 'probability': True}),
             "MLP" : MLPClassifier(**{'activation': 'identity', 'hidden_layer_sizes': 122, 'learning_rate': 'invscaling', 'max_iter': 500})
         }
     # 1.2) Trained with original dataset and cost-sensitive learning
@@ -60,7 +60,7 @@ def get_tuned_models(group_id, method_id):
             "Bagging": BaggingClassifier(**{'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 15, 'warm_start': False, 'estimator': DecisionTreeClassifier(class_weight='balanced')}),
             "AB" : AdaBoostClassifier(**{'learning_rate': 0.8159074545140872, 'n_estimators': 121, 'algorithm': 'SAMME', 'estimator': DecisionTreeClassifier(class_weight='balanced')}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': None, 'max_iter': 1000, 'class_weight': 'balanced'}),
-            #"SVM": SVC(**{'C': 1.5550524351360953, 'kernel': 'linear', 'max_iter': 1000, 'class_weight': 'balanced'}),
+            #"SVM": SVC(**{'C': 1.5550524351360953, 'kernel': 'linear', 'max_iter': 1000, 'class_weight': 'balanced', 'probability': True}),
         }
     # 1.3) Trained with oversampled training dataset
     elif method_id == 2:
@@ -71,7 +71,7 @@ def get_tuned_models(group_id, method_id):
             "AB" : AdaBoostClassifier(**{'learning_rate': 1.6590924545876917, 'n_estimators': 141, 'algorithm': 'SAMME'}),
             "XGB": XGBClassifier(**{'learning_rate': 0.26946295284728783, 'max_depth': 7, 'n_estimators': 893}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 1000}),
-            #"SVM" : SVC(**{'C': 1.676419306008229, 'kernel': 'poly', 'max_iter':1000}),
+            #"SVM" : SVC(**{'C': 1.676419306008229, 'kernel': 'poly', 'max_iter':1000, 'probability': True}),
             "MLP" : MLPClassifier(**{'activation': 'relu', 'hidden_layer_sizes': 116, 'learning_rate': 'invscaling', 'max_iter': 500})
         }
     # 1.4) Trained with undersampled training dataset
@@ -83,7 +83,7 @@ def get_tuned_models(group_id, method_id):
             "AB" : AdaBoostClassifier(**{'learning_rate': 1.6996764264041269, 'n_estimators': 93, 'algorithm': 'SAMME'}),
             "XGB": XGBClassifier(**{'learning_rate': 0.26480707899668926, 'max_depth': 7, 'n_estimators': 959}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': None, 'max_iter': 1000}),
-            #"SVM" : SVC(**{'C': 1.1996501173654208, 'kernel': 'poly', 'max_iter':1000}),
+            #"SVM" : SVC(**{'C': 1.1996501173654208, 'kernel': 'poly', 'max_iter':1000, 'probability': True}),
             "MLP" : MLPClassifier(**{'activation': 'relu', 'hidden_layer_sizes': 131, 'learning_rate': 'constant', 'max_iter': 500})
         }
     # 2. POST
@@ -97,7 +97,7 @@ def get_tuned_models(group_id, method_id):
             "AB" : AdaBoostClassifier(**{'learning_rate': 1.7806904141367559, 'n_estimators': 66, 'algorithm': 'SAMME'}),
             "XGB": XGBClassifier(**{'learning_rate': 0.21889089898592098, 'max_depth': 6, 'n_estimators': 856}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': None, 'max_iter': 1000}),
-            #"SVM" : SVC(**{'C': 1.9890638540240584, 'kernel': 'linear', 'max_iter':1000}),
+            #"SVM" : SVC(**{'C': 1.9890638540240584, 'kernel': 'linear', 'max_iter':1000, 'probability': True}),
             "MLP" : MLPClassifier(**{'activation': 'logistic', 'hidden_layer_sizes': 112, 'learning_rate': 'constant', 'max_iter': 500})
         }
     # 2.2) Trained with original dataset and cost-sensitive learning
@@ -108,7 +108,7 @@ def get_tuned_models(group_id, method_id):
             "Bagging": BaggingClassifier(**{'max_features': 1.0, 'max_samples': 0.8, 'n_estimators': 11, 'warm_start': True, 'estimator': DecisionTreeClassifier(class_weight='balanced')}),
             "AB" : AdaBoostClassifier(**{'learning_rate': 1.7102248217141944, 'n_estimators': 108, 'algorithm': 'SAMME', 'estimator': DecisionTreeClassifier(class_weight='balanced')}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': None, 'max_iter': 1000, 'class_weight': 'balanced'}),
-            #"SVM": SVC(**{'C': 1.1313840454519628, 'kernel': 'sigmoid', 'max_iter': 1000, 'class_weight': 'balanced'})
+            #"SVM": SVC(**{'C': 1.1313840454519628, 'kernel': 'sigmoid', 'max_iter': 1000, 'class_weight': 'balanced', 'probability': True})
         }
     # 2.3) Trained with oversampled training dataset
     elif method_id == 2:
@@ -119,7 +119,7 @@ def get_tuned_models(group_id, method_id):
             # "AB" : AdaBoostClassifier(**{'learning_rate': 1.6590924545876917, 'n_estimators': 141, 'algorithm': 'SAMME'}),
             # "XGB": XGBClassifier(**{'learning_rate': 0.26946295284728783, 'max_depth': 7, 'n_estimators': 893}),
             # "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 1000}),
-            # "SVM" : SVC(**{'C': 1.676419306008229, 'kernel': 'poly', 'max_iter':1000}),
+            # "SVM" : SVC(**{'C': 1.676419306008229, 'kernel': 'poly', 'max_iter':1000, 'probability': True}),
             # "MLP" : MLPClassifier(**{'activation': 'relu', 'hidden_layer_sizes': 116, 'learning_rate': 'invscaling', 'max_iter':500})
         }
     # 2.4) Trained with undersampled training dataset
@@ -131,7 +131,7 @@ def get_tuned_models(group_id, method_id):
             "AB" : AdaBoostClassifier(**{'learning_rate': 1.836659462701278, 'n_estimators': 138, 'algorithm': 'SAMME'}),
             "XGB": XGBClassifier(**{'learning_rate': 0.2517946893282251, 'max_depth': 4, 'n_estimators': 646}),
             "LR" : LogisticRegression(**{'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 1000}),
-            #"SVM" : SVC(**{'C': 1.8414678085000697, 'kernel': 'linear', 'max_iter':1000}),
+            #"SVM" : SVC(**{'C': 1.8414678085000697, 'kernel': 'linear', 'max_iter':1000, 'probability': True}),
             "MLP" : MLPClassifier(**{'activation': 'relu', 'hidden_layer_sizes': 76, 'learning_rate': 'constant', 'max_iter': 500})
         }
     return tuned_models
...
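The one edit repeated across every hunk of test_models.py (and in hyperparam_tuning.py) is adding 'probability': True to each SVC, including the commented-out entries. On scikit-learn's SVC that flag is what makes predict_proba available, fitted via internal Platt-scaling cross-validation; presumably the evaluation needs probability outputs (e.g. for ROC curves or AUC), though the commit does not show the call sites. A small illustration of the behavioral difference:

    from sklearn.datasets import make_classification
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=100, random_state=0)

    clf = SVC(max_iter=1000).fit(X, y)
    print(hasattr(clf, "predict_proba"))   # False: no probability estimates by default

    clf = SVC(probability=True, max_iter=1000).fit(X, y)
    print(clf.predict_proba(X[:3]))        # Platt-scaled class probabilities, shape (3, 2)

The known trade-off is extra training cost, since enabling probability estimates fits an additional internal cross-validation on top of the base SVM.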