Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
C
covid_analysis
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
COMPARA
covid_analysis
Commits
d3a20982
Commit
d3a20982
authored
May 10, 2024
by
Joaquin Torres
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Identified problem with SVM: need probability=True for AUROC
parent
37c5050e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
10 deletions
+11
-10
model_selection/test_models.py
model_selection/test_models.py
+11
-10
No files found.
model_selection/test_models.py
View file @
d3a20982
...
...
@@ -8,7 +8,7 @@ import pandas as pd
import
numpy
as
np
from
xgboost
import
XGBClassifier
from
sklearn.metrics
import
confusion_matrix
from
sklearn.metrics
import
f1_score
,
make_scorer
,
precision_score
,
recall_score
,
accuracy_score
from
sklearn.metrics
import
f1_score
,
make_scorer
,
precision_score
,
recall_score
,
accuracy_score
,
roc_auc_score
,
average_precision_score
from
sklearn.ensemble
import
RandomForestClassifier
,
BaggingClassifier
,
AdaBoostClassifier
from
sklearn.neural_network
import
MLPClassifier
from
sklearn.svm
import
SVC
...
...
@@ -49,7 +49,7 @@ def get_tuned_models(group_id, method_id):
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
1.9189147333140566
,
'n_estimators'
:
131
,
'algorithm'
:
'SAMME'
}),
"XGB"
:
XGBClassifier
(
**
{
'learning_rate'
:
0.22870029177880222
,
'max_depth'
:
8
,
'n_estimators'
:
909
}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
None
,
'max_iter'
:
1000
}),
"SVM"
:
SVC
(
**
{
'C'
:
0.9872682949695772
,
'kernel'
:
'linear'
,
'max_iter'
:
1000
}),
#
"SVM" : SVC(**{'C': 0.9872682949695772, 'kernel': 'linear', 'max_iter':1000}),
"MLP"
:
MLPClassifier
(
**
{
'activation'
:
'identity'
,
'hidden_layer_sizes'
:
122
,
'learning_rate'
:
'invscaling'
,
'max_iter'
:
500
})
}
# 1.2) Trained with original dataset and cost-sensitive learning
...
...
@@ -60,7 +60,7 @@ def get_tuned_models(group_id, method_id):
"Bagging"
:
BaggingClassifier
(
**
{
'max_features'
:
1.0
,
'max_samples'
:
1.0
,
'n_estimators'
:
15
,
'warm_start'
:
False
,
'estimator'
:
DecisionTreeClassifier
(
class_weight
=
'balanced'
)}),
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
0.8159074545140872
,
'n_estimators'
:
121
,
'algorithm'
:
'SAMME'
,
'estimator'
:
DecisionTreeClassifier
(
class_weight
=
'balanced'
)}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
None
,
'max_iter'
:
1000
,
'class_weight'
:
'balanced'
}),
"SVM"
:
SVC
(
**
{
'C'
:
1.5550524351360953
,
'kernel'
:
'linear'
,
'max_iter'
:
1000
,
'class_weight'
:
'balanced'
}),
#
"SVM": SVC(**{'C': 1.5550524351360953, 'kernel': 'linear', 'max_iter': 1000, 'class_weight': 'balanced'}),
}
# 1.3) Trained with oversampled training dataset
elif
method_id
==
2
:
...
...
@@ -71,7 +71,7 @@ def get_tuned_models(group_id, method_id):
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
1.6590924545876917
,
'n_estimators'
:
141
,
'algorithm'
:
'SAMME'
}),
"XGB"
:
XGBClassifier
(
**
{
'learning_rate'
:
0.26946295284728783
,
'max_depth'
:
7
,
'n_estimators'
:
893
}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
'l2'
,
'max_iter'
:
1000
}),
"SVM"
:
SVC
(
**
{
'C'
:
1.676419306008229
,
'kernel'
:
'poly'
,
'max_iter'
:
1000
}),
#
"SVM" : SVC(**{'C': 1.676419306008229, 'kernel': 'poly', 'max_iter':1000}),
"MLP"
:
MLPClassifier
(
**
{
'activation'
:
'relu'
,
'hidden_layer_sizes'
:
116
,
'learning_rate'
:
'invscaling'
,
'max_iter'
:
500
})
}
# 1.4) Trained with undersampled training dataset
...
...
@@ -83,7 +83,7 @@ def get_tuned_models(group_id, method_id):
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
1.6996764264041269
,
'n_estimators'
:
93
,
'algorithm'
:
'SAMME'
}),
"XGB"
:
XGBClassifier
(
**
{
'learning_rate'
:
0.26480707899668926
,
'max_depth'
:
7
,
'n_estimators'
:
959
}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
None
,
'max_iter'
:
1000
}),
"SVM"
:
SVC
(
**
{
'C'
:
1.1996501173654208
,
'kernel'
:
'poly'
,
'max_iter'
:
1000
}),
#
"SVM" : SVC(**{'C': 1.1996501173654208, 'kernel': 'poly', 'max_iter':1000}),
"MLP"
:
MLPClassifier
(
**
{
'activation'
:
'relu'
,
'hidden_layer_sizes'
:
131
,
'learning_rate'
:
'constant'
,
'max_iter'
:
500
})
}
# 2. POST
...
...
@@ -97,7 +97,7 @@ def get_tuned_models(group_id, method_id):
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
1.7806904141367559
,
'n_estimators'
:
66
,
'algorithm'
:
'SAMME'
}),
"XGB"
:
XGBClassifier
(
**
{
'learning_rate'
:
0.21889089898592098
,
'max_depth'
:
6
,
'n_estimators'
:
856
}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
None
,
'max_iter'
:
1000
}),
"SVM"
:
SVC
(
**
{
'C'
:
1.9890638540240584
,
'kernel'
:
'linear'
,
'max_iter'
:
1000
}),
#
"SVM" : SVC(**{'C': 1.9890638540240584, 'kernel': 'linear', 'max_iter':1000}),
"MLP"
:
MLPClassifier
(
**
{
'activation'
:
'logistic'
,
'hidden_layer_sizes'
:
112
,
'learning_rate'
:
'constant'
,
'max_iter'
:
500
})
}
# 2.2) Trained with original dataset and cost-sensitive learning
...
...
@@ -108,7 +108,7 @@ def get_tuned_models(group_id, method_id):
"Bagging"
:
BaggingClassifier
(
**
{
'max_features'
:
1.0
,
'max_samples'
:
0.8
,
'n_estimators'
:
11
,
'warm_start'
:
True
,
'estimator'
:
DecisionTreeClassifier
(
class_weight
=
'balanced'
)}),
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
1.7102248217141944
,
'n_estimators'
:
108
,
'algorithm'
:
'SAMME'
,
'estimator'
:
DecisionTreeClassifier
(
class_weight
=
'balanced'
)}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
None
,
'max_iter'
:
1000
,
'class_weight'
:
'balanced'
}),
"SVM"
:
SVC
(
**
{
'C'
:
1.1313840454519628
,
'kernel'
:
'sigmoid'
,
'max_iter'
:
1000
,
'class_weight'
:
'balanced'
})
#
"SVM": SVC(**{'C': 1.1313840454519628, 'kernel': 'sigmoid', 'max_iter': 1000, 'class_weight': 'balanced'})
}
# 2.3) Trained with oversampled training dataset
elif
method_id
==
2
:
...
...
@@ -131,7 +131,7 @@ def get_tuned_models(group_id, method_id):
"AB"
:
AdaBoostClassifier
(
**
{
'learning_rate'
:
1.836659462701278
,
'n_estimators'
:
138
,
'algorithm'
:
'SAMME'
}),
"XGB"
:
XGBClassifier
(
**
{
'learning_rate'
:
0.2517946893282251
,
'max_depth'
:
4
,
'n_estimators'
:
646
}),
"LR"
:
LogisticRegression
(
**
{
'solver'
:
'lbfgs'
,
'penalty'
:
'l2'
,
'max_iter'
:
1000
}),
"SVM"
:
SVC
(
**
{
'C'
:
1.8414678085000697
,
'kernel'
:
'linear'
,
'max_iter'
:
1000
}),
#
"SVM" : SVC(**{'C': 1.8414678085000697, 'kernel': 'linear', 'max_iter':1000}),
"MLP"
:
MLPClassifier
(
**
{
'activation'
:
'relu'
,
'hidden_layer_sizes'
:
76
,
'learning_rate'
:
'constant'
,
'max_iter'
:
500
})
}
return
tuned_models
...
...
@@ -188,9 +188,10 @@ if __name__ == "__main__":
'TN'
:
TN_scorer
,
'FN'
:
FN_scorer
,
'FP'
:
FP_scorer
,
'TP'
:
TP_scorer
'TP'
:
TP_scorer
,
'AUROC'
:
make_scorer
(
roc_auc_score
,
needs_threshold
=
True
),
# AUROC requires decision function or probability outputs
'AUPRC'
:
make_scorer
(
average_precision_score
,
needs_proba
=
True
)
# AUPRC requires probability outputs
}
# AUROC and AUPRC: should the corresponding curves be plotted as well?
method_names
=
{
0
:
"ORIG"
,
1
:
"ORIG_CW"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment