Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
C
covid_analysis
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
COMPARA
covid_analysis
Commits
f26ac8cd
Commit
f26ac8cd
authored
May 05, 2024
by
Joaquin Torres
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on loop for model evaluation
parent
d72df2cb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
97 additions
and
20 deletions
+97
-20
training_models/train_models.py
training_models/train_models.py
+97
-20
No files found.
training_models/train_models.py
View file @
f26ac8cd
...
@@ -23,10 +23,41 @@ from sklearn.tree import DecisionTreeClassifier
...
@@ -23,10 +23,41 @@ from sklearn.tree import DecisionTreeClassifier
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
if
__name__
==
"__main__"
:
def negative_recall_scorer(clf, X, y):
    """Specificity of *clf* on (X, y): TN / (TN + FP).

    The fraction of truly negative samples that the classifier predicts
    as negative, taken from the first row of the confusion matrix
    (rows = true labels, columns = predicted labels).
    """
    predictions = clf.predict(X)
    matrix = confusion_matrix(y, predictions)
    true_negatives = matrix[0, 0]
    total_negatives = matrix[0, 0] + matrix[0, 1]
    return true_negatives / total_negatives
def TN_scorer(clf, X, y):
    """Count of true negatives produced by *clf* on (X, y).

    Cell [0, 0] of sklearn's confusion matrix: true label 0,
    predicted label 0.
    """
    predictions = clf.predict(X)
    matrix = confusion_matrix(y, predictions)
    return matrix[0, 0]
def FN_scorer(clf, X, y):
    """Count of false negatives produced by *clf* on (X, y).

    A false negative is a sample whose true label is 1 but which is
    predicted as 0. In sklearn's ``confusion_matrix`` layout
    (rows = true labels, columns = predicted labels) that is cell
    ``cm[1, 0]``. The original returned ``cm[0, 1]``, which is the
    false-POSITIVE cell — the FN/FP scorers were swapped.
    """
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    # cm[1, 0]: true label 1, predicted 0 -> false negative
    FN = cm[1, 0]
    return FN
def FP_scorer(clf, X, y):
    """Count of false positives produced by *clf* on (X, y).

    A false positive is a sample whose true label is 0 but which is
    predicted as 1. In sklearn's ``confusion_matrix`` layout
    (rows = true labels, columns = predicted labels) that is cell
    ``cm[0, 1]``. The original returned ``cm[1, 0]``, which is the
    false-NEGATIVE cell — the FN/FP scorers were swapped.
    """
    y_pred = clf.predict(X)
    cm = confusion_matrix(y, y_pred)
    # cm[0, 1]: true label 0, predicted 1 -> false positive
    FP = cm[0, 1]
    return FP
def TP_scorer(clf, X, y):
    """Count of true positives produced by *clf* on (X, y).

    Cell [1, 1] of sklearn's confusion matrix: true label 1,
    predicted label 1.
    """
    predictions = clf.predict(X)
    matrix = confusion_matrix(y, predictions)
    return matrix[1, 1]
def
read_data
():
import
numpy
as
np
# Reading training data
# --------------------------------------------------------------------------------------------------------
# Load test data
# Load test data
X_test_pre
=
np
.
load
(
'gen_train_data/data/output/pre/X_test_pre.npy'
,
allow_pickle
=
True
)
X_test_pre
=
np
.
load
(
'gen_train_data/data/output/pre/X_test_pre.npy'
,
allow_pickle
=
True
)
y_test_pre
=
np
.
load
(
'gen_train_data/data/output/pre/y_test_pre.npy'
,
allow_pickle
=
True
)
y_test_pre
=
np
.
load
(
'gen_train_data/data/output/pre/y_test_pre.npy'
,
allow_pickle
=
True
)
...
@@ -50,31 +81,77 @@ if __name__ == "__main__":
...
@@ -50,31 +81,77 @@ if __name__ == "__main__":
y_train_under_pre
=
np
.
load
(
'gen_train_data/data/output/pre/y_train_under_pre.npy'
,
allow_pickle
=
True
)
y_train_under_pre
=
np
.
load
(
'gen_train_data/data/output/pre/y_train_under_pre.npy'
,
allow_pickle
=
True
)
X_train_under_post
=
np
.
load
(
'gen_train_data/data/output/post/X_train_under_post.npy'
,
allow_pickle
=
True
)
X_train_under_post
=
np
.
load
(
'gen_train_data/data/output/post/X_train_under_post.npy'
,
allow_pickle
=
True
)
y_train_under_post
=
np
.
load
(
'gen_train_data/data/output/post/y_train_under_post.npy'
,
allow_pickle
=
True
)
y_train_under_post
=
np
.
load
(
'gen_train_data/data/output/post/y_train_under_post.npy'
,
allow_pickle
=
True
)
# --------------------------------------------------------------------------------------------------------
data_dic
=
{
"X_test_pre"
:
X_test_pre
,
"y_test_pre"
:
y_test_pre
,
"X_test_post"
:
X_test_post
,
"y_test_post"
:
y_test_post
,
"X_train_pre"
:
X_train_pre
,
"y_train_pre"
:
y_train_pre
,
"X_train_post"
:
X_train_post
,
"y_train_post"
:
y_train_post
,
"X_train_over_pre"
:
X_train_over_pre
,
"y_train_over_pre"
:
y_train_over_pre
,
"X_train_over_post"
:
X_train_over_post
,
"y_train_over_post"
:
y_train_over_post
,
"X_train_under_pre"
:
X_train_under_pre
,
"y_train_under_pre"
:
y_train_under_pre
,
"X_train_under_post"
:
X_train_under_post
,
"y_train_under_post"
:
y_train_under_post
,
}
return
data_dic
if
__name__
==
"__main__"
:
# Reading training data
data_dic
=
read_data
()
# Defining the models to train
# Defining the models to train
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
# 1. No class weight
# 1. No class weight
models_1
=
{
"DT"
:
DecisionTreeClassifier
(),
models_1
=
{
"DT"
:
DecisionTreeClassifier
(),
"RF"
:
RandomForestClassifier
(),
#
"RF" : RandomForestClassifier(),
"Bagging"
:
BaggingClassifier
(),
#
"Bagging" : BaggingClassifier(),
"AB"
:
AdaBoostClassifier
(),
#
"AB" : AdaBoostClassifier(),
"XGB"
:
XGBClassifier
(),
#
"XGB": XGBClassifier(),
"LR"
:
LogisticRegression
(),
#
"LR" : LogisticRegression(),
"ElNet"
:
LogisticRegression
(
penalty
=
'elasticnet'
),
#
"ElNet" : LogisticRegression(penalty='elasticnet'),
"SVM"
:
SVC
(),
#
"SVM" : SVC(),
"MLP"
:
MLPClassifier
(),
#
"MLP" : MLPClassifier(),
}
}
# 2. Class weight
# 2. Class weight
models_2
=
{
"DT"
:
DecisionTreeClassifier
(
class_weight
=
'balanced'
),
models_2
=
{
"DT"
:
DecisionTreeClassifier
(
class_weight
=
'balanced'
),
"RF"
:
RandomForestClassifier
(
class_weight
=
'balanced'
),
#
"RF" : RandomForestClassifier(class_weight='balanced'),
"Bagging"
:
BaggingClassifier
(),
# <-
#
"Bagging" : BaggingClassifier(), # <-
"AB"
:
AdaBoostClassifier
(),
# <-
#
"AB" : AdaBoostClassifier(), # <-
"XGB"
:
XGBClassifier
(),
# <-
#
"XGB": XGBClassifier(), # <-
"LR"
:
LogisticRegression
(
class_weight
=
'balanced'
),
#
"LR" : LogisticRegression(class_weight='balanced'),
"ElNet"
:
LogisticRegression
(
penalty
=
'elasticnet'
,
class_weight
=
'balanced'
),
#
"ElNet" : LogisticRegression(penalty='elasticnet', class_weight='balanced'),
"SVM"
:
SVC
(
class_weight
=
'balanced'
),
#
"SVM" : SVC(class_weight='balanced'),
"MLP"
:
MLPClassifier
(),
# <-
#
"MLP" : MLPClassifier(), # <-
}
}
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
# Setup
# --------------------------------------------------------------------------------------------------------
# Scorings to use for model evaluation
scorings
=
{
'f1'
:
make_scorer
(
f1_score
),
'negative_recall'
:
negative_recall_scorer
,
'recall'
:
make_scorer
(
recall_score
),
'precision'
:
make_scorer
(
precision_score
),
'TN'
:
TN_scorer
,
'FN'
:
FN_scorer
,
'FP'
:
FP_scorer
,
'TP'
:
TP_scorer
}
# Defining cross-validation protocol
cv
=
StratifiedKFold
(
n_splits
=
10
,
shuffle
=
True
,
random_state
=
1
)
# --------------------------------------------------------------------------------------------------------
for
i
,
group
in
enumerate
([
'pre'
,
'post'
]):
for
j
,
method
in
enumerate
([
''
,
''
,
'over_'
,
'under_'
]):
# Get dataset based on group and method
X
=
data_dic
[
'X_train_'
+
method
+
group
]
y
=
data_dic
[
'y_train_'
+
method
+
group
]
# Use group of models with class weight if needed
models
=
models_2
if
j
==
2
else
models_1
# Create df to keep track of each group-method for all its models
results
=
pd
.
DataFrame
()
for
model_name
,
model
in
models
.
items
():
cv_results
=
cross_validate
(
model
,
X
,
y
,
scoring
=
scorings
,
cv
=
cv
,
return_train_score
=
True
,
n_jobs
=
1
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment