Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
C
covid_analysis
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
COMPARA
covid_analysis
Commits
d72df2cb
Commit
d72df2cb
authored
May 04, 2024
by
Joaquin Torres
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
loaded data and defined models
parent
743c0bd1
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
79 additions
and
1 deletion
+79
-1
training_models/train_models.py
training_models/train_models.py
+79
-1
No files found.
training_models/train_models.py
View file @
d72df2cb
"""
Selecting best models through cross validation and hyperparameter tuning
for each method:
1. Original training dataset
2. Original training dataset - Cost sensitive
3. Oversampling
4. Undersampling
"""
# Libraries
# --------------------------------------------------------------------------------------------------------
import
pandas
as
pd
import
numpy
as
np
from
xgboost
import
XGBClassifier
from
sklearn.metrics
import
confusion_matrix
from
sklearn.metrics
import
f1_score
,
make_scorer
,
precision_score
,
recall_score
from
sklearn.model_selection
import
StratifiedKFold
,
cross_validate
from
sklearn.ensemble
import
RandomForestClassifier
,
BaggingClassifier
,
AdaBoostClassifier
from
sklearn.neural_network
import
MLPClassifier
from
sklearn.svm
import
SVC
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.tree
import
DecisionTreeClassifier
# --------------------------------------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: loads the pre/post train and test splits from disk
    # and builds the two dictionaries of untrained estimators (with and
    # without class weighting), keyed by a short model name.

    # Reading training data
    # --------------------------------------------------------------------------------------------------------
    # NOTE(review): allow_pickle=True makes np.load unpickle object arrays,
    # which can execute arbitrary code if a .npy file is tampered with. Only
    # keep it while these files are produced by this project's own pipeline;
    # drop the flag if the arrays turn out to be plain numeric.

    # Load test data
    X_test_pre = np.load('gen_train_data/data/output/pre/X_test_pre.npy', allow_pickle=True)
    y_test_pre = np.load('gen_train_data/data/output/pre/y_test_pre.npy', allow_pickle=True)
    X_test_post = np.load('gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True)
    y_test_post = np.load('gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True)

    # Load ORIGINAL training data
    X_train_pre = np.load('gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
    y_train_pre = np.load('gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
    X_train_post = np.load('gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
    y_train_post = np.load('gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)

    # Load oversampled training data
    X_train_over_pre = np.load('gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
    y_train_over_pre = np.load('gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
    X_train_over_post = np.load('gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
    y_train_over_post = np.load('gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)

    # Load undersampled training data
    X_train_under_pre = np.load('gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
    y_train_under_pre = np.load('gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
    X_train_under_post = np.load('gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
    y_train_under_post = np.load('gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
    # --------------------------------------------------------------------------------------------------------

    # Defining the models to train
    # --------------------------------------------------------------------------------------------------------
    # 1. No class weight
    models_1 = {
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(),
        "Bagging": BaggingClassifier(),
        "AB": AdaBoostClassifier(),
        "XGB": XGBClassifier(),
        "LR": LogisticRegression(),
        # Elastic-net is only implemented by the 'saga' solver and needs an
        # explicit l1_ratio; with the default solver fit() raises ValueError.
        # l1_ratio=0.5 is a neutral starting point, to be tuned later.
        "ElNet": LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5),
        "SVM": SVC(),
        "MLP": MLPClassifier(),
    }

    # 2. Class weight
    # Entries marked "# <-" are built without any class weighting, i.e. they
    # are identical to their models_1 counterparts.
    models_2 = {
        "DT": DecisionTreeClassifier(class_weight='balanced'),
        "RF": RandomForestClassifier(class_weight='balanced'),
        "Bagging": BaggingClassifier(),  # <-
        "AB": AdaBoostClassifier(),  # <-
        "XGB": XGBClassifier(),  # <-
        "LR": LogisticRegression(class_weight='balanced'),
        # Same solver/l1_ratio requirement as the unweighted ElNet entry.
        "ElNet": LogisticRegression(
            penalty='elasticnet',
            solver='saga',
            l1_ratio=0.5,
            class_weight='balanced',
        ),
        "SVM": SVC(class_weight='balanced'),
        "MLP": MLPClassifier(),  # <-
    }
    # --------------------------------------------------------------------------------------------------------
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment