COMPARA / covid_analysis · Commits

Commit 6d590283 · authored Jul 08, 2024 by Joaquin Torres
Completed comments
parent 762a245e
Showing 1 changed file with 23 additions and 9 deletions.

model_selection/cv_metric_gen.py (+23 −9) · view file @ 6d590283
# CV Metric Generation
# Author: Joaquín Torres Bravo
"""
Metric generation for each tuned model.
Done in a different script for performance and clarity purposes.
...
@@ -5,20 +7,25 @@
 # Libraries
 # --------------------------------------------------------------------------------------------------------
 # Basics
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 # Models
 from xgboost import XGBClassifier
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score, accuracy_score, roc_auc_score, average_precision_score
 from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
 from sklearn.neural_network import MLPClassifier
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.model_selection import StratifiedKFold
+# Metrics
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score, accuracy_score, roc_auc_score, average_precision_score
+from sklearn.metrics import RocCurveDisplay, auc
+from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve
+import matplotlib.pyplot as plt
+# CV
+from sklearn.model_selection import StratifiedKFold
+# Misc
+import ast  # String to dictionary
...
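Among the metric imports above, make_scorer is the scikit-learn helper that adapts a plain metric function with signature metric(y_true, y_pred) into the scorer(estimator, X, y) interface that the cross-validation utilities expect. A minimal sketch of that wrapping (the names here are illustrative, not taken from the commit):

import pandas as pd
from sklearn.metrics import f1_score, make_scorer

# Wrap f1_score into a scorer callable with the scorer(estimator, X, y) interface
f1_scorer = make_scorer(f1_score)

# Hypothetical usage inside any CV routine:
# cross_validate(clf, X, y, scoring={'F1': f1_scorer})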
@@ -82,9 +89,9 @@ def get_tuned_models(group_str, method_str):
     # Iterate through each row of the DataFrame
     for _, row in tuned_models_df.iterrows():
         model_name = row.iloc[0]
-        # Read dictionary
+        # Read dictionary with parameters
         parameters = ast.literal_eval(row['Best Parameters'])
-        # Add extra parameters
+        # Add extra parameters if needed
         if model_name == 'AB':
             parameters['algorithm'] = 'SAMME'
         elif model_name == 'LR':
...
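For context on the hunk above: ast.literal_eval safely parses a Python-literal string (here the 'Best Parameters' cell of the tuning-results sheet) into an actual dict without executing arbitrary code, after which extra keys can be patched in before the estimator is built. A minimal sketch with a made-up parameter string; the constructor call at the end is an assumption about how the script uses the dict:

import ast
from sklearn.ensemble import AdaBoostClassifier

# Hypothetical 'Best Parameters' cell as it might be stored in the results sheet
best_params_str = "{'n_estimators': 200, 'learning_rate': 0.5}"
parameters = ast.literal_eval(best_params_str)  # parses literals only, no code execution
parameters['algorithm'] = 'SAMME'               # extra parameter, as for model_name == 'AB'
model = AdaBoostClassifier(**parameters)        # presumably how the tuned estimator is rebuilt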
@@ -140,24 +147,31 @@ def negative_recall_scorer(clf, X, y):
     TN_prop = cm[0,0] / (cm[0,1] + cm[0,0])
     return TN_prop
-# Custom scorers for AUROC and AUPRC
+# Custom scorers for AUROC (Area Under the Receiver Operating Characteristic Curve) and AUPRC (Area Under the Precision-Recall Curve)
 def AUROC_scorer(clf, X, y):
+    # Check if the classifier has a decision_function method
     if hasattr(clf, "decision_function"):
+        # If so, use the decision function to get the scores for X
         y_score = clf.decision_function(X)
     else:
+        # Otherwise, use predict_proba to get the probabilities, and take the probabilities for the positive class (index 1)
         y_score = clf.predict_proba(X)[:, 1]
+    # Compute and return the ROC AUC score using the true labels and the predicted scores
     return roc_auc_score(y, y_score)
 def AUPRC_scorer(clf, X, y):
+    # Check if the classifier has a decision_function method
     if hasattr(clf, "decision_function"):
+        # If so, use the decision function to get the scores for X
         y_score = clf.decision_function(X)
     else:
+        # Otherwise, use predict_proba to get the probabilities, and take the probabilities for the positive class (index 1)
         y_score = clf.predict_proba(X)[:, 1]
+    # Compute and return the average precision score using the true labels and the predicted scores
     return average_precision_score(y, y_score)
 # --------------------------------------------------------------------------------------------------------
 if __name__ == "__main__":
     # Setup
     # --------------------------------------------------------------------------------------------------------
     # Reading training data
...
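Because AUROC_scorer and AUPRC_scorer follow scikit-learn's callable-scorer signature scorer(clf, X, y), they can be passed straight to cross_validate together with a StratifiedKFold splitter. A minimal sketch, assuming the two scorer functions from the hunk above are in scope; the toy data and LogisticRegression stand in for the script's real inputs:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_validate

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))      # toy feature matrix
y = rng.integers(0, 2, size=200)   # toy binary labels

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_validate(
    LogisticRegression(),
    X, y,
    cv=cv,
    scoring={'AUROC': AUROC_scorer, 'AUPRC': AUPRC_scorer},  # scorers defined in the hunk above
)
print(scores['test_AUROC'].mean(), scores['test_AUPRC'].mean())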
@@ -188,7 +202,7 @@ if __name__ == "__main__":
     cmap = plt.get_cmap('tab10')
     # --------------------------------------------------------------------------------------------------------
-    # Metric generation through cv for tuned models 3
+    # Metric generation through cv for tuned models
     # --------------------------------------------------------------------------------------------------------
     scores_sheets = {}  # To store score dfs as sheets in the same excel file
     for i, group in enumerate(['pre', 'post']):
...
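The scores_sheets dict is commented as collecting one score DataFrame per group ('pre', 'post') to be written as sheets of a single Excel file. A plausible sketch of that final write using pandas' ExcelWriter; the file name and the score frames are illustrative, not from the commit:

import pandas as pd

# Illustrative stand-ins for the per-group score DataFrames
scores_sheets = {
    'pre':  pd.DataFrame({'F1': [0.81, 0.79]}, index=['XGB', 'RF']),
    'post': pd.DataFrame({'F1': [0.77, 0.80]}, index=['XGB', 'RF']),
}
# One workbook, one sheet per group
with pd.ExcelWriter('cv_metrics.xlsx') as writer:
    for sheet_name, df in scores_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name)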