Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
C
covid_analysis
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
COMPARA
covid_analysis
Commits
aa9797c1
Commit
aa9797c1
authored
Jun 06, 2024
by
Joaquin Torres
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Able to retrieve chosen tuned models based on model name easily
parent
050d8a00
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
80 additions
and
26 deletions
+80
-26
explicability/shap_vals.py
explicability/shap_vals.py
+80
-26
No files found.
explicability/shap_vals.py
View file @
aa9797c1
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
import
pandas
as
pd
import
pandas
as
pd
import
numpy
as
np
import
numpy
as
np
import
shap
import
shap
import
ast
from
xgboost
import
XGBClassifier
from
xgboost
import
XGBClassifier
from
sklearn.ensemble
import
RandomForestClassifier
,
BaggingClassifier
,
AdaBoostClassifier
from
sklearn.ensemble
import
RandomForestClassifier
,
BaggingClassifier
,
AdaBoostClassifier
...
@@ -61,6 +62,58 @@ def read_data():
...
@@ -61,6 +62,58 @@ def read_data():
return
data_dic
return
data_dic
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
# Retrieving parameters for chosen models
# --------------------------------------------------------------------------------------------------------
def get_chosen_model(group_str, method_str, model_name):
    """Instantiate the tuned classifier chosen for a group/method context.

    Reads the sheet named ``f"{group_str}_{method_str}"`` from the
    hyperparameter workbook, selects the row whose 'Model' value equals
    ``model_name``, parses the parameter dictionary stored as text in the
    'Best Parameters' column, applies the fixed per-model overrides and
    builds the corresponding classifier with those parameters.

    Args:
        group_str: First component of the sheet name.
        method_str: Second component of the sheet name. When it contains
            'CW', balanced class weighting is added to the parameters.
        model_name: Model abbreviation; must be one of 'DT', 'RF',
            'Bagging', 'AB', 'XGB', 'LR', 'SVM' or 'MLP'.

    Returns:
        An unfitted classifier instance built from the tuned parameters.

    Raises:
        IndexError: If the sheet contains no row for ``model_name``.
        TypeError: If the 'Best Parameters' cell does not parse to a dict.
        KeyError: If ``model_name`` is not a supported abbreviation.
    """
    # Read sheet corresponding to group and method with tuned models and
    # their hyperparameters
    sheet = f"{group_str}_{method_str}"
    tuned_models_df = pd.read_excel(
        "../model_selection/output_hyperparam/hyperparamers.xlsx",
        sheet_name=sheet,
    )
    tuned_models_df.columns = ['Model', 'Best Parameters']

    # Mapping from model abbreviations to model classes
    model_mapping = {
        'DT': DecisionTreeClassifier,
        'RF': RandomForestClassifier,
        'Bagging': BaggingClassifier,
        'AB': AdaBoostClassifier,
        'XGB': XGBClassifier,
        'LR': LogisticRegression,
        'SVM': SVC,
        'MLP': MLPClassifier,
    }

    # Parameters that are always forced for a given model, regardless of
    # what the tuned sheet contains
    forced_params = {
        'AB': {'algorithm': 'SAMME'},
        'LR': {'max_iter': 1000},
        'SVM': {'max_iter': 1000, 'probability': True},
        'MLP': {'max_iter': 500},
    }

    # Select the row for the requested model; fail with an explicit message
    # instead of the opaque out-of-bounds error from .iloc[0]
    matches = tuned_models_df[tuned_models_df['Model'] == model_name]
    if matches.empty:
        raise IndexError(
            f"No tuned parameters for model {model_name!r} in sheet {sheet!r}"
        )
    row = matches.iloc[0]

    # Parse the dictionary of parameters from the 'Best Parameters' column.
    # literal_eval only accepts Python literals, so no code is executed.
    parameters = ast.literal_eval(row['Best Parameters'])
    if not isinstance(parameters, dict):
        raise TypeError(
            f"'Best Parameters' for model {model_name!r} in sheet {sheet!r} "
            f"must be a dict, got {type(parameters).__name__}"
        )

    # Apply the model-specific overrides
    parameters.update(forced_params.get(model_name, {}))

    # Add class weighting for the cost-sensitive learning method
    if 'CW' in method_str:
        if model_name in ['Bagging', 'AB']:
            # Ensembles receive the weighting through their base estimator
            parameters['estimator'] = DecisionTreeClassifier(
                class_weight='balanced'
            )
        else:
            # NOTE(review): this key is passed to every other model; some
            # estimators (e.g. MLPClassifier) may not accept a class_weight
            # argument -- confirm which models are actually used with 'CW'.
            parameters['class_weight'] = 'balanced'

    # Fetch the class of the model
    try:
        model_class = model_mapping[model_name]
    except KeyError:
        raise KeyError(
            f"Unsupported model name {model_name!r}; "
            f"expected one of {sorted(model_mapping)}"
        ) from None

    # Initialize the model with the parameters
    return model_class(**parameters)
# --------------------------------------------------------------------------------------------------------
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# Setup
# Setup
...
@@ -73,18 +126,12 @@ if __name__ == "__main__":
...
@@ -73,18 +126,12 @@ if __name__ == "__main__":
2
:
"OVER"
,
2
:
"OVER"
,
3
:
"UNDER"
3
:
"UNDER"
}
}
# Best model initialization (to be completed - manually)
# Mapping group-method -> (isTreeModel:bool, model)
model_choices
=
{
models
=
{
"ORIG"
:
"XGB"
,
"pre_ORIG"
:
(
None
,
None
),
"ORIG_CW"
:
"RF"
,
"pre_ORIG_CW"
:
(
None
,
None
),
"OVER"
:
"XGB"
,
"pre_OVER"
:
(
None
,
None
),
"UNDER"
:
"XGB"
"pre_UNDER"
:
(
None
,
None
),
"post_ORIG"
:
(
None
,
None
),
"post_ORIG"
:
(
None
,
None
),
"post_ORIG_CW"
:
(
None
,
None
),
"post_OVER"
:
(
None
,
None
),
"post_UNDER"
:
(
None
,
None
),
}
}
# # Retrieve attribute names in order
# # Retrieve attribute names in order
# df = pd.read_csv("..\gen_train_data\data\input\pre_dataset.csv")
# df = pd.read_csv("..\gen_train_data\data\input\pre_dataset.csv")
...
@@ -102,19 +149,26 @@ if __name__ == "__main__":
...
@@ -102,19 +149,26 @@ if __name__ == "__main__":
# Get train dataset based on group and method
# Get train dataset based on group and method
X_train
=
data_dic
[
'X_train_'
+
method
+
group
]
X_train
=
data_dic
[
'X_train_'
+
method
+
group
]
y_train
=
data_dic
[
'y_train_'
+
method
+
group
]
y_train
=
data_dic
[
'y_train_'
+
method
+
group
]
# Retrieve best model for this group-method context
method_name
=
method_names
[
j
]
model_info
=
models
[
group
+
'_'
+
method_names
[
j
]]
# Get chosen tuned model for this group and method context
is_tree
=
model_info
[
0
]
model
=
get_chosen_model
(
group_str
=
group
,
method_str
=
method_name
,
model_name
=
model_choices
[
method_name
])
model
=
model_info
[
1
]
print
(
f
'Name: {model_choices[method_name]}'
)
# Fit model with training data
print
(
model
.
get_params
())
fitted_model
=
model
.
fit
(
X_train
[:
500
],
y_train
[:
500
])
# # --------------------------------------------------------------------------------------------------------
# Check if we are dealing with a tree vs nn model
# # Retrieve best model for this group-method context
if
is_tree
:
# model_info = models[group + '_' + method_names[j]]
explainer
=
shap
.
TreeExplainer
(
fitted_model
,
X_test
[:
500
])
# is_tree = model_info[0]
else
:
# model = model_info[1]
explainer
=
shap
.
KernelExplainer
(
fitted_model
.
predict
,
X_test
[:
500
])
# # Fit model with training data
# Compute shap values
# fitted_model = model.fit(X_train[:500], y_train[:500])
shap_vals
=
explainer
.
shap_values
(
X_test
[:
500
],
check_additivity
=
False
)
# Change to true for final results
# # Check if we are dealing with a tree vs nn model
# if is_tree:
# explainer = shap.TreeExplainer(fitted_model, X_test[:500])
# else:
# explainer = shap.KernelExplainer(fitted_model.predict, X_test[:500])
# # Compute shap values
# shap_vals = explainer.shap_values(X_test[:500], check_additivity=False) # Change to true for final results
# # ---------------------------------------------------------------------------------------------------------
# Save results
# Save results
np
.
save
(
f
"shap_values/{group}_{method_names[j]}"
,
shap_vals
)
#
np.save(f"shap_values/{group}_{method_names[j]}", shap_vals)
# --------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment