Commit 070552ff authored by Joaquin Torres

Paths updated: hard-coded 'output' directories renamed to 'results' across the scripts and notebooks (see the verification sketch below)

parent 39e40062
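The change is mechanical: every hard-coded 'output' path prefix becomes 'results' (e.g. '../gen_train_data/output/pre/...' becomes '../gen_train_data/results/pre/...'). A minimal sketch of how the rename could be double-checked after applying it; the scan below is an assumption added for illustration, not part of this commit, and its glob and regex are only a rough filter:

```python
# Sketch (not part of this commit): scan the working tree for any hard-coded
# 'output' path prefixes that the output -> results rename may have missed.
import re
from pathlib import Path

# Rough filter for quoted paths that still contain an 'output' directory segment
# (e.g. './output/...', '../gen_train_data/output/pre/...', './output_hyperparam/...').
OLD_PATH = re.compile(r"""['"](\.\.?/)?[\w./]*output\w*/""")

for path in Path(".").rglob("*"):
    if path.suffix not in {".py", ".ipynb"}:
        continue
    for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
        if OLD_PATH.search(line):
            print(f"{path}:{lineno}: {line.strip()}")
```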
......@@ -23,10 +23,10 @@ from sklearn.tree import DecisionTreeClassifier
# --------------------------------------------------------------------------------------------------------
def read_test_data(attribute_names):
# Load test data
X_test_pre = np.load('../gen_train_data/output/pre/X_test_pre.npy', allow_pickle=True)
y_test_pre = np.load('../gen_train_data/output/pre/y_test_pre.npy', allow_pickle=True)
X_test_post = np.load('../gen_train_data/output/post/X_test_post.npy', allow_pickle=True)
y_test_post = np.load('../gen_train_data/output/post/y_test_post.npy', allow_pickle=True)
X_test_pre = np.load('../gen_train_data/results/pre/X_test_pre.npy', allow_pickle=True)
y_test_pre = np.load('../gen_train_data/results/pre/y_test_pre.npy', allow_pickle=True)
X_test_post = np.load('../gen_train_data/results/post/X_test_post.npy', allow_pickle=True)
y_test_post = np.load('../gen_train_data/results/post/y_test_post.npy', allow_pickle=True)
# Type conversion needed
data_dic = {
......@@ -43,7 +43,7 @@ if __name__ == "__main__":
# Setup
# --------------------------------------------------------------------------------------------------------
# Retrieve attribute names in order
attribute_names = list(np.load('../EDA/output/feature_names/all_features.npy', allow_pickle=True))
attribute_names = list(np.load('../EDA/results/feature_names/all_features.npy', allow_pickle=True))
# Reading data
data_dic = read_test_data(attribute_names)
method_names = {
......@@ -70,7 +70,7 @@ if __name__ == "__main__":
print(f"{group}-{method_names[j]}")
method_name = method_names[j]
model_name = model_choices[method_name]
model_path = f"../model_selection/output/fitted_models/{group}_{method_names[j]}_{model_name}.pkl"
model_path = f"../model_selection/results/fitted_models/{group}_{method_names[j]}_{model_name}.pkl"
# Load the fitted model from disk
with open(model_path, 'rb') as file:
fitted_model = pickle.load(file)
......@@ -84,5 +84,5 @@ if __name__ == "__main__":
shap_interaction_values = explainer.shap_interaction_values(X_test)
# ---------------------------------------------------------------------------------------------------------
# Save results
np.save(f"./output/shap_inter_values/{group}_{method_names[j]}", shap_interaction_values)
np.save(f"./results/shap_inter_values/{group}_{method_names[j]}", shap_interaction_values)
# --------------------------------------------------------------------------------------------------------
\ No newline at end of file
......@@ -23,10 +23,10 @@ from sklearn.tree import DecisionTreeClassifier
# --------------------------------------------------------------------------------------------------------
def read_test_data(attribute_names):
# Load test data
X_test_pre = np.load('../gen_train_data/output/pre/X_test_pre.npy', allow_pickle=True)
y_test_pre = np.load('../gen_train_data/output/pre/y_test_pre.npy', allow_pickle=True)
X_test_post = np.load('../gen_train_data/output/post/X_test_post.npy', allow_pickle=True)
y_test_post = np.load('../gen_train_data/output/post/y_test_post.npy', allow_pickle=True)
X_test_pre = np.load('../gen_train_data/results/pre/X_test_pre.npy', allow_pickle=True)
y_test_pre = np.load('../gen_train_data/results/pre/y_test_pre.npy', allow_pickle=True)
X_test_post = np.load('../gen_train_data/results/post/X_test_post.npy', allow_pickle=True)
y_test_post = np.load('../gen_train_data/results/post/y_test_post.npy', allow_pickle=True)
# Type conversion needed
data_dic = {
......@@ -43,7 +43,7 @@ if __name__ == "__main__":
# Setup
# --------------------------------------------------------------------------------------------------------
# Retrieve attribute names in order
attribute_names = list(np.load('../EDA/output/feature_names/all_features.npy', allow_pickle=True))
attribute_names = list(np.load('../EDA/results/feature_names/all_features.npy', allow_pickle=True))
# Reading data
data_dic = read_test_data(attribute_names)
method_names = {
......@@ -70,7 +70,7 @@ if __name__ == "__main__":
print(f"{group}-{method_names[j]}")
method_name = method_names[j]
model_name = model_choices[method_name]
model_path = f"../model_selection/output/fitted_models/{group}_{method_names[j]}_{model_name}.pkl"
model_path = f"../model_selection/results/fitted_models/{group}_{method_names[j]}_{model_name}.pkl"
# Load the fitted model from disk
with open(model_path, 'rb') as file:
fitted_model = pickle.load(file)
......@@ -84,5 +84,5 @@ if __name__ == "__main__":
shap_vals = explainer.shap_values(X_test, check_additivity=True) # check_additivity kept True for final results
# ---------------------------------------------------------------------------------------------------------
# Save results
np.save(f"./output/shap_values/{group}_{method_names[j]}", shap_vals)
np.save(f"./results/shap_values/{group}_{method_names[j]}", shap_vals)
# --------------------------------------------------------------------------------------------------------
\ No newline at end of file
......@@ -45,13 +45,13 @@
"outputs": [],
"source": [
"# Retrieve attribute names in order\n",
"attribute_names = attribute_names = list(np.load('../EDA/output/feature_names/all_features.npy', allow_pickle=True))\n",
"attribute_names = attribute_names = list(np.load('../EDA/results/feature_names/all_features.npy', allow_pickle=True))\n",
"\n",
"# Load test data\n",
"X_test_pre = np.load('../gen_train_data/output/pre/X_test_pre.npy', allow_pickle=True)\n",
"y_test_pre = np.load('../gen_train_data/output/pre/y_test_pre.npy', allow_pickle=True)\n",
"X_test_post = np.load('../gen_train_data/output/post/X_test_post.npy', allow_pickle=True)\n",
"y_test_post = np.load('../gen_train_data/output/post/y_test_post.npy', allow_pickle=True)\n",
"X_test_pre = np.load('../gen_train_data/results/pre/X_test_pre.npy', allow_pickle=True)\n",
"y_test_pre = np.load('../gen_train_data/results/pre/y_test_pre.npy', allow_pickle=True)\n",
"X_test_post = np.load('../gen_train_data/results/post/X_test_post.npy', allow_pickle=True)\n",
"y_test_post = np.load('../gen_train_data/results/post/y_test_post.npy', allow_pickle=True)\n",
"\n",
"# Type conversion needed \n",
"data_dic = {\n",
......@@ -82,8 +82,8 @@
"}\n",
"\n",
"# Load names of social and individual attributes\n",
"soc_var_names = np.load('../EDA/output/feature_names/social_factors.npy', allow_pickle=True)\n",
"ind_var_names = np.load('../EDA/output/feature_names/individual_factors.npy', allow_pickle=True)"
"soc_var_names = np.load('../EDA/results/feature_names/social_factors.npy', allow_pickle=True)\n",
"ind_var_names = np.load('../EDA/results/feature_names/individual_factors.npy', allow_pickle=True)"
]
},
{
......@@ -106,7 +106,7 @@
" X_test = data_dic['X_test_' + group]\n",
" y_test = data_dic['y_test_' + group]\n",
" model_name = model_choices[method_name]\n",
" shap_vals = np.load(f'./output/shap_values/{group}_{method_name}.npy')\n",
" shap_vals = np.load(f'./results/shap_values/{group}_{method_name}.npy')\n",
" ax = plt.subplot(2,1,i+1) # 2 rows (pre - post) 1 column\n",
" # show = False to modify plot before showing\n",
" shap.summary_plot(shap_vals, X_test, max_display=len(attribute_names), show=False)\n",
......@@ -129,7 +129,7 @@
"plt.suptitle(f'SHAP Summary Plots PRE vs POST - Pipeline: Oversampling - Model: {model_name}\\n\\n')\n",
"plt.subplots_adjust(wspace=1)\n",
"plt.tight_layout()\n",
"plt.savefig(f'./output/plots/shap_summary/{method_name}_{model_name}.svg', format='svg', dpi=1250)\n",
"plt.savefig(f'./results/plots/shap_summary/{method_name}_{model_name}.svg', format='svg', dpi=1250)\n",
"plt.show()"
]
},
......@@ -145,7 +145,7 @@
" X_test = data_dic['X_test_' + group]\n",
" y_test = data_dic['y_test_' + group]\n",
" model_name = model_choices[method_name]\n",
" shap_vals = np.load(f'./output/shap_values/{group}_{method_name}.npy')\n",
" shap_vals = np.load(f'./results/shap_values/{group}_{method_name}.npy')\n",
" shap_vals = shap_vals[:,:,1] # Select shap values for positive class\n",
" ax = plt.subplot(2,1,i+1)\n",
" shap.summary_plot(shap_vals, X_test, max_display=len(attribute_names), show=False)\n",
......@@ -166,7 +166,7 @@
"plt.suptitle(f'SHAP Summary Plots PRE vs POST - Pipeline: Original with Class Weight - Model: {model_name}\\n\\n')\n",
"plt.subplots_adjust(wspace=1)\n",
"plt.tight_layout()\n",
"plt.savefig(f'./output/plots/shap_summary/{method_name}_{model_name}.svg', format='svg', dpi=1250)\n",
"plt.savefig(f'./results/plots/shap_summary/{method_name}_{model_name}.svg', format='svg', dpi=1250)\n",
"plt.show()"
]
},
......@@ -203,7 +203,7 @@
" y_test = data_dic['y_test_' + group]\n",
" model_name = model_choices[method_name]\n",
"\n",
" shap_inter_vals = np.load(f'./output/shap_inter_values/{group}_{method_name}.npy')\n",
" shap_inter_vals = np.load(f'./results/shap_inter_values/{group}_{method_name}.npy')\n",
" if method_name == 'ORIG_CW':\n",
" shap_inter_vals = shap_inter_vals[:,:,:,1] # Take info about positive class\n",
"\n",
......@@ -255,7 +255,7 @@
" # plt.suptitle(f'Simplified Example SHAP Summary Interaction Plot\\n', fontsize=15, fontweight='bold', x=0.5, y=0.95, ha='center')\n",
" plt.suptitle(f'SHAP Summary Interaction Plot - {method_name} - {str.upper(group)}\\n', fontsize=20, fontweight='bold') #, x=0.5, y=0.95, ha='center'\n",
" plt.tight_layout()\n",
" plt.savefig(f'./output/plots/shap_inter_summary/{str.upper(group)}_{method_name}_{model_name}.svg', format='svg', dpi=700)\n",
" plt.savefig(f'./results/plots/shap_inter_summary/{str.upper(group)}_{method_name}_{model_name}.svg', format='svg', dpi=700)\n",
" # plt.show()"
]
},
......@@ -300,8 +300,8 @@
"outputs": [],
"source": [
"# Load array of shap inter matrices for pre and post for the chosen method\n",
"shap_inter_vals_pre = np.load(f'./output/shap_inter_values/pre_{method_name}.npy')\n",
"shap_inter_vals_post = np.load(f'./output/shap_inter_values/post_{method_name}.npy')\n",
"shap_inter_vals_pre = np.load(f'./results/shap_inter_values/pre_{method_name}.npy')\n",
"shap_inter_vals_post = np.load(f'./results/shap_inter_values/post_{method_name}.npy')\n",
"if method_name == 'ORIG_CW':\n",
" shap_inter_vals_pre = shap_inter_vals_pre[:,:,:,1] # Take info about positive class\n",
" shap_inter_vals_post = shap_inter_vals_post[:,:,:,1]\n",
......@@ -367,7 +367,7 @@
" cbar = ax.collections[0].colorbar\n",
" cbar.set_label('Interaction POST - Interaction PRE', labelpad=15, rotation=270, verticalalignment='bottom')\n",
"\n",
" plt.savefig(f'./output/plots/heatmaps_interactions/DIST_{method_name}.svg', format='svg', dpi=600)\n",
" plt.savefig(f'./results/plots/heatmaps_interactions/DIST_{method_name}.svg', format='svg', dpi=600)\n",
" \n",
" plt.show()"
]
......@@ -426,7 +426,7 @@
" interactions_df['SHAP Inter Variation PRE-POST'].abs().sort_values(ascending=False).index)\n",
"\n",
"# Export to Excel\n",
"sorted_interactions_df.to_excel(f'./output/inter_variation_{method_name}.xlsx', index=False)\n",
"sorted_interactions_df.to_excel(f'./results/inter_variation_{method_name}.xlsx', index=False)\n",
"\n",
"print(\"Excel file has been created.\")"
]
......@@ -448,7 +448,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.12.2"
}
},
"nbformat": 4,
......
......@@ -36,8 +36,8 @@
"outputs": [],
"source": [
"# Load clean datasets\n",
"df_pre = pd.read_csv('../EDA/output/datasets/pre_dataset.csv')\n",
"df_post = pd.read_csv('../EDA/output/datasets/post_dataset.csv')"
"df_pre = pd.read_csv('../EDA/results/datasets/pre_dataset.csv')\n",
"df_post = pd.read_csv('../EDA/results/datasets/post_dataset.csv')"
]
},
{
......@@ -90,10 +90,10 @@
"outputs": [],
"source": [
"# Save test data\n",
"np.save('./output/pre/X_test_pre.npy', X_test_pre)\n",
"np.save('./output/pre/y_test_pre.npy', y_test_pre)\n",
"np.save('./output/post/X_test_post.npy', X_test_post)\n",
"np.save('./output/post/y_test_post.npy', y_test_post)"
"np.save('./results/pre/X_test_pre.npy', X_test_pre)\n",
"np.save('./results/pre/y_test_pre.npy', y_test_pre)\n",
"np.save('./results/post/X_test_post.npy', X_test_post)\n",
"np.save('./results/post/y_test_post.npy', y_test_post)"
]
},
{
......@@ -103,10 +103,10 @@
"outputs": [],
"source": [
"# Save ORIGINAL training data\n",
"np.save('./output/pre/X_train_pre.npy', X_train_pre)\n",
"np.save('./output/pre/y_train_pre.npy', y_train_pre)\n",
"np.save('./output/post/X_train_post.npy', X_train_post)\n",
"np.save('./output/post/y_train_post.npy', y_train_post)"
"np.save('./results/pre/X_train_pre.npy', X_train_pre)\n",
"np.save('./results/pre/y_train_pre.npy', y_train_pre)\n",
"np.save('./results/post/X_train_post.npy', X_train_post)\n",
"np.save('./results/post/y_train_post.npy', y_train_post)"
]
},
{
......@@ -135,10 +135,10 @@
"outputs": [],
"source": [
"# Save oversampled training data\n",
"np.save('./output/pre/X_train_over_pre.npy', X_train_over_pre)\n",
"np.save('./output/pre/y_train_over_pre.npy', y_train_over_pre)\n",
"np.save('./output/post/X_train_over_post.npy', X_train_over_post)\n",
"np.save('./output/post/y_train_over_post.npy', y_train_over_post)"
"np.save('./results/pre/X_train_over_pre.npy', X_train_over_pre)\n",
"np.save('./results/pre/y_train_over_pre.npy', y_train_over_pre)\n",
"np.save('./results/post/X_train_over_post.npy', X_train_over_post)\n",
"np.save('./results/post/y_train_over_post.npy', y_train_over_post)"
]
},
{
......@@ -167,10 +167,10 @@
"outputs": [],
"source": [
"# Save undersampled training data\n",
"np.save('./output/pre/X_train_under_pre.npy', X_train_under_pre)\n",
"np.save('./output/pre/y_train_under_pre.npy', y_train_under_pre)\n",
"np.save('./output/post/X_train_under_post.npy', X_train_under_post)\n",
"np.save('./output/post/y_train_under_post.npy', y_train_under_post)"
"np.save('./results/pre/X_train_under_pre.npy', X_train_under_pre)\n",
"np.save('./results/pre/y_train_under_pre.npy', y_train_under_pre)\n",
"np.save('./results/post/X_train_under_post.npy', X_train_under_post)\n",
"np.save('./results/post/y_train_under_post.npy', y_train_under_post)"
]
}
],
......
......@@ -21,7 +21,7 @@ if __name__ == "__main__":
for group in ['pre', 'post']:
for method in ['_ORIG', '_ORIG_CW', '_OVER', '_UNDER']:
# Read CV metrics sheet for current group and method
df = pd.read_excel('./output_cv_metrics/metrics.xlsx', sheet_name=group+method)
df = pd.read_excel('./results/cv_metrics/metrics.xlsx', sheet_name=group+method)
# Model names based on cost-sensitive training or not
if method == '_ORIG_CW':
model_names = model_names_cs
......@@ -47,7 +47,7 @@ if __name__ == "__main__":
if metric_name in ['F1', 'PREC', 'REC', 'ACC', 'AUROC', 'AUPRC']:
ax.set_ylim(0, 1)
plt.tight_layout()
fig.savefig(f'./output/cv_metrics/distributions/{group}{method}.svg', format='svg', dpi=600)
fig.savefig(f'./results/cv_metrics/distributions/{group}{method}.svg', format='svg', dpi=600)
plt.close(fig)
print("Succesful distribution plots generation")
......
......@@ -34,22 +34,22 @@ import ast # String to dictionary
def read_data():
# Load ORIGINAL training data
X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)
X_train_pre = np.load('../gen_train_data/data/results/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/data/results/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/data/results/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/data/results/post/y_train_post.npy', allow_pickle=True)
# Load oversampled training data
X_train_over_pre = np.load('../gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)
X_train_over_pre = np.load('../gen_train_data/data/results/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/data/results/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/data/results/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/data/results/post/y_train_over_post.npy', allow_pickle=True)
# Load undersampled training data
X_train_under_pre = np.load('../gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
X_train_under_pre = np.load('../gen_train_data/data/results/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/data/results/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/data/results/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/data/results/post/y_train_under_post.npy', allow_pickle=True)
data_dic = {
"X_train_pre": X_train_pre,
......@@ -73,7 +73,7 @@ def read_data():
# --------------------------------------------------------------------------------------------------------
def get_tuned_models(group_str, method_str):
# Read sheet corresponding to group and method with tuned models and their hyperparam
tuned_models_df = pd.read_excel("./output_hyperparam/hyperparamers.xlsx",sheet_name=f"{group_str}_{method_str}")
tuned_models_df = pd.read_excel("./results_hyperparam/hyperparamers.xlsx",sheet_name=f"{group_str}_{method_str}")
# Mapping from model abbreviations to sklearn model classes
model_mapping = {
'DT': DecisionTreeClassifier,
......@@ -290,10 +290,10 @@ if __name__ == "__main__":
scores_sheets[sheet_name] = scores_df
# Adjust layout and save figure
plt.tight_layout()
plt.savefig(f'./output/cv_metrics/curves/{group}_{method_names[j]}.svg', format='svg', dpi=500)
plt.savefig(f'./results/cv_metrics/curves/{group}_{method_names[j]}.svg', format='svg', dpi=500)
plt.close(fig)
# Write results to Excel file
with pd.ExcelWriter('./output./cv_metrics/metrics.xlsx') as writer:
with pd.ExcelWriter('./results/cv_metrics/metrics.xlsx') as writer:
for sheet_name, data in scores_sheets.items():
data.to_excel(writer, sheet_name=sheet_name)
print("Successful cv metric generation for tuned models")
......
......@@ -24,22 +24,22 @@ import ast # String to dictionary
# --------------------------------------------------------------------------------------------------------
def read_training_data(attribute_names):
# Load ORIGINAL training data
X_train_pre = np.load('../gen_train_data/output/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/output/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/output/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/output/post/y_train_post.npy', allow_pickle=True)
X_train_pre = np.load('../gen_train_data/results/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/results/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/results/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/results/post/y_train_post.npy', allow_pickle=True)
# Load oversampled training data
X_train_over_pre = np.load('../gen_train_data/output/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/output/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/output/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/output/post/y_train_over_post.npy', allow_pickle=True)
X_train_over_pre = np.load('../gen_train_data/results/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/results/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/results/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/results/post/y_train_over_post.npy', allow_pickle=True)
# Load undersampled training data
X_train_under_pre = np.load('../gen_train_data/output/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/output/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/output/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/output/post/y_train_under_post.npy', allow_pickle=True)
X_train_under_pre = np.load('../gen_train_data/results/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/results/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/results/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/results/post/y_train_under_post.npy', allow_pickle=True)
# Type conversion needed
data_dic = {
......@@ -63,7 +63,7 @@ def read_training_data(attribute_names):
# --------------------------------------------------------------------------------------------------------
def get_chosen_model(group_str, method_str, model_name):
# Read sheet corresponding to group and method with tuned models and their hyperparameters
tuned_models_df = pd.read_excel("../model_selection/output/hyperparam/hyperparamers.xlsx", sheet_name=f"{group_str}_{method_str}")
tuned_models_df = pd.read_excel("../model_selection/results/hyperparam/hyperparamers.xlsx", sheet_name=f"{group_str}_{method_str}")
tuned_models_df.columns = ['Model', 'Best Parameters']
# Define the mapping from model abbreviations to sklearn model classes
......@@ -117,7 +117,7 @@ if __name__ == "__main__":
# Setup
# --------------------------------------------------------------------------------------------------------
# Retrieve attribute names in order
attribute_names = list(np.load('../EDA/output/feature_names/all_features.npy', allow_pickle=True))
attribute_names = list(np.load('../EDA/results/feature_names/all_features.npy', allow_pickle=True))
# Reading data
data_dic = read_training_data(attribute_names)
method_names = {
......@@ -147,7 +147,7 @@ if __name__ == "__main__":
model, is_tree = get_chosen_model(group_str=group, method_str=method_name, model_name=model_choices[method_name])
fitted_model = model.fit(X_train, y_train)
# Define the file path where you want to save the model
model_save_path = f"./output/fitted_models/{group}_{method_names[j]}_{model_choices[method_name]}.pkl"
model_save_path = f"./results/fitted_models/{group}_{method_names[j]}_{model_choices[method_name]}.pkl"
# Save the model to disk
with open(model_save_path, 'wb') as f:
pickle.dump(fitted_model, f)
......
......@@ -32,22 +32,22 @@ from scipy.stats import randint, uniform
def read_data():
# Load ORIGINAL training data
X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)
X_train_pre = np.load('../gen_train_data/data/results/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/data/results/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/data/results/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/data/results/post/y_train_post.npy', allow_pickle=True)
# Load oversampled training data
X_train_over_pre = np.load('../gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)
X_train_over_pre = np.load('../gen_train_data/data/results/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/data/results/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/data/results/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/data/results/post/y_train_over_post.npy', allow_pickle=True)
# Load undersampled training data
X_train_under_pre = np.load('../gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
X_train_under_pre = np.load('../gen_train_data/data/results/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/data/results/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/data/results/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/data/results/post/y_train_under_post.npy', allow_pickle=True)
data_dic = {
"X_train_pre": X_train_pre,
......@@ -163,7 +163,7 @@ if __name__ == "__main__":
sheets_dict[sheet_name] = hyperparam_df
# Write results to Excel file
with pd.ExcelWriter('./output/hyperparam/hyperparamers.xlsx') as writer:
with pd.ExcelWriter('./results/hyperparam/hyperparamers.xlsx') as writer:
for sheet_name, data in sheets_dict.items():
data.to_excel(writer, sheet_name=sheet_name)
......
......@@ -33,28 +33,28 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable # Custom color bar for c
# --------------------------------------------------------------------------------------------------------
def read_data():
# Load test data
X_test_pre = np.load('../gen_train_data/data/output/pre/X_test_pre.npy', allow_pickle=True)
y_test_pre = np.load('../gen_train_data/data/output/pre/y_test_pre.npy', allow_pickle=True)
X_test_post = np.load('../gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True)
y_test_post = np.load('../gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True)
X_test_pre = np.load('../gen_train_data/data/results/pre/X_test_pre.npy', allow_pickle=True)
y_test_pre = np.load('../gen_train_data/data/results/pre/y_test_pre.npy', allow_pickle=True)
X_test_post = np.load('../gen_train_data/data/results/post/X_test_post.npy', allow_pickle=True)
y_test_post = np.load('../gen_train_data/data/results/post/y_test_post.npy', allow_pickle=True)
# Load ORIGINAL training data
X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)
X_train_pre = np.load('../gen_train_data/data/results/pre/X_train_pre.npy', allow_pickle=True)
y_train_pre = np.load('../gen_train_data/data/results/pre/y_train_pre.npy', allow_pickle=True)
X_train_post = np.load('../gen_train_data/data/results/post/X_train_post.npy', allow_pickle=True)
y_train_post = np.load('../gen_train_data/data/results/post/y_train_post.npy', allow_pickle=True)
# Load oversampled training data
X_train_over_pre = np.load('../gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)
X_train_over_pre = np.load('../gen_train_data/data/results/pre/X_train_over_pre.npy', allow_pickle=True)
y_train_over_pre = np.load('../gen_train_data/data/results/pre/y_train_over_pre.npy', allow_pickle=True)
X_train_over_post = np.load('../gen_train_data/data/results/post/X_train_over_post.npy', allow_pickle=True)
y_train_over_post = np.load('../gen_train_data/data/results/post/y_train_over_post.npy', allow_pickle=True)
# Load undersampled training data
X_train_under_pre = np.load('../gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
X_train_under_pre = np.load('../gen_train_data/data/results/pre/X_train_under_pre.npy', allow_pickle=True)
y_train_under_pre = np.load('../gen_train_data/data/results/pre/y_train_under_pre.npy', allow_pickle=True)
X_train_under_post = np.load('../gen_train_data/data/results/post/X_train_under_post.npy', allow_pickle=True)
y_train_under_post = np.load('../gen_train_data/data/results/post/y_train_under_post.npy', allow_pickle=True)
data_dic = {
"X_test_pre": X_test_pre,
......@@ -83,7 +83,7 @@ def read_data():
def get_tuned_models(group_str, method_str):
# Read sheet corresponding to group and method with tuned models and their hyperparam
tuned_models_df = pd.read_excel("./output/hyperparam/hyperparamers.xlsx",sheet_name=f"{group_str}_{method_str}")
tuned_models_df = pd.read_excel("./results/hyperparam/hyperparamers.xlsx",sheet_name=f"{group_str}_{method_str}")
# Mapping from model abbreviations to sklearn model classes
model_mapping = {
'DT': DecisionTreeClassifier,
......@@ -280,13 +280,13 @@ if __name__ == "__main__":
# ----------------------------------------------------------
# Adjust layout and save/show figure
plt.tight_layout()
plt.savefig(f'./output/testing/plots/{group}_{method_names[j]}.svg', format='svg', dpi=500)
plt.savefig(f'./results/testing/plots/{group}_{method_names[j]}.svg', format='svg', dpi=500)
plt.close(fig)
# Store the DataFrame in the dictionary with a unique key for each sheet
sheet_name = f"{group}_{method_names[j]}"
scores_sheets[sheet_name] = scores_df
# Write results to Excel file
with pd.ExcelWriter('./output/testing/testing_tuned_models.xlsx') as writer:
with pd.ExcelWriter('./results/testing/testing_tuned_models.xlsx') as writer:
for sheet_name, data in scores_sheets.items():
data.to_excel(writer, sheet_name=sheet_name)
print("Successful evaluation with test dataset")
......