diff --git a/model_selection/hyperparam_tuning.py b/model_selection/hyperparam_tuning.py index 980b3114888cdeb507ec7007fff22d9274a0de22..c4fc4314e335a823d1b7f4b39d5466f5e0c30080 100644 --- a/model_selection/hyperparam_tuning.py +++ b/model_selection/hyperparam_tuning.py @@ -27,7 +27,6 @@ import os # Function to read training datasets # -------------------------------------------------------------------------------------------------------- def read_data(): - import numpy as np # Load ORIGINAL training data X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True) diff --git a/model_selection/test_models.py b/model_selection/test_models.py index 0662ec111815a9330bfb904f82bfab627932618c..bf5697e2e6d49ed54f118b80d0bb3afab098fad7 100644 --- a/model_selection/test_models.py +++ b/model_selection/test_models.py @@ -25,11 +25,41 @@ def read_test_data(): X_test_post = np.load('../gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True) y_test_post = np.load('../gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True) + # Load ORIGINAL training data + X_train_pre = np.load('../gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True) + y_train_pre = np.load('../gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True) + X_train_post = np.load('../gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True) + y_train_post = np.load('../gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True) + + # Load oversampled training data + X_train_over_pre = np.load('../gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True) + y_train_over_pre = np.load('../gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True) + X_train_over_post = np.load('../gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True) + y_train_over_post = np.load('../gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True) + + # Load undersampled training data + X_train_under_pre = np.load('../gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True) + y_train_under_pre = np.load('../gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True) + X_train_under_post = np.load('../gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True) + y_train_under_post = np.load('../gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True) + data_dic = { "X_test_pre": X_test_pre, "y_test_pre": y_test_pre, "X_test_post": X_test_post, "y_test_post": y_test_post, + "X_train_pre": X_train_pre, + "y_train_pre": y_train_pre, + "X_train_post": X_train_post, + "y_train_post": y_train_post, + "X_train_over_pre": X_train_over_pre, + "y_train_over_pre": y_train_over_pre, + "X_train_over_post": X_train_over_post, + "y_train_over_post": y_train_over_post, + "X_train_under_pre": X_train_under_pre, + "y_train_under_pre": y_train_under_pre, + "X_train_under_post": X_train_under_post, + "y_train_under_post": y_train_under_post, } return data_dic @@ -205,24 +235,31 @@ if __name__ == "__main__": scores_sheets = {} # To store score dfs as sheets in the same excel file for i, group in enumerate(['pre', 'post']): # Get test dataset based on group - X = data_dic['X_test' + group] - y = data_dic['y_test' + group] + X_test = data_dic['X_test' + group] + y_test = data_dic['y_test' + group] for j, method in enumerate(['', '', 'over_', 'under_']): + # Get train dataset based on group and method + X_train = data_dic['X_train_' + method + group] + y_train = data_dic['y_train_' + method + group] # Get tuned models for this group and method models = get_tuned_models(group_id=i, method_id=j) # Scores df scores_df = pd.DataFrame(index=models.keys(), columns=scorings.keys()) # Evaluate each model for model_name, model in models.items(): - # At each of the scores of interest - for score_name, scorer in scorings.items(): - score_value = scorer(model, X, y) - scores_df.at[model_name, score_name] = score_value + # ----------- TEMPORAL ------------- + if model_name == "DT": + # Train the model (it was just initialized above) + model.fit(X_train, y_train) + # Evaluate at each of the scores of interest + for score_name, scorer in scorings.items(): + score_value = scorer(model, X_test, y_test) + scores_df.at[model_name, score_name] = score_value # Store the DataFrame in the dictionary with a unique key for each sheet sheet_name = f"{group}_{method_names[j]}" scores_sheets[sheet_name] = scores_df # Write results to Excel file - with pd.ExcelWriter('./training_models/output/testing_tuned_models.xlsx') as writer: + with pd.ExcelWriter('./model_selection/test_results/testing_tuned_models.xlsx') as writer: for sheet_name, data in scores_sheets.items(): data.to_excel(writer, sheet_name=sheet_name) # --------------------------------------------------------------------------------------------------------