"""Select the best models through cross-validation and hyperparameter tuning
for each training-data strategy:

    1. Original training dataset
    2. Original training dataset - cost sensitive (class weights)
    3. Oversampling
    4. Undersampling
"""

# Libraries
# --------------------------------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
# --------------------------------------------------------------------------------------------------------


def _load_split(period, name):
    """Load one saved array from the generated-training-data directory.

    Parameters
    ----------
    period : str
        Dataset period subfolder, 'pre' or 'post'.
    name : str
        Array stem without the period suffix, e.g. 'X_train_over'.

    Returns
    -------
    np.ndarray
        The array stored at gen_train_data/data/output/<period>/<name>_<period>.npy.
        allow_pickle=True because the arrays were saved with object dtype.
    """
    return np.load(f'gen_train_data/data/output/{period}/{name}_{period}.npy', allow_pickle=True)


if __name__ == "__main__":
    # Reading training data
    # --------------------------------------------------------------------------------------------------------
    # Load test data
    X_test_pre = _load_split('pre', 'X_test')
    y_test_pre = _load_split('pre', 'y_test')
    X_test_post = _load_split('post', 'X_test')
    y_test_post = _load_split('post', 'y_test')

    # Load ORIGINAL training data
    X_train_pre = _load_split('pre', 'X_train')
    y_train_pre = _load_split('pre', 'y_train')
    X_train_post = _load_split('post', 'X_train')
    y_train_post = _load_split('post', 'y_train')

    # Load oversampled training data
    X_train_over_pre = _load_split('pre', 'X_train_over')
    y_train_over_pre = _load_split('pre', 'y_train_over')
    X_train_over_post = _load_split('post', 'X_train_over')
    y_train_over_post = _load_split('post', 'y_train_over')

    # Load undersampled training data
    X_train_under_pre = _load_split('pre', 'X_train_under')
    y_train_under_pre = _load_split('pre', 'y_train_under')
    X_train_under_post = _load_split('post', 'X_train_under')
    y_train_under_post = _load_split('post', 'y_train_under')
    # --------------------------------------------------------------------------------------------------------

    # Defining the models to train
    # --------------------------------------------------------------------------------------------------------
    # 1. No class weight
    # NOTE: penalty='elasticnet' is only supported by the 'saga' solver and
    # requires l1_ratio; with the default solver ('lbfgs') fit() raises
    # ValueError. l1_ratio=0.5 gives an even L1/L2 mix as a starting point
    # and is expected to be tuned later alongside the other hyperparameters.
    models_1 = {"DT" : DecisionTreeClassifier(),
                "RF" : RandomForestClassifier(),
                "Bagging" : BaggingClassifier(),
                "AB" : AdaBoostClassifier(),
                "XGB": XGBClassifier(),
                "LR" : LogisticRegression(),
                "ElNet" : LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5),
                "SVM" : SVC(),
                "MLP" : MLPClassifier(),
                }

    # 2. Class weight
    # Bagging, AdaBoost, XGB and MLP expose no class_weight parameter, so they
    # are kept with default settings here (cost sensitivity would need a
    # different mechanism, e.g. sample_weight or XGB's scale_pos_weight —
    # TODO confirm whether that is wanted downstream).
    models_2 = {"DT" : DecisionTreeClassifier(class_weight='balanced'),
                "RF" : RandomForestClassifier(class_weight='balanced'),
                "Bagging" : BaggingClassifier(),  # no class_weight support
                "AB" : AdaBoostClassifier(),  # no class_weight support
                "XGB": XGBClassifier(),  # no class_weight support
                "LR" : LogisticRegression(class_weight='balanced'),
                "ElNet" : LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced'),
                "SVM" : SVC(class_weight='balanced'),
                "MLP" : MLPClassifier(),  # no class_weight support
                }
    # --------------------------------------------------------------------------------------------------------