From d72df2cbf5866c89eb94e8eee44941552d8e8931 Mon Sep 17 00:00:00 2001
From: joaquintb
Date: Sat, 4 May 2024 12:23:22 +0200
Subject: [PATCH] loaded data and defined models

---
 training_models/train_models.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/training_models/train_models.py b/training_models/train_models.py
index d98b69f..a16f1bd 100644
--- a/training_models/train_models.py
+++ b/training_models/train_models.py
@@ -1,2 +1,80 @@
+"""
+    Selecting best models through cross validation and hyperparameter tuning
+    for each method:
+    1. Original training dataset
+    2. Original training dataset - Cost sensitive
+    3. Oversampling
+    4. Undersampling
+"""
+
+# Libraries
+# --------------------------------------------------------------------------------------------------------
+import pandas as pd
+import numpy as np
+from xgboost import XGBClassifier
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score
+from sklearn.model_selection import StratifiedKFold, cross_validate
+from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
+from sklearn.neural_network import MLPClassifier
+from sklearn.svm import SVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+# --------------------------------------------------------------------------------------------------------
+
+
 if __name__ == "__main__":
-    print("Hello World!")
\ No newline at end of file
+
+    # Reading test and training data
+    # --------------------------------------------------------------------------------------------------------
+    # Load test data (NOTE: allow_pickle=True unpickles objects, so only load trusted .npy files)
+    X_test_pre = np.load('gen_train_data/data/output/pre/X_test_pre.npy', allow_pickle=True)
+    y_test_pre = np.load('gen_train_data/data/output/pre/y_test_pre.npy', allow_pickle=True)
+    X_test_post = np.load('gen_train_data/data/output/post/X_test_post.npy', allow_pickle=True)
+    y_test_post = np.load('gen_train_data/data/output/post/y_test_post.npy', allow_pickle=True)
+
+    # Load ORIGINAL training data
+    X_train_pre = np.load('gen_train_data/data/output/pre/X_train_pre.npy', allow_pickle=True)
+    y_train_pre = np.load('gen_train_data/data/output/pre/y_train_pre.npy', allow_pickle=True)
+    X_train_post = np.load('gen_train_data/data/output/post/X_train_post.npy', allow_pickle=True)
+    y_train_post = np.load('gen_train_data/data/output/post/y_train_post.npy', allow_pickle=True)
+
+    # Load oversampled training data
+    X_train_over_pre = np.load('gen_train_data/data/output/pre/X_train_over_pre.npy', allow_pickle=True)
+    y_train_over_pre = np.load('gen_train_data/data/output/pre/y_train_over_pre.npy', allow_pickle=True)
+    X_train_over_post = np.load('gen_train_data/data/output/post/X_train_over_post.npy', allow_pickle=True)
+    y_train_over_post = np.load('gen_train_data/data/output/post/y_train_over_post.npy', allow_pickle=True)
+
+    # Load undersampled training data
+    X_train_under_pre = np.load('gen_train_data/data/output/pre/X_train_under_pre.npy', allow_pickle=True)
+    y_train_under_pre = np.load('gen_train_data/data/output/pre/y_train_under_pre.npy', allow_pickle=True)
+    X_train_under_post = np.load('gen_train_data/data/output/post/X_train_under_post.npy', allow_pickle=True)
+    y_train_under_post = np.load('gen_train_data/data/output/post/y_train_under_post.npy', allow_pickle=True)
+    # --------------------------------------------------------------------------------------------------------
+
+    # Defining the models to train
+    # --------------------------------------------------------------------------------------------------------
+    # 1. No class weight
+    models_1 = {"DT" : DecisionTreeClassifier(),
+                "RF" : RandomForestClassifier(),
+                "Bagging" : BaggingClassifier(),
+                "AB" : AdaBoostClassifier(),
+                "XGB": XGBClassifier(),
+                "LR" : LogisticRegression(),
+                "ElNet" : LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5), # elasticnet needs saga + explicit l1_ratio (0.5 = starting value, to be tuned)
+                "SVM" : SVC(),
+                "MLP" : MLPClassifier(),
+                }
+
+    # 2. Class weight
+    models_2 = {"DT" : DecisionTreeClassifier(class_weight='balanced'),
+                "RF" : RandomForestClassifier(class_weight='balanced'),
+                "Bagging" : BaggingClassifier(), # <- no class_weight set; same as models_1
+                "AB" : AdaBoostClassifier(), # <- no class_weight set; same as models_1
+                "XGB": XGBClassifier(), # <- no class_weight set; same as models_1
+                "LR" : LogisticRegression(class_weight='balanced'),
+                "ElNet" : LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced'), # elasticnet needs saga + explicit l1_ratio
+                "SVM" : SVC(class_weight='balanced'),
+                "MLP" : MLPClassifier(), # <- no class_weight set; same as models_1
+                }
+    # --------------------------------------------------------------------------------------------------------
-- 
2.24.1