# The aim of this code is to get an overview of the performance of the selected models on the filtered dataset, whether or not it is clustered, and for both outputs.
# We train with a 10-fold scenario and get as output, for each fold, the metrics listed in `scorings` below.
# Convert boolean values to integers (False -> 0, True -> 1), in place, first in
# the feature set and then in the label set. Per the original note, this is
# necessary with the newest numpy versions.
_BOOL_TO_INT = {False: 0, True: 1}
for _frame in (sin_cluster_data_features, sin_cluster_data_label):
    _frame.replace(_BOOL_TO_INT, inplace=True)
# Creation of train and test sets for the dataset with cluster (same steps)
# --- Experiment configuration ---

# Dataset(s) to train on.
datasets = ["Dropout_1"]

# Whether the dataset has been through the filtering step ("FSS") or not ("noFSS").
filtering = ["FSS", "noFSS"]

# Scorings to be used for model evaluation.
scorings = ("roc_auc", "f1", "neg_log_loss")

# Models selected for training, keyed by a short label.
models = {
    "Tree": DecisionTreeClassifier(),
    "RF": RandomForestClassifier(n_estimators=50),
    "Boosting": AdaBoostClassifier(),
    "Bagging": BaggingClassifier(),
    "LR": LogisticRegression(max_iter=1000),
    "SVM": SVC(probability=True),
    "NN": MLPClassifier(max_iter=500),
}

# Empty table (one row per model label) to save the results in for the
# cluster dataset.
cluster_params = pd.DataFrame(
    index=["SVM", "NN", "LR", "Bagging", "RF", "Boosting", "Tree"],
    columns=["Parameters", "Score"],
)

# Independent deep copy with the same layout.
# NOTE(review): presumably holds the results for the dataset *without*
# clustering ("sin_cluster"); the original comment said "cluster dataset" — confirm.
sin_cluster_params = cluster_params.copy(deep=True)