# Convert boolean values to integers (False -> 0, True -> 1), in place.
# The original note says this is necessary for the newest numpy versions.
sin_cluster_data_label.replace({False: 0, True: 1}, inplace=True)

# Creation of train and test sets for the dataset with cluster (same steps).

# Dataset(s) to train on.
datasets = ["Dropout_1"]

# Whether the dataset has been through the filtering step ("FSS") or not
# ("noFSS").
filtering = ["FSS", "noFSS"]

# Scorers used for model evaluation, keyed by metric name. The *_scorer
# objects that are not wrapped in make_scorer here are defined elsewhere in
# the file.
scorings = {
    'f1': make_scorer(f1_score),
    'negative_recall': negative_recall_scorer,
    'recall': make_scorer(recall_score),
    'precision': make_scorer(precision_score),
    'TN': TN_scorer,
    'FN': FN_scorer,
    'FP': FP_scorer,
    'TP': TP_scorer,
}

# Resampling method used to balance the output classes.
resample = SMOTETomek()

# Cross-validation protocol: 10 stratified folds, shuffled with a fixed seed
# so that the splits are reproducible.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
# Store the evaluation metrics obtained for the database without cluster in
# the results dataframe: one row per metric, values rounded to 4 decimals.
for row_suffix, score_key in (
    ('_F1', "test_f1"),
    ('_Precision', "test_precision"),
    ('_Recall', "test_recall"),
    ('_TN-prop', "test_negative_recall"),
):
    rounded_scores = np.around(np.array(sin_cluster_scores[score_key]), 4)
    sin_cluster_df.loc[k + row_suffix] = list(rounded_scores)

# Store the confusion-matrix counts (true/false negatives and positives) for
# the database without cluster in a separate dataframe, unrounded.
for row_suffix, score_key in (
    ('_TN', "test_TN"),
    ('_FN', "test_FN"),
    ('_FP', "test_FP"),
    ('_TP', "test_TP"),
):
    sin_cluster_cm.loc[k + row_suffix] = sin_cluster_scores[score_key]
# NOTE(review): these statements write the same rows, from the same
# `sin_cluster_scores`, as the eight assignments immediately preceding them.
# If nothing recomputes `sin_cluster_scores` or changes `k` in between, this
# second pass is redundant -- confirm against the surrounding control flow.

# Metric rows for the database without cluster, rounded to 4 decimals.
for row_suffix, score_key in (
    ('_F1', "test_f1"),
    ('_Precision', "test_precision"),
    ('_Recall', "test_recall"),
    ('_TN-prop', "test_negative_recall"),
):
    rounded_scores = np.around(np.array(sin_cluster_scores[score_key]), 4)
    sin_cluster_df.loc[k + row_suffix] = list(rounded_scores)

# Confusion-matrix count rows for the database without cluster, unrounded.
for row_suffix, score_key in (
    ('_TN', "test_TN"),
    ('_FN', "test_FN"),
    ('_FP', "test_FP"),
    ('_TP', "test_TP"),
):
    sin_cluster_cm.loc[k + row_suffix] = sin_cluster_scores[score_key]
# Pick the second entry of the tuple stored under `k`; per the original note
# this is the model variant trained with clusters.
model = models[k][1]