Commit 44116618 authored by Joaquin Torres

Testing metric generation

parent 97658355
...@@ -81,9 +81,9 @@ def get_tuned_models(group_str, method_str): ...@@ -81,9 +81,9 @@ def get_tuned_models(group_str, method_str):
tuned_models = {} tuned_models = {}
# Iterate through each row of the DataFrame # Iterate through each row of the DataFrame
for _, row in tuned_models_df.iterrows(): for _, row in tuned_models_df.iterrows():
model_name = row[0] model_name = row.iloc[0]
# Read dictionary # Read dictionary
parameters = ast.literal_eval(row['Parameters']) parameters = ast.literal_eval(row['Best Parameters'])
# Add extra parameters # Add extra parameters
if model_name == 'AB': if model_name == 'AB':
parameters['algorithm'] = 'SAMME' parameters['algorithm'] = 'SAMME'
...@@ -177,7 +177,7 @@ if __name__ == "__main__": ...@@ -177,7 +177,7 @@ if __name__ == "__main__":
scores_sheets = {} # To store score dfs as sheets in the same excel file scores_sheets = {} # To store score dfs as sheets in the same excel file
for i, group in enumerate(['pre', 'post']): for i, group in enumerate(['pre', 'post']):
for j, method in enumerate(['', '', 'over_', 'under_']): for j, method in enumerate(['', '', 'over_', 'under_']):
print(f"{group}-{method_names[j]}") # print(f"{group}-{method_names[j]}")
# Get train dataset based on group and method # Get train dataset based on group and method
X_train = data_dic['X_train_' + method + group] X_train = data_dic['X_train_' + method + group]
y_train = data_dic['y_train_' + method + group] y_train = data_dic['y_train_' + method + group]
...@@ -188,12 +188,13 @@ if __name__ == "__main__": ...@@ -188,12 +188,13 @@ if __name__ == "__main__":
scores_df = pd.DataFrame(columns=range(1,11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()]) scores_df = pd.DataFrame(columns=range(1,11), index=[f"{model_name}_{metric_name}" for model_name in models.keys() for metric_name in scorings.keys()])
# Metric generation for each model # Metric generation for each model
for model_name, model in models.items(): for model_name, model in models.items():
if model_name == 'DT':
print(f"{group}-{method_names[j]}-{model_name}")
# Retrieve cv scores for our metrics of interest # Retrieve cv scores for our metrics of interest
scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10) scores = cross_validate(model, X_train, y_train, scoring=scorings, cv=cv, return_train_score=True, n_jobs=10)
# Save results of each fold # Save results of each fold
scores_df.loc[model_name +'_F1']=list(np.around(np.array(scores["test_f1"]),4)) for metric_name in scorings.keys():
scores_df.loc[model_name +'_PREC']=list(np.around(np.array(scores["test_precision"]),4)) scores_df.loc[model_name + f'_{metric_name}']=list(np.around(np.array(scores[f"test_{metric_name}"]),4))
scores_df.loc[model_name +'_REC']=list(np.around(np.array(scores["test_recall"]),4))
# Store the DataFrame in the dictionary with a unique key for each sheet # Store the DataFrame in the dictionary with a unique key for each sheet
sheet_name = f"{group}_{method_names[j]}" sheet_name = f"{group}_{method_names[j]}"
scores_sheets[sheet_name] = scores_df scores_sheets[sheet_name] = scores_df
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment