diff --git a/model_selection/fixing_fp_fn/fixing_metrics.py b/model_selection/fixing_fp_fn/fixing_metrics.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..604c964e8e0b353bc3ebde4a4a9b25fa52efa1ec 100644 --- a/model_selection/fixing_fp_fn/fixing_metrics.py +++ b/model_selection/fixing_fp_fn/fixing_metrics.py @@ -0,0 +1,40 @@ +import pandas as pd + +def swap_fp_fn_rows(excel_path, output_path): + # Load the Excel file + excel_data = pd.ExcelFile(excel_path) + + # Create a Pandas ExcelWriter using the xlsxwriter engine + with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer: + # Iterate through each sheet in the Excel file + for sheet_name in excel_data.sheet_names: + # Read the current sheet into a DataFrame, setting the first column as the index + df = excel_data.parse(sheet_name, index_col=0) + + # Convert index to string if it's not already to handle string operations + df.index = df.index.map(str) + + # Identify all rows containing 'FP' and 'FN' for swapping + fp_rows = df.filter(like='_FP', axis=0) + fn_rows = df.filter(like='_FN', axis=0) + + # Swap values between FP and FN rows for each corresponding pair + for fp_index, fn_index in zip(fp_rows.index, fn_rows.index): + # Temporary store FN row + temp = df.loc[fn_index].copy() + # Swap rows + df.loc[fn_index] = df.loc[fp_index] + df.loc[fp_index] = temp + + # Write the modified DataFrame back to the Excel file without adding an index column + df.to_excel(writer, sheet_name=sheet_name, index=True, index_label=None) + + print(f"Swapped FP and FN rows in '{excel_path}' and saved to '{output_path}'.") + +# Path to your existing Excel file +input_excel_path = '../output_cv_metrics/metrics.xlsx' +# Path where the modified Excel file will be saved +output_excel_path = '../output_cv_metrics/metrics_fixed.xlsx' + +# Call the function +swap_fp_fn_rows(input_excel_path, output_excel_path) diff --git a/model_selection/output_cv_metrics/metrics_fixed.xlsx b/model_selection/output_cv_metrics/metrics_fixed.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..147982c7ade7f78d210ec80d1cdd8010c7b0c1b1 Binary files /dev/null and b/model_selection/output_cv_metrics/metrics_fixed.xlsx differ