# -*- coding: utf-8 -*-
"""
Created on Wed Nov 10 12:41:12 2021

@author: ctb

Add derived columns to a delimited data file and save the result.

Usage:
    python <script> DATAFILE [SEPARATOR]

SEPARATOR defaults to ",". Each derived column is only added when its
source column is present in the input:

* AGE_RANGE       from DMRAGEYR: "18-29" for every age below 30,
                  otherwise the decade containing the age
                  ("30-39", "40-49", ...).
* DISCHARGE_DATE  from DATDS: full month name taken from the date.
* ADMISSION_DATE  from DATAD: full month name taken from the date.

The result is written next to the input as "<name>_derived<ext>" and
is always comma-separated.
"""
import datetime
import math
import os
import sys

import pandas as pd

# Label given to every age below FIRST_DECADE.
YOUNGEST_AGE_RANGE = "18-29"
# Ages from this value upwards are bucketed by decade.
FIRST_DECADE = 30

# (source column, derived column) pairs holding month names.
MONTH_COLUMNS = (
    ("DATDS", "DISCHARGE_DATE"),
    ("DATAD", "ADMISSION_DATE"),
)


def age_range_label(age):
    """Return the age-range label for *age*, or None when it is missing.

    Ages below 30 map to "18-29"; any other age maps to the decade that
    contains it, e.g. 30 -> "30-39" and 55 -> "50-59".
    """
    if pd.isna(age):
        # A missing age must not be counted in the youngest bucket.
        return None
    if age < FIRST_DECADE:
        return YOUNGEST_AGE_RANGE
    decade = int(age // 10) * 10
    return f"{decade}-{decade + 9}"


def month_name_from_date(value):
    """Return the full month name found in a "/"-separated date.

    The second of exactly three "/"-separated fields is read as the
    month number (this presumes a day/month/year layout -- confirm
    against the data source). Missing values, values without three
    fields and month fields that are not a valid month yield None.
    """
    if pd.isna(value):
        return None
    fields = str(value).split("/")
    if len(fields) != 3:
        return None
    try:
        parsed = datetime.datetime.strptime(fields[1], "%m")
    except ValueError:
        # Treat an unparseable month like any other malformed date.
        return None
    return parsed.strftime("%B")


def derived_path(path):
    """Return the output path for *path*: "<root>_derived<ext>".

    Only the final extension is touched, so the result always differs
    from *path*; ".csv" is used when *path* has no extension.
    """
    root, ext = os.path.splitext(path)
    return root + "_derived" + (ext or ".csv")


def add_derived_columns(datafile):
    """Add the derived columns to *datafile* in place and return it."""
    if "DMRAGEYR" in datafile.columns:
        datafile["AGE_RANGE"] = [
            age_range_label(age) for age in datafile["DMRAGEYR"]
        ]
    for source, target in MONTH_COLUMNS:
        if source in datafile.columns:
            datafile[target] = [
                month_name_from_date(value) for value in datafile[source]
            ]
    return datafile


def main(argv=None):
    """Read the file named in *argv*, derive columns, write the result.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the input path and
    an optional ``argv[2]`` is the field separator.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit(
            "usage: " + os.path.basename(argv[0] if argv else "script")
            + " DATAFILE [SEPARATOR]"
        )
    datafile_path = argv[1]
    csv_separator = argv[2] if len(argv) == 3 else ","

    # `sep` must be passed by keyword: it is keyword-only in pandas 2.x.
    datafile = pd.read_csv(datafile_path, sep=csv_separator)
    add_derived_columns(datafile)
    datafile.to_csv(derived_path(datafile_path), index=False)


if __name__ == "__main__":
    main()