derived_variables_generator.py 2.21 KB
Newer Older
Alberto Blázquez Herranz's avatar
Alberto Blázquez Herranz committed
1 2 3 4 5 6 7 8 9 10
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 10 12:41:12 2021

@author: ctb
"""

import datetime
import pandas as pd
import sys
11
import math
Alberto Blázquez Herranz's avatar
Alberto Blázquez Herranz committed
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43

# Command-line interface:
#   argv[1]  path of the input CSV file (required)
#   argv[2]  field separator (optional, defaults to a comma)
if len(sys.argv) < 2:
    # Exit with a usage message rather than a bare IndexError traceback.
    sys.exit("usage: " + sys.argv[0] + " DATAFILE [SEPARATOR]")

datafile_path = sys.argv[1]
csv_separator = sys.argv[2] if len(sys.argv) >= 3 else ","

# `sep` has to be passed by keyword: since pandas 2.0 every read_csv argument
# after the path is keyword-only, so the positional form raises TypeError.
datafile = pd.read_csv(datafile_path, sep=csv_separator)


# Lower bounds of the ten-year age bins: 30, 40, ..., 90.
age_bin_width = 10
age_ranges = list(range(30, 100, age_bin_width))


def _age_range_label(age):
    """Return the age-bin label for *age*, or None when the age is missing.

    The labels are "18-29" followed by the ten-year bins "30-39" ... "90-99"
    whose lower bounds are listed in ``age_ranges``.  Every age below 30 is
    labelled "18-29".

    NOTE(review): ages of 100 or more are folded into the top bin ("90-99");
    confirm this is the intended grouping.
    """
    if pd.isna(age):
        # A missing age must not be reported as a real age group.
        return None

    # Walk the bounds from highest to lowest so the first match is the bin the
    # age really belongs to.  Scanning upwards stops at 30 for every age over
    # 30, and a strict ">" would put the lower bound itself in the bin below.
    for lower_bound in reversed(age_ranges):
        if age >= lower_bound:
            upper_bound = lower_bound + age_bin_width - 1
            return str(lower_bound) + "-" + str(upper_bound)

    return "18-29"


# Add the derived AGE_RANGE column only when the source age column exists.
if "DMRAGEYR" in datafile.columns:
    datafile["AGE_RANGE"] = [
        _age_range_label(age) for age in datafile["DMRAGEYR"]
    ]
        
    

44
def _month_name(date_value):
    """Return the month name encoded in a slash-separated date string.

    The value is split on "/" and must yield exactly three fields; the middle
    field is parsed as a month number and returned as the full month name
    produced by ``strftime("%B")`` (e.g. "March").

    NOTE(review): taking the middle field as the month assumes day/month/year
    ordering - confirm against the data.

    Returns None when the value is missing, is not a string, does not have
    three "/"-separated fields, or its month field is not a valid month
    number, so one malformed cell cannot abort the whole run.
    """
    if not isinstance(date_value, str):
        # Covers NaN (a float) as well as any other non-text cell.
        return None

    fields = date_value.split("/")
    if len(fields) != 3:
        return None

    try:
        parsed_month = datetime.datetime.strptime(fields[1], "%m")
    except ValueError:
        return None

    return parsed_month.strftime("%B")


# (source date column, derived column) pairs, processed in this order so the
# derived columns are appended to the frame in the same order as before.
# Despite their "_DATE" names, the derived columns hold month names only.
for source_column, derived_column in (
    ("DATDS", "DISCHARGE_DATE"),
    ("DATAD", "ADMISSION_DATE"),
):
    if source_column in datafile.columns:
        datafile[derived_column] = [
            _month_name(value) for value in datafile[source_column]
        ]

    
Alberto Blázquez Herranz's avatar
Alberto Blázquez Herranz committed
85 86 87
    
# Write the result next to the input file as "<name>_derived.csv".
# Only a trailing ".csv" extension (any letter case) is rewritten.  A plain
# str.replace would also rewrite ".csv" inside directory names, and would
# leave the path unchanged - overwriting the input file - whenever the
# extension is missing or spelled differently.
csv_extension = ".csv"
if datafile_path.lower().endswith(csv_extension):
    split_at = len(datafile_path) - len(csv_extension)
    new_datafile_path = (
        datafile_path[:split_at] + "_derived" + datafile_path[split_at:]
    )
else:
    new_datafile_path = datafile_path + "_derived" + csv_extension

datafile.to_csv(new_datafile_path, index=False)