# Project root; the codebook CSV below is resolved relative to this directory.
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")

# Codebook: one row per harmonised variable, including its declared type.
codebook <- read.csv("new_harmon.csv", sep = ",")

#' Names of the harmonised variables declared with a given type.
#'
#' @param codebook Data frame with columns `Variable.type` and
#'   `Harmonised.variable.name`.
#' @param type Value of `Variable.type` to select.
#' @return Character vector of variable names (rows with an `NA` type are
#'   skipped).
codebook_vars_of_type <- function(codebook, type) {
  keep <- codebook[["Variable.type"]] %in% type
  # as.character() keeps this correct even if the column was read as a factor.
  as.character(codebook[["Harmonised.variable.name"]][keep])
}

binary <- codebook_vars_of_type(codebook, "Binary")
categorical <- codebook_vars_of_type(codebook, "Categorical")
continuous <- codebook_vars_of_type(codebook, "Continuous")
categoric_vars <- c(binary, categorical)

#' Replace missing-value markers in a vector (shared implementation).
#'
#' An element counts as missing when it is `NA` or equal to one of `tokens`.
#'
#' @param column Atomic vector or factor; may be empty.
#' @param tokens Character vector of strings that denote a missing value.
#' @param replacement Scalar written in place of every missing element.
#' @return `column` with missing elements replaced. The input is returned
#'   untouched (same type) when nothing is missing; otherwise the usual R
#'   coercion rules apply (e.g. numeric + `"Missing"` becomes character).
replace_missing_markers <- function(column, tokens, replacement) {
  # `%in%` never yields NA, so the mask is always a clean logical vector.
  is_missing <- is.na(column) | column %in% tokens
  # Guard the assignment: even an all-FALSE mask would coerce the vector type.
  if (any(is_missing)) {
    if (is.factor(column)) {
      # Assigning an unknown level to a factor would produce NA + a warning.
      column <- setNames(as.character(column), names(column))
    }
    column[is_missing] <- replacement
  }
  column
}

#' Mark missing entries of a categorical column with the string "Missing".
#'
#' @param column Vector of categorical values; `NA`, `""`, `"NA"` and
#'   `"Unknown"` are treated as missing.
#' @return The vector with missing entries set to `"Missing"`.
replace_with_Missing_categoric <- function(column) {
  replace_missing_markers(column, c("", "NA", "Unknown"), "Missing")
}

#' Mark missing entries of a numerically coded categorical column with 9999.
#'
#' @param column Vector of coded values; `NA`, `""`, `"NA"` and `"Unknown"`
#'   are treated as missing.
#' @return The vector with missing entries set to `9999`.
replace_with_Missing_num_categoric <- function(column) {
  replace_missing_markers(column, c("", "NA", "Unknown"), 9999)
}

#' Blank out missing entries of a continuous column.
#'
#' @param column Vector of continuous values; `NA`, `"NA"` and `"Unknown"`
#'   are treated as missing.
#' @return The vector with missing entries set to the empty string `""`
#'   (so a numeric column containing missing values becomes character).
replace_with_Missing_continuous <- function(column) {
  replace_missing_markers(column, c("NA", "Unknown"), "")
}

#' Read a CSV file and normalise its missing-value markers column by column.
#'
#' Columns listed in `categoric_vars` get `"Missing"`, columns whose name
#' contains `_numeric` get `9999`, and columns listed in `continuous` get `""`.
#' The three rules are applied in that order and are not mutually exclusive.
#' Progress is printed as a percentage after each column is visited.
#'
#' @param path_to_file CSV path, resolved relative to the `current_db`
#'   directory this function switches to.
#' @return The data frame read from `path_to_file` with markers replaced.
add_missing_values <- function(path_to_file) {
  # NOTE: the working directory is changed and intentionally left changed,
  # as before; callers may rely on it for subsequent relative paths.
  setwd("C:/Users/victor/Documents/TFG/r-analytics-master/ressources/current_db")
  data <- read.csv(path_to_file, sep = ",")

  data_colnames <- colnames(data)
  num_categoric <- data_colnames[grepl("_numeric", data_colnames)]
  n_cols <- length(data_colnames)

  # seq_along() iterates zero times for a file without columns.
  for (i in seq_along(data_colnames)) {
    colname <- data_colnames[i]
    progress <- round(100 * i / n_cols, digits = 0)
    print(paste0(progress, "%"))

    if (colname %in% categoric_vars) {
      data[[colname]] <- replace_with_Missing_categoric(data[[colname]])
    }
    if (colname %in% num_categoric) {
      data[[colname]] <- replace_with_Missing_num_categoric(data[[colname]])
    }
    if (colname %in% continuous) {
      data[[colname]] <- replace_with_Missing_continuous(data[[colname]])
    }
  }

  data
}