Commit 12a192eb authored by GNajeral

valid variables script local, values not duplicated

parent bae733c0
# Clear every object from the current workspace before the script runs.
rm(list = ls())

# Prompt for a directory and make it the working directory.
# Example input: C:\Users\guill\Documents\harmonize_scripts
dir_name <- readline(prompt = "Introduce the name of the directory please: ")
setwd(dir = dir_name)
......@@ -98,94 +99,201 @@ check_valid_name <- function(col_name){
}
check_valid_values_continuous <- function(colname , codebook_param , column){
# check_valid_values_continuous <- function(colname , codebook_param , column){
#
# column <- column[column != "."]
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
# possible_values_list = str_split(possible_values_format , "/")[[1]]
#
# range_as_str <- str_trim(possible_values_list[1])
# missing_value_format <- str_trim(str_trim(possible_values_list[2]))
#
# separate_range <- str_split(range_as_str , "-")[[1]]
# min_value <- strtoi(separate_range[1])
# max_value <- strtoi(separate_range[2])
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else{
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, "(continuous)", sep = " ")
# }
#
# return(str_res)
# }
#
#
#
# check_valid_values_categorical <- function(colname, codebook_param, column) {
# column <- column[column != "."]
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
# possible_values_list <- str_split(possible_values_format, "/")[[1]]
#
# possible_values_list <- lapply(possible_values_list, str_trim)
#
# str_res <- ""
# min_value <- 0
# max_value <- 0
# if (length(possible_values_list[[1]]) == 2) {
# separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]]
# min_value <- strtoi(separate_range[1])
# max_value <- strtoi(separate_range[2])
# } else {
# possible_values_list <- lapply(possible_values_list, strtoi)[[1]]
# min_value <- possible_values_list[1]
# max_value <- possible_values_list[length(possible_values_list) - 1]
# }
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# if (number_of_failing_values == 0) {
# str_res <- "No failing values"
# } else {
# range_as_str <- paste(min_value, "-", max_value, " (categorical)")
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
#
# check_valid_values_binary <- function(colname, column) {
# column <- column[column != "."]
# failing_values <- column[column < 0 | column > 1]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else {
# range_as_str <- "0-1 (binary)"
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
# Check that the values of a continuous column lie inside the range declared
# in the codebook.
#
# Arguments:
#   colname        Name of the column; used to look up the codebook row and in
#                  the returned message.
#   codebook_param Codebook with the columns `Harmonised.variable.name` and
#                  `Possible.values.format`. The format is expected to look
#                  like "min-max / ." (range first, then the missing marker).
#   column         Values to validate. Entries equal to "." and NA entries are
#                  ignored.
#
# Returns "No failing values" when nothing is out of range, otherwise
# "<colname> has <n> failing values should be in range <min> - <max> (continuous)".
# Values that cannot be read as numbers count as failing.
# Raises an error when the codebook has no row for `colname`; emits a warning
# (and reports no failures) when the declared range cannot be parsed.
check_valid_values_continuous <- function(colname, codebook_param, column) {
  # Drop the "." missing-value marker, then any NA entries.
  column <- column[column != "."]
  column <- column[!is.na(column)]

  possible_values_format <- codebook_param$Possible.values.format[
    codebook_param$Harmonised.variable.name == colname
  ]
  if (length(possible_values_format) == 0) {
    stop("No codebook entry found for column '", colname, "'", call. = FALSE)
  }

  # Only the first "/"-separated part holds the range; the rest describes the
  # missing-value marker and is not needed here.
  format_parts <- strsplit(
    as.character(possible_values_format[1]), "/",
    fixed = TRUE
  )[[1]]
  range_text <- trimws(format_parts[1])
  bounds <- suppressWarnings(
    as.numeric(trimws(strsplit(range_text, "-", fixed = TRUE)[[1]]))
  )
  min_value <- bounds[1]
  max_value <- bounds[2]

  # The codebook entry is not in the "min-max / ." format: nothing can be
  # validated, so say so instead of silently comparing against NA.
  if (is.na(min_value) || is.na(max_value)) {
    warning(
      "Codebook range for '", colname, "' is not in 'min-max / .' format",
      call. = FALSE
    )
    return("No failing values")
  }

  # Compare as numbers. A column that contained "." is character, and
  # comparing strings would order "9" after "120".
  if (!is.numeric(column)) {
    column <- suppressWarnings(as.numeric(as.character(column)))
  }
  is_failing <- is.na(column) | column < min_value | column > max_value
  number_of_failing_values <- sum(is_failing)

  if (number_of_failing_values == 0) {
    return("No failing values")
  }

  range_as_str <- paste(
    format(min_value, scientific = FALSE, trim = TRUE),
    "-",
    format(max_value, scientific = FALSE, trim = TRUE),
    "(continuous)"
  )
  str_res <- paste(colname, "has", number_of_failing_values, "failing values")
  paste(str_res, "should be in range", range_as_str, sep = " ")
}
# Check that a binary column only holds values between 0 and 1.
#
# NOTE(review): a function with this same name is defined again further down
# in this file; when the whole file is sourced the later definition wins.
#
# Arguments:
#   colname Name of the column; used only in the returned message.
#   column  Values to validate. Entries equal to "." and NA entries are
#           ignored.
#
# Returns "No failing values" when every remaining entry lies in [0, 1],
# otherwise "<colname> has <n> failing values should be in range 0-1 (binary)".
# Values that cannot be read as numbers count as failing.
check_valid_values_binary <- function(colname, column) {
  # Drop the "." missing-value marker, then any NA entries.
  column <- column[column != "."]
  column <- column[!is.na(column)]

  # Compare as numbers. A column that contained "." is character, and string
  # comparison against 0/1 is locale dependent (e.g. for "-1").
  if (!is.numeric(column)) {
    column <- suppressWarnings(as.numeric(as.character(column)))
  }
  is_failing <- is.na(column) | column < 0 | column > 1
  number_of_failing_values <- sum(is_failing)

  if (number_of_failing_values == 0) {
    return("No failing values")
  }

  range_as_str <- "0-1 (binary)"
  str_res <- paste(colname, "has", number_of_failing_values, "failing values")
  paste(str_res, "should be in range", range_as_str, sep = " ")
}
# Check that the values of a categorical column lie between the smallest and
# largest category declared in the codebook.
#
# Arguments:
#   colname        Name of the column; used to look up the codebook row and in
#                  the returned message.
#   codebook_param Codebook with the columns `Harmonised.variable.name` and
#                  `Possible.values.format`. The format is either a range
#                  ("1-5 / .") or a "/"-separated list of categories
#                  ("0 / 1 / 2 / ."); "." is the missing-value marker.
#   column         Values to validate. Entries equal to "." and NA entries are
#                  ignored.
#
# Returns "No failing values" when nothing is out of range, otherwise
# "<colname> has <n> failing values should be in range <min> - <max>  (categorical)".
# Values that cannot be read as numbers count as failing.
# Raises an error when the codebook has no row for `colname`; emits a warning
# (and reports no failures) when no numeric category can be parsed.
check_valid_values_categorical <- function(colname, codebook_param, column) {
  # Drop the "." missing-value marker, then any NA entries.
  column <- column[column != "."]
  column <- column[!is.na(column)]

  possible_values_format <- codebook_param$Possible.values.format[
    codebook_param$Harmonised.variable.name == colname
  ]
  if (length(possible_values_format) == 0) {
    stop("No codebook entry found for column '", colname, "'", call. = FALSE)
  }

  # Split the format into trimmed tokens and discard the missing marker.
  tokens <- trimws(
    strsplit(as.character(possible_values_format[1]), "/", fixed = TRUE)[[1]]
  )
  tokens <- tokens[!is.na(tokens) & tokens != "" & tokens != "."]

  # "a-b" as the first token declares a range; otherwise every token is one
  # category value.
  is_range <- grepl("^[0-9.]+[[:space:]]*-[[:space:]]*[0-9.]+$", tokens[1])
  if (isTRUE(is_range)) {
    value_tokens <- trimws(strsplit(tokens[1], "-", fixed = TRUE)[[1]])
  } else {
    value_tokens <- tokens
  }
  allowed <- suppressWarnings(as.numeric(value_tokens))
  allowed <- allowed[!is.na(allowed)]

  if (length(allowed) == 0) {
    warning(
      "Codebook categories for '", colname, "' could not be parsed",
      call. = FALSE
    )
    return("No failing values")
  }
  min_value <- min(allowed)
  max_value <- max(allowed)

  # Compare as numbers. A column that contained "." is character, and
  # comparing strings would order "9" after "10".
  if (!is.numeric(column)) {
    column <- suppressWarnings(as.numeric(as.character(column)))
  }
  is_failing <- is.na(column) | column < min_value | column > max_value
  number_of_failing_values <- sum(is_failing)

  if (number_of_failing_values == 0) {
    return("No failing values")
  }

  range_as_str <- paste(
    format(min_value, scientific = FALSE, trim = TRUE),
    "-",
    format(max_value, scientific = FALSE, trim = TRUE),
    " (categorical)"
  )
  str_res <- paste(colname, "has", number_of_failing_values, "failing values")
  paste(str_res, "should be in range", range_as_str, sep = " ")
}
# Check that a binary column only holds values between 0 and 1.
#
# NOTE(review): an earlier definition with this same name appears above in
# this file; when the whole file is sourced this later definition wins.
#
# Arguments:
#   colname Name of the column; used only in the returned message.
#   column  Values to validate. Entries equal to "." and NA entries are
#           ignored.
#
# Returns "No failing values" when every remaining entry lies in [0, 1],
# otherwise "<colname> has <n> failing values should be in range 0-1 (binary)".
# Values that cannot be read as numbers count as failing.
check_valid_values_binary <- function(colname, column) {
  # Drop the "." missing-value marker, then any NA entries.
  column <- column[column != "."]
  column <- column[!is.na(column)]

  # Compare as numbers. A column that contained "." is character, and string
  # comparison against 0/1 is locale dependent (e.g. for "-1").
  if (!is.numeric(column)) {
    column <- suppressWarnings(as.numeric(as.character(column)))
  }
  is_failing <- is.na(column) | column < 0 | column > 1
  number_of_failing_values <- sum(is_failing)

  if (number_of_failing_values == 0) {
    return("No failing values")
  }

  range_as_str <- "0-1 (binary)"
  str_res <- paste(colname, "has", number_of_failing_values, "failing values")
  paste(str_res, "should be in range", range_as_str, sep = " ")
}
check_valid_values <- function(valid_colnames, codebook_param){
res <- ""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment