Commit 3e986eb3 authored by GNajeral

Update the local script to show the range in which the failing values lie

parent 2c02e92b
......@@ -16,7 +16,7 @@ source("necessary_functions_connection.R")
dep_list = c("jsonlite", "stringr","DSI","DSOpal","DSLite", "fields", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient", "openxlsx")
install_dependencies(dep_list)
codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx"
codebook_file <- "20220315_Data_Harmonisation.xlsb.xlsx"
codebook_demo <- read.xlsx(codebook_file , sheet = 2 )
codebook_com_and_rf <- read.xlsx(codebook_file , sheet = 3 )
......
......@@ -2,6 +2,7 @@ rm(list=ls())
dir_name <- readline("Introduce the name of the directory please: ")
# C:\Users\guill\Documents\harmonize_scripts
# /Users/gnl/Documents/CTB UPM/UNCOVER/uncover_harmonization
setwd(dir_name)
......@@ -11,7 +12,7 @@ source("dependency_installer.R")
dep_list = c("jsonlite", "stringr","DSI","DSOpal","DSLite", "fields", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient", "openxlsx")
install_dependencies(dep_list)
codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx"
codebook_file <- "20220315_Data_Harmonisation.xlsb.xlsx"
codebook_demo <- read.xlsx(codebook_file , sheet = 2 )
codebook_com_and_rf <- read.xlsx(codebook_file , sheet = 3 )
......@@ -101,11 +102,13 @@ check_valid_name <- function(col_name){
}
check_valid_values_continuous <- function(colname , codebook_param , column){
column <- as.numeric(column[column != "."])
possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
value_format <- strsplit(possible_values_format, " / ")[[1]]
min_value <- str_trim(gsub(",", ".", (sub("-.*", "", value_format[1]))))
max_value <- str_trim(gsub(",", ".", (sub(".*-", "", value_format[1]))))
if(min_value == ""){
......@@ -114,9 +117,6 @@ check_valid_name <- function(col_name){
}
min_value <- as.double(min_value)
max_value <- as.double(max_value)
print(colname)
print(min_value)
print(max_value)
failing_values <- column[column < min_value | column > max_value]
number_of_failing_values <- length(failing_values[!is.na(failing_values)])
......@@ -128,15 +128,36 @@ check_valid_name <- function(col_name){
failing_values <- failing_values[!is.na(failing_values)]
failing_value_counts <- table(failing_values)
str_res <- paste(colname, "has failing values:")
for (i in seq_along(failing_value_counts)) {
value <- names(failing_value_counts)[i]
count <- failing_value_counts[i]
str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
}
str_res <- paste(str_res, "should be in range", value_format, "(continuous)", sep = " ")
str_res <- paste(colname, "has wrong values:")
str_res <- paste(str_res, paste(number_of_failing_values, ",", sep = ""))
str_res <- paste(str_res, "should be in the range:", paste(min_value, max_value, sep = "-"), "(continuous),", sep = " ")
first <- names(failing_value_counts)[1]
last <- names(failing_value_counts)[length(failing_value_counts)]
print(colname)
print("first")
print(first)
print("last")
print(last)
str_res <- paste(str_res, "Values lie in the range:", paste("[", first, " - ",last, "]", sep = "" ))
# if(number_of_failing_values < 30) {
# # list_values <- paste( failing_values , collapse = " ")
# str_res <- paste(str_res, "Wrong values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, paste("(", count, ")", sep = ""), collapse = " "), sep = " ")
# }
# }
}
return(str_res)
......@@ -324,6 +345,13 @@ check_valid_values <- function(valid_colnames, codebook_param){
next
}
if(is.na(codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == name])){
variable <- paste("Variable ", name, " with no value format", sep = " ")
res <- paste(res, variable , sep="\n")
next
}
result = switch(
column_type,
"Continuous"= check_valid_values_continuous(name , codebook_param , column),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment