Commit e778774d authored by GNajeral

updated local script

parent e270001d
......@@ -106,6 +106,9 @@ check_valid_name <- function(col_name){
missing_values_count <- length(column[column == "."])
total_values <- length(column)
missing_values_percentage <- round((missing_values_count/(total_values))*100, 2)
column <- as.numeric(column[column != "."])
string_values_count <- length(column[is.na(column)])
......@@ -127,9 +130,14 @@ check_valid_name <- function(col_name){
number_of_failing_values <- length(failing_values[!is.na(failing_values)]) + string_values_count
str_res <- ""
if (number_of_failing_values == 0)
str_res <- "No failing values"
else{
if (number_of_failing_values == 0){
if(missing_values_percentage > 90){
str_res <- paste(colname, "Missing values:", missing_values_count, "-", paste(round((missing_values_count/(total_values))*100, 2), "%", sep = ""), "of the total")
}
else{
str_res <- paste("No failing values")
}
}else{
failing_values <- failing_values[!is.na(failing_values)]
failing_value_counts <- table(failing_values)
......@@ -149,7 +157,9 @@ check_valid_name <- function(col_name){
str_res <- paste(str_res, "Values lie in the range:", paste("[", first, " - ",last, "],", sep = "" ))
str_res <- paste(str_res, "Wrong values constitute at least:", paste(round((number_of_failing_values/(total_values-missing_values_count))*100, 2), "% of the non-missing", sep = "" ))
str_res <- paste(str_res, "Missing:", paste(round((missing_values_count/(total_values))*100, 2), "%,", sep = "" ))
str_res <- paste(str_res, "Wrong values:", paste(round((number_of_failing_values/(total_values-missing_values_count))*100, 2), "% of the non-missing", sep = "" ))
......@@ -176,6 +186,9 @@ check_valid_name <- function(col_name){
missing_values_count <- length(column[column == "."])
total_values <- length(column)
missing_values_percentage <- round((missing_values_count/(total_values))*100, 2)
column <- as.numeric(column[column != "."])
string_values_count <- length(column[is.na(column)])
......@@ -201,8 +214,13 @@ check_valid_name <- function(col_name){
failing_values <- column[column < min_value | column > max_value]
number_of_failing_values <- length(failing_values[!is.na(failing_values)]) + string_values_count
if (number_of_failing_values == 0) {
str_res <- "No failing values"
if (number_of_failing_values == 0){
if(missing_values_percentage > 90){
str_res <- paste(colname, "Missing values:", missing_values_count, "-", paste(round((missing_values_count/(total_values))*100, 2), "%", sep = ""), "of the total")
}
else{
str_res <- paste("No failing values")
}
} else {
range_as_str <- paste(min_value, "-", max_value, " (categorical)")
failing_values <- failing_values[!is.na(failing_values)]
......@@ -224,6 +242,8 @@ check_valid_name <- function(col_name){
str_res <- paste(str_res, "Values lie in the range:", paste("[", first, " - ",last, "],", sep = "" ))
str_res <- paste(str_res, "Missing:", paste(round((missing_values_count/(total_values))*100, 2), "%,", sep = "" ))
str_res <- paste(str_res, "Wrong values constitute at least:", paste(round((number_of_failing_values/(total_values-missing_values_count))*100, 2), "% of the non-missing", sep = "" ))
......@@ -247,6 +267,8 @@ check_valid_name <- function(col_name){
total_values <- length(column)
missing_values_percentage <- round((missing_values_count/(total_values))*100, 2)
column <- as.numeric(column[column != "."])
string_values_count <- length(column[is.na(column)])
......@@ -254,8 +276,14 @@ check_valid_name <- function(col_name){
number_of_failing_values <- length(failing_values[!is.na(failing_values)]) + string_values_count
str_res <- ""
if (number_of_failing_values == 0)
str_res <- "No failing values"
if (number_of_failing_values == 0){
if(missing_values_percentage > 90){
str_res <- paste(colname, "Missing values:", missing_values_count, "-", paste(round((missing_values_count/(total_values))*100, 2), "%", sep = ""), "of the total")
}
else{
str_res <- paste("No failing values")
}
}
else {
range_as_str <- "0-1 (binary)"
failing_values <- failing_values[!is.na(failing_values)]
......@@ -269,7 +297,7 @@ check_valid_name <- function(col_name){
str_res <- paste(str_res, paste(missing_values_count, ",", sep = ""))
str_res <- paste(str_res, "should be in the range:", paste("0", "1", sep = "-"), "(categorical),", sep = " ")
str_res <- paste(str_res, "should be in the range:", paste("0", "1", sep = "-"), "(binary),", sep = " ")
first <- names(failing_value_counts)[1]
......@@ -277,7 +305,9 @@ check_valid_name <- function(col_name){
str_res <- paste(str_res, "Values lie in the range:", paste("[", first, " - ",last, "],", sep = "" ))
str_res <- paste(str_res, "Wrong values constitute at least:", paste(round((number_of_failing_values/(total_values-missing_values_count))*100, 2), "% of the non-missing", sep = "" ))
str_res <- paste(str_res, "Missing:", paste(round((missing_values_count/(total_values))*100, 2), "%,", sep = "" ))
str_res <- paste(str_res, "Wrong values:", paste(round((number_of_failing_values/(total_values-missing_values_count))*100, 2), "% of the non-missing", sep = "" ))
# str_res <- paste(colname, "has failing values:")
......@@ -294,97 +324,10 @@ check_valid_name <- function(col_name){
return(str_res)
}
# check_valid_values_continuous <- function(colname, codebook_param, column) {
#
# column <- as.numeric(column[column != "."])
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
#
# value_format <- strsplit(possible_values_format, " / ")[[1]]
# min_value <- str_trim(gsub(",", ".", (sub("-.*", "", value_format[1]))))
# max_value <- str_trim(gsub(",", ".", (sub(".*-", "", value_format[1]))))
# if(min_value == ""){
# min_value <- str_trim(sub(",.*", "", value_format[1]))
# max_value <- str_trim(sub(".*,", "", value_format[1]))
# }
# min_value <- as.double(min_value)
# max_value <- as.double(max_value)
# print(colname)
# print(min_value)
# print(max_value)
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0) {
# str_res <- "No failing values"
# } else {
# range_as_str <- paste(min_value, "-", max_value, "(continuous)")
#
# str_res <- paste(colname, "has", number_of_failing_values, "failing values")
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
#
#
# check_valid_values_categorical <- function(colname, codebook_param, column) {
# column <- as.numeric(column[column != "."])
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
# possible_values_list <- str_split(possible_values_format, "/")[[1]]
#
# possible_values_list <- lapply(possible_values_list, str_trim)
#
# str_res <- ""
# min_value <- 0
# max_value <- 0
# if (length(possible_values_list[[1]]) == 2) {
# separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]]
# min_value <- strtoi(separate_range[1])
# max_value <- strtoi(separate_range[2])
# } else {
# possible_values_list <- lapply(possible_values_list, strtoi)[[1]]
# min_value <- possible_values_list[1]
# max_value <- possible_values_list[length(possible_values_list) - 1]
# }
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# if (number_of_failing_values == 0) {
# str_res <- "No failing values"
# } else {
# range_as_str <- paste(min_value, "-", max_value, " (categorical)")
#
# str_res <- paste(colname, "has", number_of_failing_values, "failing values")
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
#
# check_valid_values_binary <- function(colname, column) {
# column <- as.numeric(column[column != "."])
# failing_values <- column[column < 0 | column > 1]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else {
# range_as_str <- "0-1 (binary)"
#
# str_res <- paste(colname, "has", number_of_failing_values, "failing values")
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
check_valid_values <- function(valid_colnames, codebook_param){
res <- ""
res <- paste("Total patients:", length(valid_colnames[,1]))
for(i in 1:(ncol(valid_colnames))){
name <- names(valid_colnames)[i]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment