diff --git a/valid_variables_script2.R b/valid_variables_script2.R index c13a9247a86b1e7703e1f7f252a5d3f35cc1a43d..22771bc38852b7368fdbe4127ec1ec76300e3648 100644 --- a/valid_variables_script2.R +++ b/valid_variables_script2.R @@ -131,51 +131,66 @@ is_number <- function(x){ # Usa codebook param. Si algún cambia el codebook agradeceremos esto. check_values_format <- function(valid_columns, codebook_param){ res <- "" + variables_out_of_range = "Variables out of range:" for(i in 1:length(valid_columns[[1]])){ - print(i) - + current_column <- valid_columns[[1]][[i]] + print(current_column) variable_type <- codebook_param$Variable.type[codebook$Harmonised.variable.name == current_column] - if(variable_type == "Continuous"){ + if(!is.na(variable_type) && variable_type == "Continuous"){ ################## ESTO PODRÍA IR EN UNA FUNC DIFERENTE ############# ### parse del formato de una variable continua ## ## esta sentencia funciona codebook$Possible.values.format[codebook$Harmonised.variable.name == "CMXDE"] pruebala en el interprete. value_format <- strsplit(codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == current_column], " / ")[[1]] - high_limit <- as.numeric(sub("-.*", "", value_format[1])) - low_limit <- as.numeric(sub(".*-", "", value_format[1])) + high_limit <- gsub(",", ".", (sub("-.*", "", value_format[1]))) + low_limit <- gsub(",", ".", (sub(".*-", "", value_format[1]))) ### parse del formato de una variable continua ## - ds.dataFrameSubset(df.name = "data", - V1.name = paste("data$", current_column, sep=""), - V2.name = high_limit, - Boolean.operator = "<=", - newobj = "inRangeHigh", - keep.NAs = TRUE, - datasources = connections) - - - ds.dataFrameSubset(df.name = "inRangeHigh", - V1.name = paste("inRangeHigh$", current_column, sep=""), - V2.name = low_limit, - Boolean.operator = ">=", - newobj = "inRange", - keep.NAs = TRUE, - datasources = connections) - - summary <- ds.summary(paste("inRange$", current_column, sep="")) - if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){ - res <- c(res, paste(current_column, "does not follow the established format" , sep="\n")) - } + tryCatch( + error = function(cnd) { + if(grepl("list them with datashield.errors()",cnd)) + error <- paste("Unable to analyse data" , datashield.errors() , sep = " ") + else + error <- paste("Unable to analyse data" , cnd, sep = " ") + print(error) + res <- c(res, error) + }, + { + ds.dataFrameSubset(df.name = "data", + V1.name = paste("data$", current_column, sep=""), + V2.name = high_limit, + Boolean.operator = "<=", + newobj = "inRangeHigh", + keep.NAs = TRUE, + datasources = connections) + + + ds.dataFrameSubset(df.name = "inRangeHigh", + V1.name = paste("inRangeHigh$", current_column, sep=""), + V2.name = low_limit, + Boolean.operator = ">=", + newobj = "inRange", + keep.NAs = TRUE, + datasources = connections) + + summary <- ds.summary(paste("inRange$", current_column, sep="")) + if(ds.length(paste("data$", current_column, sep=""))[[1]] > summary[[1]][[2]]){ + variables_out_of_range <- paste(variables_out_of_range, current_column, sep = " ") + print(paste(current_column, "does not follow the established format", sep=" ")) + } + } + ) ################## FIN ESTO PODRÍA IR EN UNA FUNC DIFERENTE ############# - }else if (variable_type == "Binary"){ - + }else if (!is.na(variable_type) && (variable_type == "Categorical" || variable_type == "Binary")){ + contingency_table <- ds.table(paste("data$",current_column)) + counts <- contingency_table[[1]][[3]] } } - return (res) + return (variables_out_of_range) } @@ -186,32 +201,30 @@ inp <- auxConnections[[2]] #Conexión a la base de datos -ds.dim("data", datasources = connections) -colnames <- ds.colnames("data") -colnames - -# ds.dataFrameSubset(df.name = "data", -# V1.name = "data$LBXAPTTA", -# V2.name = "130", -# Boolean.operator = "<=", -# newobj = "inRangeHigh", -# keep.NAs = TRUE, -# datasources = connections) -# -# lengthHigh <- ds.length(x='inRangeHigh$LBXAPTTA', datasources = connections) -# -# -# ds.dataFrameSubset(df.name = "inRangeHigh", -# V1.name = "inRangeHigh$LBXAPTTA", -# V2.name = "11", -# Boolean.operator = ">=", -# newobj = "inRange", -# keep.NAs = TRUE, -# datasources = connections) -# -# lengthBuenos <- ds.length(x='inRange$LBXAPTTA', datasources = connections) -# -# summary <- ds.summary("inRange$LBXAPTTA") + # ds.dim("data", datasources = connections) + # colnames <- ds.colnames("data") + # colnames + # + # ds.dataFrameSubset(df.name = "data", + # V1.name = "data$DMXBMI", + # V2.name = "130", + # Boolean.operator = "<=", + # newobj = "inRangeHigh", + # keep.NAs = TRUE, + # datasources = connections) + # + # ds.dataFrameSubset(df.name = "inRangeHigh", + # V1.name = "inRangeHigh$DMXBMI", + # V2.name = "11", + # Boolean.operator = ">=", + # newobj = "inRange", + # keep.NAs = TRUE, + # datasources = connections) + # + # summary <- ds.summary("inRange$DMXBMI") + # if(ds.length("data$DMXBMI")[[1]] > summary[[1]][[2]]){ + # res <- c(res, paste(current_column, "does not follow the established format" , sep="\n")) + # } #---------------------------------------------------------------------------- @@ -228,5 +241,5 @@ res <- "" res <- check_values_format(valid_columns, codebook) print(res) - +datashield.logout(connections)