diff --git a/connection_parameters.R b/connection_parameters.R
index deb8fa63d5b2cb2c25f0561cd4be3a525de0dfb9..b69224f93780a86e26193338244551cca426e2fb 100755
--- a/connection_parameters.R
+++ b/connection_parameters.R
@@ -71,7 +71,7 @@ project_names <- c(
 resource_names <- c(
   "Harmonized_variables_2",
   "CIPH_numeric_derivated",
-  "20220719_HarmonisedUMFIasi",
+  "20220919_UMFIasi",
   "SMUC_resource",
   "20220919_FiHM",
   "Resource_derived",
diff --git a/valid_variables_script2.R b/valid_variables_script2.R
index 6eb231cdc5051da908e83e14bce008c055d49753..22d82e2d7700463b53593feb513395864c5cbd02 100644
--- a/valid_variables_script2.R
+++ b/valid_variables_script2.R
@@ -126,39 +126,53 @@ is_number <- function(x){
 }
 
 
-check_values_format <- function(valid_columns){
+# Checks, for each column name in valid_columns, that the column's values follow the format declared in the codebook.
+# valid_columns: data frame whose first column holds the column names; it is only called with names already known to be valid.
+# codebook_param: codebook data frame, passed explicitly so a changed codebook can be swapped in. Returns a character vector of messages (first element is "").
+check_values_format <- function(valid_columns, codebook_param){
   res <- ""
-  for(i in 1:length(valid_columns[[1]])){
+  for(i in seq_along(valid_columns[[1]])){
     print(i)
+
     current_column <- valid_columns[[1]][[i]]
-    if(current_column %in% codebook_labo$Harmonised.variable.name){
-      index <- which(current_column, codebook_labo$Harmonised.variable.name)
-      value_format <- strsplit(codebook_si_sympt$Possible.values.format[2], " / ")[[1]]
+    variable_type <- codebook_param$Variable.type[codebook_param$Harmonised.variable.name == current_column]
+
+    if(isTRUE(variable_type[1] == "Continuous")){
+
+      ################## THIS COULD GO IN A SEPARATE FUNCTION #############
+
+      ### parse the format of a continuous variable ##
+      ## this expression works: codebook$Possible.values.format[codebook$Harmonised.variable.name == "CMXDE"] -- try it in the interpreter.
+      value_format <- strsplit(codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == current_column], " / ")[[1]]
       high_limit <- as.numeric(sub("-.*", "", value_format[1]))
       low_limit <- as.numeric(sub(".*-", "", value_format[1]))
-      if(codebook_labo$Variable.type[index] == "Continuous"){
-        ds.dataFrameSubset(df.name = "data",
-                           V1.name = paste("data$", current_column, sep=""),
-                           V2.name = high_limit,
-                           Boolean.operator = "<=",
-                           newobj = "inRangeHigh",
-                           keep.NAs = TRUE,
-                           datasources = connections)
-
-
-        ds.dataFrameSubset(df.name = "inRangeHigh",
-                           V1.name = paste("inRangeHigh$", current_column, sep=""),
-                           V2.name = low_limit,
-                           Boolean.operator = ">=",
-                           newobj = "inRange",
-                           keep.NAs = TRUE,
-                           datasources = connections)
-
-        summary <- ds.summary(paste("inRange$", current_column, sep=""))
-        if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){
-          res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
-        }
+      ### end: parse the format of a continuous variable ##
+      # NOTE(review): high_limit is the text before "-" and low_limit the text after it; confirm the codebook really stores ranges as "high-low".
+      ds.dataFrameSubset(df.name = "data",
+                         V1.name = paste("data$", current_column, sep=""),
+                         V2.name = high_limit,
+                         Boolean.operator = "<=",
+                         newobj = "inRangeHigh",
+                         keep.NAs = TRUE,
+                         datasources = connections)
+
+
+      ds.dataFrameSubset(df.name = "inRangeHigh",
+                         V1.name = paste("inRangeHigh$", current_column, sep=""),
+                         V2.name = low_limit,
+                         Boolean.operator = ">=",
+                         newobj = "inRange",
+                         keep.NAs = TRUE,
+                         datasources = connections)
+
+      summary <- ds.summary(paste("inRange$", current_column, sep=""))
+      if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){
+        res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
       }
+      ################## END: THIS COULD GO IN A SEPARATE FUNCTION #############
+
+    }else if (isTRUE(variable_type[1] == "Binary")){
+      # TODO: the format check for Binary variables is not implemented yet.
     }
   }
   return (res)
@@ -211,8 +225,8 @@ data_colnames <- as.data.frame(data_colnames)
 check_valid_columns <- check_column_names(codebook ,data_colnames)
 valid_columns <- as.data.frame(check_valid_columns$colnames)
 res <- ""
-res <- check_values_format(valid_columns)
+res <- check_values_format(valid_columns, codebook)
 print(res)
 
 
-
+