Commit 3c445cc9 authored by Pepe Márquez Romero's avatar Pepe Márquez Romero

Haciendo el código un poco más legible y estructurándolo para hacer posibles...

Haciendo el código un poco más legible y estructurándolo para hacer posibles funciones de análisis según el tipo de variable que sea.
parent 83078851
......@@ -71,7 +71,7 @@ project_names <- c(
resource_names <- c(
"Harmonized_variables_2",
"CIPH_numeric_derivated",
"20220719_HarmonisedUMFIasi",
"20220919_UMFIasi",
"SMUC_resource",
"20220919_FiHM",
"Resource_derived",
......
......@@ -126,39 +126,53 @@ is_number <- function(x){
}
check_values_format <- function(valid_columns){
# A esta función la llamamos únicamente con las columnas cuyo nombre sabemos que es correcto.
# Usa el parámetro codebook_param. Si algún día cambia el codebook, agradeceremos esto.
check_values_format <- function(valid_columns, codebook_param){
res <- ""
for(i in 1:length(valid_columns[[1]])){
print(i)
current_column <- valid_columns[[1]][[i]]
if(current_column %in% codebook_labo$Harmonised.variable.name){
index <- which(current_column, codebook_labo$Harmonised.variable.name)
value_format <- strsplit(codebook_si_sympt$Possible.values.format[2], " / ")[[1]]
variable_type <- codebook_param$Variable.type[codebook$Harmonised.variable.name == current_column]
if(variable_type == "Continuous"){
################## ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
### parse del formato de una variable continua ##
## esta sentencia funciona codebook$Possible.values.format[codebook$Harmonised.variable.name == "CMXDE"] pruebala en el interprete.
value_format <- strsplit(codebook_param$Possible.values.format[codebook$Harmonised.variable.name == current_column], " / ")[[1]]
high_limit <- as.numeric(sub("-.*", "", value_format[1]))
low_limit <- as.numeric(sub(".*-", "", value_format[1]))
if(codebook_labo$Variable.type[index] == "Continuous"){
ds.dataFrameSubset(df.name = "data",
V1.name = paste("data$", current_column, sep=""),
V2.name = high_limit,
Boolean.operator = "<=",
newobj = "inRangeHigh",
keep.NAs = TRUE,
datasources = connections)
ds.dataFrameSubset(df.name = "inRangeHigh",
V1.name = paste("inRangeHigh$", current_column, sep=""),
V2.name = low_limit,
Boolean.operator = ">=",
newobj = "inRange",
keep.NAs = TRUE,
datasources = connections)
summary <- ds.summary(paste("inRange$", current_column, sep=""))
if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){
res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
}
### parse del formato de una variable continua ##
ds.dataFrameSubset(df.name = "data",
V1.name = paste("data$", current_column, sep=""),
V2.name = high_limit,
Boolean.operator = "<=",
newobj = "inRangeHigh",
keep.NAs = TRUE,
datasources = connections)
ds.dataFrameSubset(df.name = "inRangeHigh",
V1.name = paste("inRangeHigh$", current_column, sep=""),
V2.name = low_limit,
Boolean.operator = ">=",
newobj = "inRange",
keep.NAs = TRUE,
datasources = connections)
summary <- ds.summary(paste("inRange$", current_column, sep=""))
if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){
res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
}
################## FIN ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
}else if (variable_type == "Binary"){
}
}
return (res)
......@@ -211,8 +225,8 @@ data_colnames <- as.data.frame(data_colnames)
check_valid_columns <- check_column_names(codebook ,data_colnames)
valid_columns <- as.data.frame(check_valid_columns$colnames)
res <- ""
res <- check_values_format(valid_columns)
res <- check_values_format(valid_columns, codebook)
print(res)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment