Commit aa9e9a77 authored by GNajeral's avatar GNajeral

Updated code for Continous range study and started Binary and Categorical one

parent ea67a196
......@@ -131,51 +131,66 @@ is_number <- function(x){
# Usa codebook param. Si algún cambia el codebook agradeceremos esto.
check_values_format <- function(valid_columns, codebook_param){
res <- ""
variables_out_of_range = "Variables out of range:"
for(i in 1:length(valid_columns[[1]])){
print(i)
current_column <- valid_columns[[1]][[i]]
print(current_column)
variable_type <- codebook_param$Variable.type[codebook$Harmonised.variable.name == current_column]
if(variable_type == "Continuous"){
if(!is.na(variable_type) && variable_type == "Continuous"){
################## ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
### parse del formato de una variable continua ##
## esta sentencia funciona codebook$Possible.values.format[codebook$Harmonised.variable.name == "CMXDE"] pruebala en el interprete.
value_format <- strsplit(codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == current_column], " / ")[[1]]
high_limit <- as.numeric(sub("-.*", "", value_format[1]))
low_limit <- as.numeric(sub(".*-", "", value_format[1]))
high_limit <- gsub(",", ".", (sub("-.*", "", value_format[1])))
low_limit <- gsub(",", ".", (sub(".*-", "", value_format[1])))
### parse del formato de una variable continua ##
ds.dataFrameSubset(df.name = "data",
V1.name = paste("data$", current_column, sep=""),
V2.name = high_limit,
Boolean.operator = "<=",
newobj = "inRangeHigh",
keep.NAs = TRUE,
datasources = connections)
ds.dataFrameSubset(df.name = "inRangeHigh",
V1.name = paste("inRangeHigh$", current_column, sep=""),
V2.name = low_limit,
Boolean.operator = ">=",
newobj = "inRange",
keep.NAs = TRUE,
datasources = connections)
summary <- ds.summary(paste("inRange$", current_column, sep=""))
if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){
res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
}
tryCatch(
error = function(cnd) {
if(grepl("list them with datashield.errors()",cnd))
error <- paste("Unable to analyse data" , datashield.errors() , sep = " ")
else
error <- paste("Unable to analyse data" , cnd, sep = " ")
print(error)
res <- c(res, error)
},
{
ds.dataFrameSubset(df.name = "data",
V1.name = paste("data$", current_column, sep=""),
V2.name = high_limit,
Boolean.operator = "<=",
newobj = "inRangeHigh",
keep.NAs = TRUE,
datasources = connections)
ds.dataFrameSubset(df.name = "inRangeHigh",
V1.name = paste("inRangeHigh$", current_column, sep=""),
V2.name = low_limit,
Boolean.operator = ">=",
newobj = "inRange",
keep.NAs = TRUE,
datasources = connections)
summary <- ds.summary(paste("inRange$", current_column, sep=""))
if(ds.length(paste("data$", current_column, sep=""))[[1]] > summary[[1]][[2]]){
variables_out_of_range <- paste(variables_out_of_range, current_column, sep = " ")
print(paste(current_column, "does not follow the established format", sep=" "))
}
}
)
################## FIN ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
}else if (variable_type == "Binary"){
}else if (!is.na(variable_type) && (variable_type == "Categorical" || variable_type == "Binary")){
contingency_table <- ds.table(paste("data$",current_column))
counts <- contingency_table[[1]][[3]]
}
}
return (res)
return (variables_out_of_range)
}
......@@ -186,32 +201,30 @@ inp <- auxConnections[[2]]
#Conexión a la base de datos
ds.dim("data", datasources = connections)
colnames <- ds.colnames("data")
colnames
# ds.dataFrameSubset(df.name = "data",
# V1.name = "data$LBXAPTTA",
# V2.name = "130",
# Boolean.operator = "<=",
# newobj = "inRangeHigh",
# keep.NAs = TRUE,
# datasources = connections)
#
# lengthHigh <- ds.length(x='inRangeHigh$LBXAPTTA', datasources = connections)
#
#
# ds.dataFrameSubset(df.name = "inRangeHigh",
# V1.name = "inRangeHigh$LBXAPTTA",
# V2.name = "11",
# Boolean.operator = ">=",
# newobj = "inRange",
# keep.NAs = TRUE,
# datasources = connections)
#
# lengthBuenos <- ds.length(x='inRange$LBXAPTTA', datasources = connections)
#
# summary <- ds.summary("inRange$LBXAPTTA")
# ds.dim("data", datasources = connections)
# colnames <- ds.colnames("data")
# colnames
#
# ds.dataFrameSubset(df.name = "data",
# V1.name = "data$DMXBMI",
# V2.name = "130",
# Boolean.operator = "<=",
# newobj = "inRangeHigh",
# keep.NAs = TRUE,
# datasources = connections)
#
# ds.dataFrameSubset(df.name = "inRangeHigh",
# V1.name = "inRangeHigh$DMXBMI",
# V2.name = "11",
# Boolean.operator = ">=",
# newobj = "inRange",
# keep.NAs = TRUE,
# datasources = connections)
#
# summary <- ds.summary("inRange$DMXBMI")
# if(ds.length("data$DMXBMI")[[1]] > summary[[1]][[2]]){
# res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
# }
#----------------------------------------------------------------------------
......@@ -228,5 +241,5 @@ res <- ""
res <- check_values_format(valid_columns, codebook)
print(res)
datashield.logout(connections)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment