Commit 3c445cc9 authored by Pepe Márquez Romero's avatar Pepe Márquez Romero

Haciendo el código un poco más legible y estructurándolo para hacer posibles...

Haciendo el código un poco más legible y estructurándolo para hacer posibles funciones de análisis según el tipo de variable que sea.
parent 83078851
...@@ -71,7 +71,7 @@ project_names <- c( ...@@ -71,7 +71,7 @@ project_names <- c(
resource_names <- c( resource_names <- c(
"Harmonized_variables_2", "Harmonized_variables_2",
"CIPH_numeric_derivated", "CIPH_numeric_derivated",
"20220719_HarmonisedUMFIasi", "20220919_UMFIasi",
"SMUC_resource", "SMUC_resource",
"20220919_FiHM", "20220919_FiHM",
"Resource_derived", "Resource_derived",
......
...@@ -126,39 +126,53 @@ is_number <- function(x){ ...@@ -126,39 +126,53 @@ is_number <- function(x){
} }
check_values_format <- function(valid_columns){
# A esta función la llamamos únicamente con las columnas cuyo nombre sabemos que es correcto.
# Usa codebook_param. Si algún día cambia el codebook, agradeceremos esto.
check_values_format <- function(valid_columns, codebook_param){
res <- "" res <- ""
for(i in 1:length(valid_columns[[1]])){ for(i in 1:length(valid_columns[[1]])){
print(i) print(i)
current_column <- valid_columns[[1]][[i]] current_column <- valid_columns[[1]][[i]]
if(current_column %in% codebook_labo$Harmonised.variable.name){ variable_type <- codebook_param$Variable.type[codebook$Harmonised.variable.name == current_column]
index <- which(current_column, codebook_labo$Harmonised.variable.name)
value_format <- strsplit(codebook_si_sympt$Possible.values.format[2], " / ")[[1]] if(variable_type == "Continuous"){
################## ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
### parse del formato de una variable continua ##
## esta sentencia funciona codebook$Possible.values.format[codebook$Harmonised.variable.name == "CMXDE"] pruebala en el interprete.
value_format <- strsplit(codebook_param$Possible.values.format[codebook$Harmonised.variable.name == current_column], " / ")[[1]]
high_limit <- as.numeric(sub("-.*", "", value_format[1])) high_limit <- as.numeric(sub("-.*", "", value_format[1]))
low_limit <- as.numeric(sub(".*-", "", value_format[1])) low_limit <- as.numeric(sub(".*-", "", value_format[1]))
if(codebook_labo$Variable.type[index] == "Continuous"){ ### parse del formato de una variable continua ##
ds.dataFrameSubset(df.name = "data",
V1.name = paste("data$", current_column, sep=""), ds.dataFrameSubset(df.name = "data",
V2.name = high_limit, V1.name = paste("data$", current_column, sep=""),
Boolean.operator = "<=", V2.name = high_limit,
newobj = "inRangeHigh", Boolean.operator = "<=",
keep.NAs = TRUE, newobj = "inRangeHigh",
datasources = connections) keep.NAs = TRUE,
datasources = connections)
ds.dataFrameSubset(df.name = "inRangeHigh",
V1.name = paste("inRangeHigh$", current_column, sep=""), ds.dataFrameSubset(df.name = "inRangeHigh",
V2.name = low_limit, V1.name = paste("inRangeHigh$", current_column, sep=""),
Boolean.operator = ">=", V2.name = low_limit,
newobj = "inRange", Boolean.operator = ">=",
keep.NAs = TRUE, newobj = "inRange",
datasources = connections) keep.NAs = TRUE,
datasources = connections)
summary <- ds.summary(paste("inRange$", current_column, sep=""))
if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){ summary <- ds.summary(paste("inRange$", current_column, sep=""))
res <- c(res, paste(current_column, "does not follow the established format" , sep="\n")) if(ds.length(paste("data$", current_column, sep="")) > summary[[1]][[2]]){
} res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
} }
################## FIN ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
}else if (variable_type == "Binary"){
} }
} }
return (res) return (res)
...@@ -211,8 +225,8 @@ data_colnames <- as.data.frame(data_colnames) ...@@ -211,8 +225,8 @@ data_colnames <- as.data.frame(data_colnames)
check_valid_columns <- check_column_names(codebook ,data_colnames) check_valid_columns <- check_column_names(codebook ,data_colnames)
valid_columns <- as.data.frame(check_valid_columns$colnames) valid_columns <- as.data.frame(check_valid_columns$colnames)
res <- "" res <- ""
res <- check_values_format(valid_columns) res <- check_values_format(valid_columns, codebook)
print(res) print(res)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment