diff --git a/valid_variables_script.R b/valid_variables_script.R index 3729199ffaaf3f20c3a2724fcfa70ccae221ce37..8a89e8528877042c45bf74b7064ff6d2cc7212d2 100644 --- a/valid_variables_script.R +++ b/valid_variables_script.R @@ -125,11 +125,11 @@ is_number <- function(x){ } -check_values_not_categoric <- function(values, colname){ +check_values_not_categoric <- function(values, colname , codebook_param){ valid_vals <- values - possible_vals <- possible_values(colname) + possible_vals <- possible_values(colname, codebook_param) for(i in 1:length(values)){ @@ -194,21 +194,21 @@ check_values_not_categoric <- function(values, colname){ return(valid_vals) } -possible_values <- function(x){ +possible_values <- function(colname , codebook_param){ - if(x=="LBXBEH" | x=="LBXBEHn" | x=="LBXBEM") + if(colname=="LBXBEH" | colname=="LBXBEHn" | colname=="LBXBEM") res <- t(as.data.frame(list(-20,20))) else{ - possible_value <- subset(harmonised_data,harmonised_data$Harmonised.variable.name==x)[1,5] - res <- strsplit(x=possible_value,split="/") + possible_value <- subset(codebook_param,codebook_param$Harmonised.variable.name==colname)[1,5] + res <- strsplit(possible_value,split="/") } return(as.data.frame(res)) } -possible_values_categoric <- function(x){ +possible_values_categoric <- function(codebook_param){ possible_value <- subset(harmonised_data,harmonised_data$Harmonised.variable.name==x)[1,4] res <- strsplit(x=possible_value,split="/") @@ -238,16 +238,16 @@ get_values_from_quantiles <- function(x){ data_summary <- ds.summary(x) - low_quantile <- data_summary[[1]][3][[1]][[1]] + low_quantile <- data_summary$HM$`quantiles & mean`[[1]] - high_quantile <- data_summary[[1]][3][[1]][[7]] + high_quantile <- data_summary$HM$`quantiles & mean`[[7]] return(list(low_quantile,high_quantile)) } -notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, wrong_categoric_values, missing_numeric){ +notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, wrong_categoric_values, missing_numeric, codebook_param){ res <- "" @@ -280,7 +280,7 @@ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, w res <- paste(res, "\n", sep="") for(i in 1:length(wrong_categoric)){ - res <- paste(res, error_message_categoric(wrong_categoric[i], wrong_categoric_values[[i]]), sep=" ") + res <- paste(res, error_message_categoric(wrong_categoric[i], wrong_categoric_values[[i]], codebook_param), sep=" ") } } @@ -302,13 +302,13 @@ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, w } -error_message_categoric <- function(colname, invalid_values){ +error_message_categoric <- function(colname, invalid_values, codebook_param){ res<- "\nValues in the field" res<- paste(res, colname, sep=" ") res<- paste(res, "should be", sep=" ") - range <- possible_values_categoric(colname) + range <- possible_values_categoric(colname , codebook_param) for(i in 1:nrow(range)){ res <- paste(res, remove_space(range[i,1]), sep=" ") @@ -392,7 +392,7 @@ error_message <- function(colname, invalid_values){ } -check_valid_values <- function(valid_colnames){ +check_valid_values <- function(valid_colnames , codebook_param){ invalid_name_list <- c() cannot_analyse_list <- c() @@ -405,24 +405,29 @@ check_valid_values <- function(valid_colnames){ for(i in 1:(nrow(valid_colnames))){ + data_available <- FALSE data_table <- "empty" colname <- valid_colnames[i, 1] if(grepl("DMRBORN",colname, fixed=TRUE) | (grepl("DAT",colname, fixed=TRUE)) | (grepl("ISO",colname, fixed=TRUE)) | (grepl("BEF", colname, fixed=TRUE))){ next } + column <- "data$" column <- paste(column, colname, sep="") tryCatch( error = function(cnd) { - print("Unable to analyse data") + print(paste("Unable to analyse data" , cnd , sep = " ")) res <- FALSE }, + { data_table <- as.data.frame(ds.table(column)) + data_available <- TRUE + } ) - if(data_table == "empty"){ + if(data_available){ cannot_analyse_list <- c(cannot_analyse_list, colname) @@ -465,7 +470,7 @@ check_valid_values <- function(valid_colnames){ else new_colname <- colname - valid <- check_values_not_categoric(values, new_colname) + valid <- check_values_not_categoric(values, new_colname , codebook_param) if (FALSE %in% valid){ invalid_name_list <- c(invalid_name_list,colname) @@ -485,7 +490,7 @@ check_valid_values <- function(valid_colnames){ missing_numeric if(length(invalid_name_list)>0 | length(wrong_categoric)>0 | length(missing_numeric)>0){ - res <- notify_error(invalid_name_list, invalid_values_list, wrong_categoric, wrong_categoric_values, missing_numeric) + res <- notify_error(invalid_name_list, invalid_values_list, wrong_categoric, wrong_categoric_values, missing_numeric, codebook_param) }else{ res <- "All values are valid \n" } @@ -538,25 +543,13 @@ valid_columns <- as.data.frame(check_valid_columns$colnames) result <- "" -result<-check_valid_values(valid_columns) +result<-check_valid_values(valid_columns, codebook) print(check_valid_columns) datashield.logout(connections) cat(result) - - -# ds.dataFrameSubset(df.name = "data", V1.name = "data$DMXWT", "400" , Boolean.operator = '>', newobj = "columna") -# # -# ds.summary("columna$DMXWT") -# ds.dim("columna$DMXWT") -# ds.table("columna$DMXWT") - file_name<- paste(hospital_name,"_invalid_values.txt", sep="") -#ds.heatmapPlot("data$LBDSALSIA", "data$RFXHC_numeric",type="combine", datasources = connections) - - -#setwd("C:/Users/victor/Desktop/TFG/r-analytics-master/invalid_values") dir.create("../invalid_values", showWarnings = FALSE) setwd("../invalid_values")