Commit 910f1a1b authored by Pepe Márquez Romero's avatar Pepe Márquez Romero

revisando el script, no me acaba de convencer pero creo que funciona

parent 52f4db3d
...@@ -125,11 +125,11 @@ is_number <- function(x){ ...@@ -125,11 +125,11 @@ is_number <- function(x){
} }
check_values_not_categoric <- function(values, colname){ check_values_not_categoric <- function(values, colname , codebook_param){
valid_vals <- values valid_vals <- values
possible_vals <- possible_values(colname) possible_vals <- possible_values(colname, codebook_param)
for(i in 1:length(values)){ for(i in 1:length(values)){
...@@ -194,21 +194,21 @@ check_values_not_categoric <- function(values, colname){ ...@@ -194,21 +194,21 @@ check_values_not_categoric <- function(values, colname){
return(valid_vals) return(valid_vals)
} }
possible_values <- function(x){ possible_values <- function(colname , codebook_param){
if(x=="LBXBEH" | x=="LBXBEHn" | x=="LBXBEM") if(colname=="LBXBEH" | colname=="LBXBEHn" | colname=="LBXBEM")
res <- t(as.data.frame(list(-20,20))) res <- t(as.data.frame(list(-20,20)))
else{ else{
possible_value <- subset(harmonised_data,harmonised_data$Harmonised.variable.name==x)[1,5] possible_value <- subset(codebook_param,codebook_param$Harmonised.variable.name==colname)[1,5]
res <- strsplit(x=possible_value,split="/") res <- strsplit(possible_value,split="/")
} }
return(as.data.frame(res)) return(as.data.frame(res))
} }
possible_values_categoric <- function(x){ possible_values_categoric <- function(codebook_param){
possible_value <- subset(harmonised_data,harmonised_data$Harmonised.variable.name==x)[1,4] possible_value <- subset(harmonised_data,harmonised_data$Harmonised.variable.name==x)[1,4]
res <- strsplit(x=possible_value,split="/") res <- strsplit(x=possible_value,split="/")
...@@ -238,16 +238,16 @@ get_values_from_quantiles <- function(x){ ...@@ -238,16 +238,16 @@ get_values_from_quantiles <- function(x){
data_summary <- ds.summary(x) data_summary <- ds.summary(x)
low_quantile <- data_summary[[1]][3][[1]][[1]] low_quantile <- data_summary$HM$`quantiles & mean`[[1]]
high_quantile <- data_summary[[1]][3][[1]][[7]] high_quantile <- data_summary$HM$`quantiles & mean`[[7]]
return(list(low_quantile,high_quantile)) return(list(low_quantile,high_quantile))
} }
notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, wrong_categoric_values, missing_numeric){ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, wrong_categoric_values, missing_numeric, codebook_param){
res <- "" res <- ""
...@@ -280,7 +280,7 @@ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, w ...@@ -280,7 +280,7 @@ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, w
res <- paste(res, "\n", sep="") res <- paste(res, "\n", sep="")
for(i in 1:length(wrong_categoric)){ for(i in 1:length(wrong_categoric)){
res <- paste(res, error_message_categoric(wrong_categoric[i], wrong_categoric_values[[i]]), sep=" ") res <- paste(res, error_message_categoric(wrong_categoric[i], wrong_categoric_values[[i]], codebook_param), sep=" ")
} }
} }
...@@ -302,13 +302,13 @@ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, w ...@@ -302,13 +302,13 @@ notify_error <- function(invalid_name_list,invalid_value_list,wrong_categoric, w
} }
error_message_categoric <- function(colname, invalid_values){ error_message_categoric <- function(colname, invalid_values, codebook_param){
res<- "\nValues in the field" res<- "\nValues in the field"
res<- paste(res, colname, sep=" ") res<- paste(res, colname, sep=" ")
res<- paste(res, "should be", sep=" ") res<- paste(res, "should be", sep=" ")
range <- possible_values_categoric(colname) range <- possible_values_categoric(colname , codebook_param)
for(i in 1:nrow(range)){ for(i in 1:nrow(range)){
res <- paste(res, remove_space(range[i,1]), sep=" ") res <- paste(res, remove_space(range[i,1]), sep=" ")
...@@ -392,7 +392,7 @@ error_message <- function(colname, invalid_values){ ...@@ -392,7 +392,7 @@ error_message <- function(colname, invalid_values){
} }
check_valid_values <- function(valid_colnames){ check_valid_values <- function(valid_colnames , codebook_param){
invalid_name_list <- c() invalid_name_list <- c()
cannot_analyse_list <- c() cannot_analyse_list <- c()
...@@ -405,24 +405,29 @@ check_valid_values <- function(valid_colnames){ ...@@ -405,24 +405,29 @@ check_valid_values <- function(valid_colnames){
for(i in 1:(nrow(valid_colnames))){ for(i in 1:(nrow(valid_colnames))){
data_available <- FALSE
data_table <- "empty" data_table <- "empty"
colname <- valid_colnames[i, 1] colname <- valid_colnames[i, 1]
if(grepl("DMRBORN",colname, fixed=TRUE) | (grepl("DAT",colname, fixed=TRUE)) | (grepl("ISO",colname, fixed=TRUE)) | (grepl("BEF", colname, fixed=TRUE))){ if(grepl("DMRBORN",colname, fixed=TRUE) | (grepl("DAT",colname, fixed=TRUE)) | (grepl("ISO",colname, fixed=TRUE)) | (grepl("BEF", colname, fixed=TRUE))){
next next
} }
column <- "data$" column <- "data$"
column <- paste(column, colname, sep="") column <- paste(column, colname, sep="")
tryCatch( tryCatch(
error = function(cnd) { error = function(cnd) {
print("Unable to analyse data") print(paste("Unable to analyse data" , cnd , sep = " "))
res <- FALSE res <- FALSE
}, },
{
data_table <- as.data.frame(ds.table(column)) data_table <- as.data.frame(ds.table(column))
data_available <- TRUE
}
) )
if(data_table == "empty"){ if(data_available){
cannot_analyse_list <- c(cannot_analyse_list, colname) cannot_analyse_list <- c(cannot_analyse_list, colname)
...@@ -465,7 +470,7 @@ check_valid_values <- function(valid_colnames){ ...@@ -465,7 +470,7 @@ check_valid_values <- function(valid_colnames){
else else
new_colname <- colname new_colname <- colname
valid <- check_values_not_categoric(values, new_colname) valid <- check_values_not_categoric(values, new_colname , codebook_param)
if (FALSE %in% valid){ if (FALSE %in% valid){
invalid_name_list <- c(invalid_name_list,colname) invalid_name_list <- c(invalid_name_list,colname)
...@@ -485,7 +490,7 @@ check_valid_values <- function(valid_colnames){ ...@@ -485,7 +490,7 @@ check_valid_values <- function(valid_colnames){
missing_numeric missing_numeric
if(length(invalid_name_list)>0 | length(wrong_categoric)>0 | length(missing_numeric)>0){ if(length(invalid_name_list)>0 | length(wrong_categoric)>0 | length(missing_numeric)>0){
res <- notify_error(invalid_name_list, invalid_values_list, wrong_categoric, wrong_categoric_values, missing_numeric) res <- notify_error(invalid_name_list, invalid_values_list, wrong_categoric, wrong_categoric_values, missing_numeric, codebook_param)
}else{ }else{
res <- "All values are valid \n" res <- "All values are valid \n"
} }
...@@ -538,25 +543,13 @@ valid_columns <- as.data.frame(check_valid_columns$colnames) ...@@ -538,25 +543,13 @@ valid_columns <- as.data.frame(check_valid_columns$colnames)
result <- "" result <- ""
result<-check_valid_values(valid_columns) result<-check_valid_values(valid_columns, codebook)
print(check_valid_columns) print(check_valid_columns)
datashield.logout(connections) datashield.logout(connections)
cat(result) cat(result)
# ds.dataFrameSubset(df.name = "data", V1.name = "data$DMXWT", "400" , Boolean.operator = '>', newobj = "columna")
# #
# ds.summary("columna$DMXWT")
# ds.dim("columna$DMXWT")
# ds.table("columna$DMXWT")
file_name<- paste(hospital_name,"_invalid_values.txt", sep="") file_name<- paste(hospital_name,"_invalid_values.txt", sep="")
#ds.heatmapPlot("data$LBDSALSIA", "data$RFXHC_numeric",type="combine", datasources = connections)
#setwd("C:/Users/victor/Desktop/TFG/r-analytics-master/invalid_values")
dir.create("../invalid_values", showWarnings = FALSE) dir.create("../invalid_values", showWarnings = FALSE)
setwd("../invalid_values") setwd("../invalid_values")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment