Commit 011cb357 authored by Pepe Márquez Romero

usando el codebook de elly

parent 384689af
......@@ -10,10 +10,35 @@ source("dependency_installer.R")
# Packages this script relies on. install_dependencies() is not defined in
# this part of the file; presumably it comes from dependency_installer.R
# -- confirm.
dep_list = c("jsonlite", "stringr","DSI","DSOpal","DSLite", "fields", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient", "openxlsx")
install_dependencies(dep_list)
#source("connection_parameters.R")
#source("necessary_functions_connection.R")
# Load the harmonisation codebook: one data frame per worksheet (sheets 2
# to 11) of the workbook named in codebook_file. read.xlsx is presumably
# the openxlsx reader (openxlsx is listed in dep_list) -- confirm.
codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx"
codebook_demo <- read.xlsx(codebook_file , sheet = 2 )
codebook_com_and_rf <- read.xlsx(codebook_file , sheet = 3 )
codebook_home_med <- read.xlsx(codebook_file , sheet = 4 )
codebook_si_sympt <- read.xlsx(codebook_file , sheet = 5 )
codebook_treatments <- read.xlsx(codebook_file , sheet = 6 )
codebook_labo <- read.xlsx(codebook_file , sheet = 7 )
codebook_complications <- read.xlsx(codebook_file , sheet = 8 )
codebook_imaging_data <- read.xlsx(codebook_file , sheet = 9 )
codebook_lifestyle_diet <- read.xlsx(codebook_file , sheet = 10 )
codebook_dates <- read.xlsx(codebook_file , sheet = 11 )
# Stack the per-sheet tables row-wise into a single `codebook` data frame.
# rbind() on data frames requires the same column names in every table.
codebook <- rbind(codebook_demo , codebook_com_and_rf)
codebook <- rbind(codebook , codebook_home_med)
codebook <- rbind(codebook , codebook_si_sympt)
codebook <- rbind(codebook , codebook_treatments)
codebook <- rbind(codebook , codebook_labo)
codebook <- rbind(codebook , codebook_complications)
codebook <- rbind(codebook , codebook_imaging_data)
# Drop columns X2, X4 and X10 from the lifestyle/diet sheet before appending
# it (presumably stray columns that the other sheets do not have -- confirm).
codebook_lifestyle_diet <- codebook_lifestyle_diet[, !names(codebook_lifestyle_diet) %in% c("X2", "X4" , "X10")]
codebook <- rbind(codebook , codebook_lifestyle_diet)
codebook <- rbind(codebook , codebook_dates)
# NOTE(review): this assignment replaces the workbook-derived `codebook`
# built above with the contents of new_harmon.csv, so everything after this
# line sees only the CSV. Confirm which of the two sources is intended.
codebook <- read.csv("new_harmon.csv" , sep = ",")
# One-column data frame holding the harmonised variable names of the codebook.
codebook_col_names <- as.data.frame(codebook$Harmonised.variable.name)
......@@ -74,6 +99,7 @@ check_valid_name <- function(col_name){
}
check_valid_values_continuous <- function(colname , codebook_param , column){
column <- column[column != "."]
possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
possible_values_list = str_split(possible_values_format , "/")[[1]]
......@@ -94,7 +120,9 @@ check_valid_values_continuous <- function(colname , codebook_param , column){
str_res <- "No failing values"
else{
failing_values <- failing_values[!is.na(failing_values)]
str_res <- paste("The failing values of column ", colname , paste(unlist(failing_values) , collapse =" "))
#str_res <- paste( colname , paste(unlist(failing_values) , collapse =" "))
str_res <- paste(colname , collapse =" ")
str_res <- paste(str_res , "should be in range" , range_as_str, "(continuous)", sep = " ")
}
......@@ -110,8 +138,11 @@ check_valid_values_binary <- function(colname , column){
if (number_of_failing_values == 0)
str_res <- "No failing values"
else{
range_as_str <- "0-1 (binary)"
failing_values <- failing_values[!is.na(failing_values)]
str_res <- paste("The failing values of column ", colname , paste(unlist(failing_values) , collapse =" "))
#str_res <- paste(colname , paste(unlist(failing_values) , collapse =" "))
str_res <- paste(colname , collapse =" ")
str_res <- paste(str_res , "should be in range" , range_as_str, sep = " ")
}
......@@ -146,8 +177,11 @@ check_valid_values_categorical <- function(colname , codebook_param , column){
if(number_of_failing_values == 0){
str_res <- "No failing values"
}else{
range_as_str <- paste(min_value , "-" , max_value , " (categorical)")
failing_values <- failing_values[!is.na(failing_values)]
str_res <- paste("The failing values of column ", colname , paste(unlist(failing_values) , collapse =" "))
#str_res <- paste(colname , paste(unlist(failing_values) , collapse =" "))
str_res <- paste(colname , collapse =" ")
str_res <- paste(str_res , "should be in range" , range_as_str, sep = " ")
}
}
......@@ -158,6 +192,11 @@ check_valid_values <- function(valid_colnames, codebook_param){
for(i in 1:(ncol(valid_colnames))){
name <- names(valid_colnames)[i]
if (grepl("DAT", name, fixed=TRUE)){
next
}
#if("DMRBORN" == name | grepl("DAT", name, fixed=TRUE) | grepl("ISO", name , fixed=TRUE) | grepl("BEF", name, fixed=TRUE)){
# next
#}
......@@ -167,6 +206,12 @@ check_valid_values <- function(valid_colnames, codebook_param){
# This fails if your codebook is not the same as new_harmon.csv
column_type <- codebook_param$Variable.type[codebook_param$Harmonised.variable.name == name]
if (is.na(column_type) ) {
variable <- paste("Variable ", name, " wrong", sep = " ")
res <- paste(res, variable , sep="\n")
next
}
result = switch(
column_type,
"Continuous"= check_valid_values_continuous(name , codebook_param , column),
......@@ -174,6 +219,9 @@ check_valid_values <- function(valid_colnames, codebook_param){
"Categorical"= check_valid_values_categorical(name, codebook_param , column),
"Calendar date" = paste("No failing values"),
"ISO country code"= paste("No failing values"),
{
paste("some column " , column_type , sep = " ")
}
)
if (result != "No failing values"){
......@@ -199,20 +247,6 @@ valid_colnames_with_data <- subset(harmonized_data , select = valid_colnames_col
# Run the value checks on the columns with valid names and report the outcome,
# first on the console and then in a per-hospital text file.
result <- ""
result<-check_valid_values(valid_colnames_with_data, codebook)
print(check_valid_columns)
#datashield.logout(connections)
print(columns_not_valid)
# cat() writes the text as-is, so any "\n" inside `result` becomes a real
# line break (print() would show it escaped).
cat(result)
# Output file: <hospital_name>_invalid_values.txt inside ../invalid_values.
file_name<- paste(hospital_name,"_invalid_values.txt", sep="")
# showWarnings = FALSE keeps dir.create() quiet when the directory exists.
dir.create("../invalid_values", showWarnings = FALSE)
# NOTE(review): setwd() changes the working directory for the rest of the
# session and it is not restored here; later relative paths are affected.
setwd("../invalid_values")
# The first cat() overwrites the file (append defaults to FALSE); the second
# one appends the value-check report after it.
cat(check_valid_columns,file=file_name,sep="\n")
cat(result,file=file_name,append=TRUE)
#datashield.logout(connections)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment