Commit 52f4db3d authored by Pepe Márquez Romero

empezando a modificar el script remoto

parent 011cb357
# Display names of the participating sites, one entry per data source.
# This vector and project_names, resource_names, urls, users and pass all
# hold 31 entries and appear to be parallel (entry i of each describes the
# same source), so an insertion or removal here presumably has to be
# mirrored in the other five.
# Original author's note, translated: add the Baskent and Sacrocuore ones.
# NOTE(review): "Impatient" and "Istambul" look like misspellings of
# "Inpatient" and "Istanbul". The strings are left untouched because other
# code may match on them -- confirm before renaming.
hospital_names <- c(
  "Princesa",
  "CIPH",
  "UMF_Iasis",
  "SMUC",
  "HM",
  "Porto",
  "FJD",
  "Coimbra",
  "UNAV",
  "TU",
  "Ankara Impatient",
  "Konya Impatient",
  "Istambul Impatient",
  "Izmir Impatient",
  "Alanya Impatient",
  "Adana Impatient",
  "Ankara Outpatient",
  "Konya Outpatient",
  "Istambul Outpatient",
  "Izmir Outpatient",
  "Alanya Outpatient",
  "Sacrocuore Emergency",
  "Sacrocuore Employees",
  "Sacrocuore Verona",
  "Sacrocuore Isaric",
  "TUDublin",
  "UMF_Cluj",
  "UdeA",
  "Inantro",
  "UNSA",
  "UZA"
)
# Project identifier for each data source (31 entries, same order as
# hospital_names). Several consecutive sources share one project, so those
# runs are written with rep() to make the grouping visible.
project_names <- c(
  "FIBHULP",
  "CIPH_unCoVer",
  "umfiasi",
  "SMUC",
  "FiHM",
  "uncover-up",
  "IISFJD",
  "IPC",
  "unCOVer-UNAV",
  "TU_Uncover",
  rep("BU", 11),        # entries 11-21
  rep("S_uncover", 4),  # entries 22-25
  "TUDublin",
  "UMF_Cluj",
  "INS_Data",
  "INANTRO",
  "UnCoVer-BiH-Final",
  "UZA"
)
# Resource name for each data source (31 entries, same order as
# hospital_names). Entry 5 carries the value introduced by this commit
# ("20220919_FiHM"), which replaced "20220720_HarmonisedHM".
resource_names <- c(
  "Harmonized_variables_2",
  "CIPH_numeric_derivated",
  "20220719_HarmonisedUMFIasi",
  "SMUC_resource",
  "20220919_FiHM",
  "Resource_derived",
  "IISFJD_Harmonized_1",
  "IPC_Harmonized",
  "UNAV_rsc",
  "TU_Harmonized",
  "inpatient_ankara",
  "inpatient_konya",
  "inpatient_istanbul",
  "inpatient_izmir",
  "inpatient_alanya",
  "inpatient_adana",
  "outpatient_ankara",
  "outpatient_konya",
  "outpatient_istanbul",
  "outpatient_izmir",
  "outpatient_alanya",
  "emergency",
  "employees",
  "verona",
  "isaric",
  "TUDublin_harmonised",
  "Romania",
  "colombia_all",
  "Inantro",
  "20220722_HarmonizedUNSA",
  "UZA_prelim"
)
# Server URL for each data source (31 entries, same order as
# hospital_names). Sources hosted on the same server repeat its URL; the
# longer runs are written with rep().
urls <- c(
  "https://192.168.1.200:8001",
  "https://192.168.1.200:8002",
  "https://192.168.1.200:8003",
  "https://192.168.1.200:8006",
  "https://192.168.1.50:9002",
  "https://192.168.1.102",
  "https://uncover.itg.be",
  "https://uncover.itg.be",
  "https://192.168.1.50:9001",
  "https://192.168.1.200:8004",
  rep("https://192.168.1.101:8443", 11),  # entries 11-21
  rep("https://192.168.1.50:8890", 4),    # entries 22-25
  "https://uncover.itg.be",
  "https://192.168.1.200:8005",
  "https://fenfisdi.udea.edu.co/opal",
  "https://192.168.1.200:8007",
  "https://192.168.1.200:8008",
  "https://uncover.itg.be"
)
# Login name for each data source (31 entries, same order as
# hospital_names). Every source uses "user_analisis" except entries 7, 8,
# 26 and 31 -- the same positions that point at https://uncover.itg.be in
# urls -- which use "emertens".
users <- rep("user_analisis", 31L)
users[c(7L, 8L, 26L, 31L)] <- "emertens"
# Password for each data source (31 entries, same order as hospital_names).
# Entries 7, 8, 26 and 31 use a second credential; all others share one.
#
# NOTE(review): these credentials are committed in plain text. They can now
# be overridden through the environment variables UNCOVER_ANALYSIS_PASSWORD
# and UNCOVER_ITG_PASSWORD; the literals remain only as fallbacks so that
# existing set-ups keep working. They should be rotated and the fallbacks
# removed once every deployment provides the variables.
pass <- local({
  shared_password <- Sys.getenv(
    "UNCOVER_ANALYSIS_PASSWORD",
    unset = "Ekfl07UUgz"
  )
  itg_password <- Sys.getenv(
    "UNCOVER_ITG_PASSWORD",
    unset = "3^z4AV.)hG5~PT/]"
  )
  passwords <- rep(shared_password, 31L)
  passwords[c(7L, 8L, 26L, 31L)] <- itg_password
  passwords
})
# Script set-up: start from an empty workspace, move into the project
# directory chosen by the user, load the helper scripts, install the
# required packages and assemble the harmonisation codebook.

# NOTE(review): rm(list = ls()) wipes the caller's global environment. It
# is kept because later code may rely on starting from scratch.
rm(list = ls())

# The project directory is asked for at run time instead of being a
# hard-coded absolute path; setwd() raises an error if it does not exist.
dir_name <- readline("Introduce the name of the directory please: ")
setwd(dir_name)

source("dependency_installer.R")
source("connection_parameters.R")
source("necessary_functions_connection.R")
# source("required_folder_checker.R")
# source("argument_hasher.R")

dep_list <- c(
  "jsonlite", "stringr", "DSI", "DSOpal", "DSLite", "fields", "metafor",
  "ggplot2", "gridExtra", "data.table", "dsBaseClient", "openxlsx"
)
install_dependencies(dep_list)

# Each codebook section is read from its own worksheet (sheets 2 to 11).
# read.xlsx() is presumably openxlsx::read.xlsx, made available by
# install_dependencies() -- confirm against dependency_installer.R.
codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx"
codebook_demo <- read.xlsx(codebook_file, sheet = 2)
codebook_com_and_rf <- read.xlsx(codebook_file, sheet = 3)
codebook_home_med <- read.xlsx(codebook_file, sheet = 4)
codebook_si_sympt <- read.xlsx(codebook_file, sheet = 5)
codebook_treatments <- read.xlsx(codebook_file, sheet = 6)
codebook_labo <- read.xlsx(codebook_file, sheet = 7)
codebook_complications <- read.xlsx(codebook_file, sheet = 8)
codebook_imaging_data <- read.xlsx(codebook_file, sheet = 9)
codebook_lifestyle_diet <- read.xlsx(codebook_file, sheet = 10)
codebook_dates <- read.xlsx(codebook_file, sheet = 11)

# The lifestyle/diet sheet carries three extra columns (X2, X4, X10) that
# are dropped before stacking the sections.
extra_columns <- c("X2", "X4", "X10")
codebook_lifestyle_diet <- codebook_lifestyle_diet[
  , !names(codebook_lifestyle_diet) %in% extra_columns
]

# Stack every section into one codebook, in the same order as before, with
# a single rbind() call instead of nine incremental ones.
codebook <- do.call(
  rbind,
  list(
    codebook_demo,
    codebook_com_and_rf,
    codebook_home_med,
    codebook_si_sympt,
    codebook_treatments,
    codebook_labo,
    codebook_complications,
    codebook_imaging_data,
    codebook_lifestyle_diet,
    codebook_dates
  )
)

# One-column data frame ("col_names") holding the harmonised variable names
# found in the codebook.
codebook_col_names <- as.data.frame(codebook$Harmonised.variable.name)
names(codebook_col_names) <- c("col_names")
# Names of the variables handled as categoric, grouped by name prefix.
# NOTE(review): the prefixes presumably correspond to codebook sections
# (demographics, treatments, symptoms, ...) -- confirm against the codebook.
categoric_vars <- c(
  # DMR*
  "DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU",
  # DSX*
  "DSXOS", "DSXHO", "DSXIC",
  # TRX*
  "TRXAV", "TRXRIB", "TRXLR", "TRXRM", "TRXIA", "TRXIB", "TRXCH", "TRXAB",
  "TRXCS", "TRXHEP", "TRXAF", "TRXCP", "TRXOT", "TRXECM", "TRXIV", "TRXNIV",
  "TRXNO", "TRXOX", "TRXRR", "TRXTR", "TRXVA", "TRXPE", "TRXPV", "TRXIT",
  "TRXNMB", "TRXAC", "TRXINA", "TRXIS", "TRXIM", "TRXVC", "TRXVD", "TRXZN",
  # CSX*
  "CSXCOT", "CSXCTR",
  # SMX*
  "SMXASAH", "SMXFEA", "SMXCOA", "SMXSTA", "SMXSBA", "SMXRNA", "SMXMYA",
  "SMXARA", "SMXCPA", "SMXAPA", "SMXINA", "SMXNAA", "SMXDIA", "SMXFAA",
  "SMXHEA", "SMXCNA", "SMXACA", "SMXSLA", "SMXTLA", "SMXSYA", "SMXWHA",
  "SMXLYA", "SMXANA", "SMXIWA", "SMXSRA", "SMXBLA",
  # CMX*
  "CMXPRG", "CMXCVD", "CMXCMP", "CMXHT", "CMXDI", "CMXCKD", "CMXCLD",
  "CMXCPD", "CMXASM", "CMXCND", "CMXRHE", "CMXCCI", "CMXCBD", "CMXDE",
  "CMXPU", "CMXST", "CMXLY", "CMXAP",
  # RFX*
  "RFXSM", "RFXFSM", "RFXOB", "RFXTB", "RFXIMD", "RFXHIV", "RFXAIDS",
  "RFXUI", "RFXHC", "RFXONC", "RFXMN",
  # HMR*
  "HMRACI", "HMRARB", "HMRAHO", "HMRNS", "HMROS", "HMRCS", "HMRIS", "HMRAV",
  "HMRAB", "HMRCOV",
  # IMDX*
  "IMDXCT", "IMDXCTCR", "IMDXCTTE", "IMDXCTAB", "IMDXXR", "IMDXPN",
  # COX*
  "COXRD", "COXAR", "COXPM", "COXMOD", "COXPT", "COXEC", "COXSH", "COXIO",
  "COXPE", "COXST", "COXDIC", "COXRIO", "COXKF", "COXHF", "COXBC"
)
...@@ -65,48 +54,41 @@ categoric_vars = c("DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU", "DS ...@@ -65,48 +54,41 @@ categoric_vars = c("DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU", "DS
#---------------------------------------------------------------------------- #----------------------------------------------------------------------------
#Test if column names are valid #Test if column names are valid
#' Split the remote column names into registered and unregistered ones.
#'
#' Every name in the first column of `colnames` is passed to
#' check_valid_name(); a name is accepted when that function returns
#' exactly 1.
#'
#' @param codebook_param Codebook supplied by the caller. It is not read
#'   here (the lookup happens inside check_valid_name()); the parameter is
#'   kept so existing calls keep working.
#' @param colnames Data frame whose first column holds the column names to
#'   check. It is forwarded unchanged to check_valid_name().
#' @return A list with two elements: `not_colnames`, a single message string
#'   listing the rejected names, and `colnames`, a character vector with the
#'   accepted names (NULL when no name is accepted).
check_column_names <- function(codebook_param, colnames) {
  # Taking the whole column at once also copes with a zero-row input, where
  # the former 1:nrow() loop would have iterated over c(1, 0).
  candidates <- as.character(colnames[, 1])

  occurrences <- vapply(
    candidates,
    function(name) as.numeric(check_valid_name(name, colnames)),
    numeric(1),
    USE.NAMES = FALSE
  )
  is_valid <- occurrences == 1

  # NULL (not character(0)) is returned when nothing is valid, as before.
  valid_colnames <- if (any(is_valid)) candidates[is_valid] else NULL

  # NOTE(review): when every name is valid the message still reads
  # "The column names: are not registered ..."; the text is left unchanged
  # because callers print it and write it to a file.
  str_res <- paste(
    c(
      "The column names:",
      candidates[!is_valid],
      "are not registered in the harmonized data codebook \n"
    ),
    collapse = " "
  )

  list("not_colnames" = str_res, "colnames" = valid_colnames)
}
#Test if a single variable name is valid #Test if a single variable name is valid
#' Count how often a registered variable name occurs among the column names.
#'
#' The name must first be listed in the global `codebook_col_names$col_names`;
#' otherwise 0 is returned. For a registered name, the result is the number
#' of elements of `col_names` that contain exactly that name: one element
#' per column when `col_names` is a data frame, one element per entry when
#' it is a plain vector.
#'
#' The previous implementation used grep(), which treats the name as a
#' regular expression and matches substrings, so e.g. "CMXHT" was also
#' counted wherever only "CMXHT_numeric" was present. Matching is now exact.
#'
#' @param col_name Single variable name to look up.
#' @param col_names Data frame (or vector) with the available column names.
#' @return Integer count; callers treat exactly 1 as "valid".
check_valid_name <- function(col_name, col_names) {
  if (!(col_name %in% codebook_col_names$col_names)) {
    return(0L)
  }

  contains_name <- vapply(
    as.list(col_names),
    function(entries) col_name %in% entries,
    logical(1),
    USE.NAMES = FALSE
  )
  sum(contains_name)
}
remove_space <- function(x){ remove_space <- function(x){
...@@ -132,7 +114,7 @@ is_number <- function(x){ ...@@ -132,7 +114,7 @@ is_number <- function(x){
x <- str_replace(x,",",".") x <- str_replace(x,",",".")
aux <- as.numeric(x) aux <- as.numeric(x)
if(!is.na(aux)) if(!is.na(aux))
res <- TRUE res <- TRUE
...@@ -163,7 +145,7 @@ check_values_not_categoric <- function(values, colname){ ...@@ -163,7 +145,7 @@ check_values_not_categoric <- function(values, colname){
if(is.null(value)){ if(is.null(value)){
res <- TRUE res <- TRUE
} }
else if( value == "NA" | value == "nan" | value == ".") else if( value == "NA" | value == "nan" | value == ".")
res <- TRUE res <- TRUE
else{ else{
...@@ -410,7 +392,7 @@ error_message <- function(colname, invalid_values){ ...@@ -410,7 +392,7 @@ error_message <- function(colname, invalid_values){
} }
check_valid_values <- function(){ check_valid_values <- function(valid_colnames){
invalid_name_list <- c() invalid_name_list <- c()
cannot_analyse_list <- c() cannot_analyse_list <- c()
...@@ -423,84 +405,82 @@ check_valid_values <- function(){ ...@@ -423,84 +405,82 @@ check_valid_values <- function(){
for(i in 1:(nrow(valid_colnames))){ for(i in 1:(nrow(valid_colnames))){
data_table ="empty" data_table <- "empty"
colname <- valid_colnames[i, 1]
if(!grepl("DMRBORN",valid_colnames[i,1], fixed=TRUE) & (!grepl("DAT",valid_colnames[i,1], fixed=TRUE)) & (!grepl("ISO",valid_colnames[i,1], fixed=TRUE))& (!grepl("BEF",valid_colnames[i,1], fixed=TRUE))){ if(grepl("DMRBORN",colname, fixed=TRUE) | (grepl("DAT",colname, fixed=TRUE)) | (grepl("ISO",colname, fixed=TRUE)) | (grepl("BEF", colname, fixed=TRUE))){
next
}
column <- "data$"
column <- paste(column, colname, sep="")
tryCatch(
error = function(cnd) {
print("Unable to analyse data")
res <- FALSE
},
data_table <- as.data.frame(ds.table(column))
)
if(data_table == "empty"){
column <- "data$" cannot_analyse_list <- c(cannot_analyse_list, colname)
column <- paste(column, valid_colnames[i,1], sep="")
tryCatch( }else{
error = function(cnd) {
print("Unable to analyse data")
res <- FALSE
},
data_table <- as.data.frame(ds.table(column))
)
if(data_table == "empty"){
if (data_table[[1]] == "All studies failed for reasons identified below")
cannot_analyse_list <- c(cannot_analyse_list,valid_colnames[i,1]) values <- get_values_from_quantiles(column)
else
}else{ values <- row.names(data_table)
numeric_col<- paste(colname,"_numeric", sep="")
if( colname %in% categoric_vars ){
has_numeric <- numeric_col %in% valid_colnames$`valid_data_colnames(data_colnames)`
if (data_table[[1]] == "All studies failed for reasons identified below") if(!has_numeric)
values <- get_values_from_quantiles(column) missing_numeric <- c(missing_numeric, colname)
else
values <- row.names(data_table)
numeric_col<- paste(valid_colnames[i,1],"_numeric", sep="")
if( valid_colnames[i,1] %in% categoric_vars ){ if (data_table[[1]] == "All studies failed for reasons identified below"){
#is_numeric <- grepl("numeric",valid_colnames[i,1], fixed=TRUE)
has_numeric <- numeric_col %in% valid_colnames$`valid_data_colnames(data_colnames)`
if(!has_numeric)
missing_numeric <- c(missing_numeric, valid_colnames[i,1])
if (data_table[[1]] == "All studies failed for reasons identified below"){
cannot_analyse_list <- c(cannot_analyse_list,valid_colnames[i,1])
}else if(!check_values_categoric(values,valid_colnames[i,1])){
print("Wrong categoric value:")
print(valid_colnames[i,1])
wrong_categoric <- c(wrong_categoric, valid_colnames[i,1])
wrong_categoric_values[[k]] <- values
k <- k+1
}
# if((!is_numeric & !has_numeric) | is_numeric)
}else{
if(grepl("numeric", valid_colnames[i,1],fixed=TRUE))
new_colname <- strsplit(x=valid_colnames[i,1],split="_")[[1]][1]
else
new_colname <- valid_colnames[i,1]
valid <- check_values_not_categoric(values, new_colname) cannot_analyse_list <- c(cannot_analyse_list,colname)
if (FALSE %in% valid){ }else if(!check_values_categoric(values,colname)){
invalid_name_list <- c(invalid_name_list,valid_colnames[i,1])
invalid_values_list[[j]] <- values
j <- j+1
}
print("Wrong categoric value:")
print(colname)
#print(valid_colnames[i,1]) wrong_categoric <- c(wrong_categoric, colname)
#print(values) wrong_categoric_values[[k]] <- values
k <- k+1
}#else }
# print("This variable has a numeric version")
} }else{
if(grepl("numeric", colname,fixed=TRUE))
new_colname <- strsplit(x=colname,split="_")[[1]][1]
else
new_colname <- colname
valid <- check_values_not_categoric(values, new_colname)
if (FALSE %in% valid){
invalid_name_list <- c(invalid_name_list,colname)
invalid_values_list[[j]] <- values
j <- j+1
}
#print(colname)
#print(values)
}#else
# print("This variable has a numeric version")
} }
} }
missing_numeric missing_numeric
...@@ -515,7 +495,7 @@ check_valid_values <- function(){ ...@@ -515,7 +495,7 @@ check_valid_values <- function(){
res <- paste(res, notify_unable_analyse(cannot_analyse_list), sep="\n" ) res <- paste(res, notify_unable_analyse(cannot_analyse_list), sep="\n" )
} }
...@@ -553,15 +533,12 @@ data_colnames <- ds.colnames(x=datastructure_name, datasources= connections) ...@@ -553,15 +533,12 @@ data_colnames <- ds.colnames(x=datastructure_name, datasources= connections)
data_colnames <- as.data.frame(data_colnames) data_colnames <- as.data.frame(data_colnames)
check_valid_columns <- check_column_names(data_colnames) check_valid_columns <- check_column_names(codebook ,data_colnames)
valid_columns <- as.data.frame(check_valid_columns$colnames)
valid_colnames <- as.data.frame(valid_data_colnames(data_colnames))
#possible_values("CSXCTR")
result <- "" result <- ""
result<-check_valid_values() result<-check_valid_values(valid_columns)
print(check_valid_columns) print(check_valid_columns)
datashield.logout(connections) datashield.logout(connections)
cat(result) cat(result)
...@@ -588,5 +565,5 @@ cat(check_valid_columns,file=file_name,sep="\n") ...@@ -588,5 +565,5 @@ cat(check_valid_columns,file=file_name,sep="\n")
cat(result,file=file_name,append=TRUE) cat(result,file=file_name,append=TRUE)
datashield.logout(connections) datashield.logout(connections)
...@@ -120,8 +120,8 @@ check_valid_values_continuous <- function(colname , codebook_param , column){ ...@@ -120,8 +120,8 @@ check_valid_values_continuous <- function(colname , codebook_param , column){
str_res <- "No failing values" str_res <- "No failing values"
else{ else{
failing_values <- failing_values[!is.na(failing_values)] failing_values <- failing_values[!is.na(failing_values)]
#str_res <- paste( colname , paste(unlist(failing_values) , collapse =" ")) str_res <- paste( colname , paste(unlist(failing_values) , collapse =" "))
str_res <- paste(colname , collapse =" ") #str_res <- paste(colname , collapse =" ")
str_res <- paste(str_res , "should be in range" , range_as_str, "(continuous)", sep = " ") str_res <- paste(str_res , "should be in range" , range_as_str, "(continuous)", sep = " ")
} }
...@@ -140,8 +140,8 @@ check_valid_values_binary <- function(colname , column){ ...@@ -140,8 +140,8 @@ check_valid_values_binary <- function(colname , column){
else{ else{
range_as_str <- "0-1 (binary)" range_as_str <- "0-1 (binary)"
failing_values <- failing_values[!is.na(failing_values)] failing_values <- failing_values[!is.na(failing_values)]
#str_res <- paste(colname , paste(unlist(failing_values) , collapse =" ")) str_res <- paste(colname , paste(unlist(failing_values) , collapse =" "))
str_res <- paste(colname , collapse =" ") #str_res <- paste(colname , collapse =" ")
str_res <- paste(str_res , "should be in range" , range_as_str, sep = " ") str_res <- paste(str_res , "should be in range" , range_as_str, sep = " ")
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment