From 2dd8e999dd90b87354d8ddc5096d55e122895b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pepe=20M=C3=A1rquez=20Romero?= Date: Thu, 20 Apr 2023 17:53:43 +0200 Subject: [PATCH] pass ULSS6 --- .Rhistory | 512 -------------------------------------- connection_parameters.R | 4 +- remote_execute_as_admin.R | 2 +- 3 files changed, 3 insertions(+), 515 deletions(-) delete mode 100644 .Rhistory diff --git a/.Rhistory b/.Rhistory deleted file mode 100644 index 5646f5f..0000000 --- a/.Rhistory +++ /dev/null @@ -1,512 +0,0 @@ -install_dependencies(dep_list) -codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx" -codebook_demo <- read.xlsx(codebook_file , sheet = 2 ) -codebook_com_and_rf <- read.xlsx(codebook_file , sheet = 3 ) -codebook_home_med <- read.xlsx(codebook_file , sheet = 4 ) -codebook_si_sympt <- read.xlsx(codebook_file , sheet = 5 ) -codebook_treatments <- read.xlsx(codebook_file , sheet = 6 ) -codebook_labo <- read.xlsx(codebook_file , sheet = 7 ) -codebook_complications <- read.xlsx(codebook_file , sheet = 8 ) -codebook_imaging_data <- read.xlsx(codebook_file , sheet = 9 ) -codebook_lifestyle_diet <- read.xlsx(codebook_file , sheet = 10 ) -codebook_dates <- read.xlsx(codebook_file , sheet = 11 ) -codebook <- rbind(codebook_demo , codebook_com_and_rf) -codebook <- rbind(codebook , codebook_home_med) -codebook <- rbind(codebook , codebook_si_sympt) -codebook <- rbind(codebook , codebook_treatments) -codebook <- rbind(codebook , codebook_labo) -codebook <- rbind(codebook , codebook_complications) -codebook <- rbind(codebook , codebook_imaging_data) -codebook_lifestyle_diet <- codebook_lifestyle_diet[, !names(codebook_lifestyle_diet) %in% c("X2", "X4" , "X10")] -codebook <- rbind(codebook , codebook_lifestyle_diet) -codebook <- rbind(codebook , codebook_dates) -codebook_col_names <- as.data.frame(codebook$Harmonised.variable.name) -names(codebook_col_names) <- c("col_names") -setwd(paste(dir_name ,"/harmonized_data", sep="")) -file_name <- readline("Introduce the name of the file to check the values: ") -harmonized_data <- "" -if (grepl(".csv" , file_name , fixed = TRUE)){ -harmonized_data <- read.csv(file_name) -}else if (grepl(".xlsx" , file_name , fixed = TRUE)){ -harmonized_data <- read.xlsx(file_name) -} -#Test if column names are valid -check_column_names <- function(col_names){ -str_res <- "The column names:" -valid_colnames <- c() -repeated_colnames <- c() -for(i in 1:(nrow(col_names))){ -col_name <- col_names[i,1] -number_of_column <- check_valid_name(col_name) -if( number_of_column == 0){ -str_res<- paste(str_res, col_name, sep=" ") -}else if (number_of_column == 1){ -valid_colnames = c(valid_colnames, col_name) -}else{ -repeated_colnames = c(repeated_colnames , col_name) -} -} -str_res<- paste(str_res,"are not registered in the harmonized data codebook \n", sep=" ") -new_list <- list("not_colnames" = str_res , "colnames" = valid_colnames , "repeated_colnames" = repeated_colnames) -return (new_list) -} -#Test if a single variable name is valid -check_valid_name <- function(col_name){ -valid <- 0 -if(col_name %in% codebook_col_names$col_names){ -valid <- length(grep(col_name, names(harmonized_data))) -} -return (valid) -} -check_valid_values_continuous <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list = str_split(possible_values_format, "/")[[1]] -range_as_str <- str_trim(possible_values_list[1]) -missing_value_format <- str_trim(str_trim(possible_values_list[2])) -separate_range <- str_split(range_as_str, "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, "(continuous)") -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_categorical <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list <- str_split(possible_values_format, "/")[[1]] -possible_values_list <- lapply(possible_values_list, str_trim) -str_res <- "" -min_value <- 0 -max_value <- 0 -if (length(possible_values_list[[1]]) == 2) { -separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -} else { -possible_values_list <- lapply(possible_values_list, strtoi)[[1]] -min_value <- possible_values_list[1] -max_value <- possible_values_list[length(possible_values_list) - 1] -} -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, " (categorical)") -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_binary <- function(colname, column) { -column <- column[column != "."] -failing_values <- column[column < 0 | column > 1] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) -str_res <- "No failing values" -else { -range_as_str <- "0-1 (binary)" -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values <- function(valid_colnames, codebook_param){ -res <- "" -for(i in 1:(ncol(valid_colnames))){ -name <- names(valid_colnames)[i] -if (grepl("DAT", name, fixed=TRUE)){ -next -} -#if("DMRBORN" == name | grepl("DAT", name, fixed=TRUE) | grepl("ISO", name , fixed=TRUE) | grepl("BEF", name, fixed=TRUE)){ -# next -#} -column <- valid_colnames[,i] -# Esto falla si tu codebook no es mismo que new_harmon.csv -column_type <- codebook_param$Variable.type[codebook_param$Harmonised.variable.name == name] -if (is.na(column_type) ) { -variable <- paste("Variable ", name, " wrong", sep = " ") -res <- paste(res, variable , sep="\n") -next -} -result = switch( -column_type, -"Continuous"= check_valid_values_continuous(name , codebook_param , column), -"Binary"= check_valid_values_binary(name , column), -"Categorical"= check_valid_values_categorical(name, codebook_param , column), -"Calendar date" = paste("No failing values"), -"ISO country code"= paste("No failing values"), -{ -paste("some column " , column_type , sep = " ") -} -) -if (result != "No failing values"){ -res <- paste(res , result, sep="\n") -} -} -return(res) -} -data_colnames <- as.data.frame(colnames(harmonized_data)) -check_valid_columns <- check_column_names(data_colnames) -columns_not_valid <- check_valid_columns$not_colnames -valid_colnames_column <- as.data.frame(check_valid_columns$colnames) -names(valid_colnames_column) = c("valid_colnames") -valid_colnames_with_data <- subset(harmonized_data , select = valid_colnames_column$valid_colnames) -result <- "" -result<-check_valid_values(valid_colnames_with_data, codebook) -print(columns_not_valid) -cat(result) -check_valid_values_continuous <- function(colname , codebook_param , column){ -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list = str_split(possible_values_format , "/")[[1]] -range_as_str <- str_trim(possible_values_list[1]) -missing_value_format <- str_trim(str_trim(possible_values_list[2])) -separate_range <- str_split(range_as_str , "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) -str_res <- "No failing values" -else{ -failing_values <- failing_values[!is.na(failing_values)] -failing_value_counts <- table(failing_values) -str_res <- paste(colname, "has failing values:") -for (i in seq_along(failing_value_counts)) { -value <- names(failing_value_counts)[i] -count <- failing_value_counts[i] -str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ") -} -str_res <- paste(str_res, "should be in range", range_as_str, "(continuous)", sep = " ") -} -return(str_res) -} -check_valid_values_categorical <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list <- str_split(possible_values_format, "/")[[1]] -possible_values_list <- lapply(possible_values_list, str_trim) -str_res <- "" -min_value <- 0 -max_value <- 0 -if (length(possible_values_list[[1]]) == 2) { -separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -} else { -possible_values_list <- lapply(possible_values_list, strtoi)[[1]] -min_value <- possible_values_list[1] -max_value <- possible_values_list[length(possible_values_list) - 1] -} -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, " (categorical)") -failing_values <- failing_values[!is.na(failing_values)] -failing_value_counts <- table(failing_values) -str_res <- paste(colname, "has failing values:") -for (i in seq_along(failing_value_counts)) { -value <- names(failing_value_counts)[i] -count <- failing_value_counts[i] -str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ") -} -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_binary <- function(colname, column) { -column <- column[column != "."] -failing_values <- column[column < 0 | column > 1] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) -str_res <- "No failing values" -else { -range_as_str <- "0-1 (binary)" -failing_values <- failing_values[!is.na(failing_values)] -failing_value_counts <- table(failing_values) -str_res <- paste(colname, "has failing values:") -for (i in seq_along(failing_value_counts)) { -value <- names(failing_value_counts)[i] -count <- failing_value_counts[i] -str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ") -} -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values <- function(valid_colnames, codebook_param){ -res <- "" -for(i in 1:(ncol(valid_colnames))){ -name <- names(valid_colnames)[i] -if (grepl("DAT", name, fixed=TRUE)){ -next -} -#if("DMRBORN" == name | grepl("DAT", name, fixed=TRUE) | grepl("ISO", name , fixed=TRUE) | grepl("BEF", name, fixed=TRUE)){ -# next -#} -column <- valid_colnames[,i] -# Esto falla si tu codebook no es mismo que new_harmon.csv -column_type <- codebook_param$Variable.type[codebook_param$Harmonised.variable.name == name] -if (is.na(column_type) ) { -variable <- paste("Variable ", name, " wrong", sep = " ") -res <- paste(res, variable , sep="\n") -next -} -result = switch( -column_type, -"Continuous"= check_valid_values_continuous(name , codebook_param , column), -"Binary"= check_valid_values_binary(name , column), -"Categorical"= check_valid_values_categorical(name, codebook_param , column), -"Calendar date" = paste("No failing values"), -"ISO country code"= paste("No failing values"), -{ -paste("some column " , column_type , sep = " ") -} -) -if (result != "No failing values"){ -res <- paste(res , result, sep="\n") -} -} -return(res) -} -data_colnames <- as.data.frame(colnames(harmonized_data)) -check_valid_columns <- check_column_names(data_colnames) -columns_not_valid <- check_valid_columns$not_colnames -valid_colnames_column <- as.data.frame(check_valid_columns$colnames) -names(valid_colnames_column) = c("valid_colnames") -valid_colnames_with_data <- subset(harmonized_data , select = valid_colnames_column$valid_colnames) -result <- "" -result<-check_valid_values(valid_colnames_with_data, codebook) -print(columns_not_valid) -cat(result) -check_valid_values_continuous <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list = str_split(possible_values_format, "/")[[1]] -range_as_str <- str_trim(possible_values_list[1]) -missing_value_format <- str_trim(str_trim(possible_values_list[2])) -separate_range <- str_split(range_as_str, "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, "(continuous)") -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_categorical <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list <- str_split(possible_values_format, "/")[[1]] -possible_values_list <- lapply(possible_values_list, str_trim) -str_res <- "" -min_value <- 0 -max_value <- 0 -if (length(possible_values_list[[1]]) == 2) { -separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -} else { -possible_values_list <- lapply(possible_values_list, strtoi)[[1]] -min_value <- possible_values_list[1] -max_value <- possible_values_list[length(possible_values_list) - 1] -} -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, " (categorical)") -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_binary <- function(colname, column) { -column <- column[column != "."] -failing_values <- column[column < 0 | column > 1] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) -str_res <- "No failing values" -else { -range_as_str <- "0-1 (binary)" -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values <- function(valid_colnames, codebook_param){ -res <- "" -for(i in 1:(ncol(valid_colnames))){ -name <- names(valid_colnames)[i] -if (grepl("DAT", name, fixed=TRUE)){ -next -} -#if("DMRBORN" == name | grepl("DAT", name, fixed=TRUE) | grepl("ISO", name , fixed=TRUE) | grepl("BEF", name, fixed=TRUE)){ -# next -#} -column <- valid_colnames[,i] -# Esto falla si tu codebook no es mismo que new_harmon.csv -column_type <- codebook_param$Variable.type[codebook_param$Harmonised.variable.name == name] -if (is.na(column_type) ) { -variable <- paste("Variable ", name, " wrong", sep = " ") -res <- paste(res, variable , sep="\n") -next -} -result = switch( -column_type, -"Continuous"= check_valid_values_continuous(name , codebook_param , column), -"Binary"= check_valid_values_binary(name , column), -"Categorical"= check_valid_values_categorical(name, codebook_param , column), -"Calendar date" = paste("No failing values"), -"ISO country code"= paste("No failing values"), -{ -paste("some column " , column_type , sep = " ") -} -) -if (result != "No failing values"){ -res <- paste(res , result, sep="\n") -} -} -return(res) -} -data_colnames <- as.data.frame(colnames(harmonized_data)) -check_valid_columns <- check_column_names(data_colnames) -columns_not_valid <- check_valid_columns$not_colnames -valid_colnames_column <- as.data.frame(check_valid_columns$colnames) -names(valid_colnames_column) = c("valid_colnames") -valid_colnames_with_data <- subset(harmonized_data , select = valid_colnames_column$valid_colnames) -result <- "" -result<-check_valid_values(valid_colnames_with_data, codebook) -print(columns_not_valid) -cat(result) -check_valid_values_continuous <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list = str_split(possible_values_format, "/")[[1]] -range_as_str <- str_trim(possible_values_list[1]) -missing_value_format <- str_trim(str_trim(possible_values_list[2])) -separate_range <- str_split(range_as_str, "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, "(continuous)") -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_categorical <- function(colname, codebook_param, column) { -column <- column[column != "."] -possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname] -possible_values_list <- str_split(possible_values_format, "/")[[1]] -possible_values_list <- lapply(possible_values_list, str_trim) -str_res <- "" -min_value <- 0 -max_value <- 0 -if (length(possible_values_list[[1]]) == 2) { -separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]] -min_value <- strtoi(separate_range[1]) -max_value <- strtoi(separate_range[2]) -} else { -possible_values_list <- lapply(possible_values_list, strtoi)[[1]] -min_value <- possible_values_list[1] -max_value <- possible_values_list[length(possible_values_list) - 1] -} -failing_values <- column[column < min_value | column > max_value] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -if (number_of_failing_values == 0) { -str_res <- "No failing values" -} else { -range_as_str <- paste(min_value, "-", max_value, " (categorical)") -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values_binary <- function(colname, column) { -column <- column[column != "."] -failing_values <- column[column < 0 | column > 1] -number_of_failing_values <- length(failing_values[!is.na(failing_values)]) -str_res <- "" -if (number_of_failing_values == 0) -str_res <- "No failing values" -else { -range_as_str <- "0-1 (binary)" -str_res <- paste(colname, "has", number_of_failing_values, "failing values") -str_res <- paste(str_res, "should be in range", range_as_str, sep = " ") -} -return(str_res) -} -check_valid_values <- function(valid_colnames, codebook_param){ -res <- "" -for(i in 1:(ncol(valid_colnames))){ -name <- names(valid_colnames)[i] -if (grepl("DAT", name, fixed=TRUE)){ -next -} -#if("DMRBORN" == name | grepl("DAT", name, fixed=TRUE) | grepl("ISO", name , fixed=TRUE) | grepl("BEF", name, fixed=TRUE)){ -# next -#} -column <- valid_colnames[,i] -# Esto falla si tu codebook no es mismo que new_harmon.csv -column_type <- codebook_param$Variable.type[codebook_param$Harmonised.variable.name == name] -if (is.na(column_type) ) { -variable <- paste("Variable ", name, " wrong", sep = " ") -res <- paste(res, variable , sep="\n") -next -} -result = switch( -column_type, -"Continuous"= check_valid_values_continuous(name , codebook_param , column), -"Binary"= check_valid_values_binary(name , column), -"Categorical"= check_valid_values_categorical(name, codebook_param , column), -"Calendar date" = paste("No failing values"), -"ISO country code"= paste("No failing values"), -{ -paste("some column " , column_type , sep = " ") -} -) -if (result != "No failing values"){ -res <- paste(res , result, sep="\n") -} -} -return(res) -} -data_colnames <- as.data.frame(colnames(harmonized_data)) -check_valid_columns <- check_column_names(data_colnames) -columns_not_valid <- check_valid_columns$not_colnames -valid_colnames_column <- as.data.frame(check_valid_columns$colnames) -names(valid_colnames_column) = c("valid_colnames") -valid_colnames_with_data <- subset(harmonized_data , select = valid_colnames_column$valid_colnames) -result <- "" -result<-check_valid_values(valid_colnames_with_data, codebook) -print(columns_not_valid) -cat(result) diff --git a/connection_parameters.R b/connection_parameters.R index f472a66..c817322 100755 --- a/connection_parameters.R +++ b/connection_parameters.R @@ -143,7 +143,7 @@ urls <- c( "https://opal.aulss6.veneto.it", "https://uncover.itg.be" ) - +# users <- c( "user_analisis", "user_analisis", @@ -212,6 +212,6 @@ pass <- c( "Ekfl07UUgz", "Ekfl07UUgz", "3^z4AV.)hG5~PT/]", - "Ekfl07UUgz", + "7yuvFnCTrPxPDt8@", "3^z4AV.)hG5~PT/]" ) diff --git a/remote_execute_as_admin.R b/remote_execute_as_admin.R index 900a9f3..0c695c2 100644 --- a/remote_execute_as_admin.R +++ b/remote_execute_as_admin.R @@ -1,7 +1,7 @@ # load opal library library(opalr) -o <- opal.login('pepe','7G9b!!2IyJs$', url = 'https://uncover.itg.be') +o <- opal.login('opal_admin','nK5&@WC1A13M', url = 'https://opal.aulss6.veneto.it') dir_name <- readline("Introduce the name of the directory please: ") -- 2.24.1