Commit 3d03e9b5 authored by Pepe Márquez Romero's avatar Pepe Márquez Romero

cambiando el codebook y empezando a cambiar el analisis de los valores

parent 3a586839
......@@ -4,22 +4,16 @@ dir_name <- readline("Introduce the name of the directory please: ")
setwd(dir_name)
source("required_folder_checker.R")
source("argument_hasher.R")
source("dependency_installer.R")
dep_list = c("jsonlite", "stringr","DSI","DSOpal","DSLite", "fields", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient", "openxlsx")
install_dependencies(dep_list)
setwd(dir_name)
#source("connection_parameters.R")
#source("necessary_functions_connection.R")
codebook <- read.csv("harmon.csv" , sep = ";")
codebook <- read.csv("new_harmon.csv" , sep = ",")
codebook_col_names <- as.data.frame(codebook$Harmonised.variable.name)
......@@ -47,18 +41,22 @@ check_column_names <- function(col_names){
str_res <- "The column names:"
valid_colnames <- c()
repeated_colnames <- c()
for(i in 1:(nrow(col_names))){
col_name <- col_names[i,1]
if(!check_valid_name(col_name)){
number_of_column <- check_valid_name(col_name)
if( number_of_column == 0){
str_res<- paste(str_res, col_name, sep=" ")
}else{
}else if (number_of_column == 1){
valid_colnames = c(valid_colnames, col_name)
}else{
repeated_colnames = c(repeated_colnames , col_name)
}
}
str_res<- paste(str_res,"are not registered in the harmonized data codebook \n", sep=" ")
new_list <- list("not_colnames" = str_res , "colnames" = valid_colnames)
new_list <- list("not_colnames" = str_res , "colnames" = valid_colnames , "repeated_colnames" = repeated_colnames)
return (new_list)
}
......@@ -66,10 +64,13 @@ check_column_names <- function(col_names){
#Test if a single variable name is valid
check_valid_name <- function(col_name){
valid <- FALSE
valid <- 0
if(col_name %in% codebook_col_names$col_names){
if(col_name %in% codebook_col_names$col_names)
valid <- TRUE
valid <- length(grep(col_name, names(harmonized_data)))
}
return (valid)
......@@ -376,7 +377,7 @@ error_message <- function(colname, invalid_values){
}
check_valid_values <- function(){
check_valid_values <- function(valid_colnames){
invalid_name_list <- c()
cannot_analyse_list <- c()
......@@ -388,37 +389,20 @@ check_valid_values <- function(){
k <- 1
for(i in 1:(nrow(valid_colnames))){
name <- names(valid_colnames_with_data)[i]
if("DMRBORN" == name | grepl("DAT",colname, fixed=TRUE) | "ISO" == name | "BEF" == name){
next
}
data_table ="empty"
if(!grepl("DMRBORN",valid_colnames[i,1], fixed=TRUE) & (!grepl("DAT",valid_colnames[i,1], fixed=TRUE)) & (!grepl("ISO",valid_colnames[i,1], fixed=TRUE))& (!grepl("BEF",valid_colnames[i,1], fixed=TRUE))){
column <- "data$"
column <- paste(column, valid_colnames[i,1], sep="")
column <- valid_colnames[,i]
tryCatch(
error = function(cnd) {
print("Unable to analyse data")
res <- FALSE
},
data_table <- as.data.frame(table(column))
)
if(data_table == "empty"){
cannot_analyse_list <- c(cannot_analyse_list,valid_colnames[i,1])
}else{
if (data_table[[1]] == "All studies failed for reasons identified below")
values <- get_values_from_quantiles(column)
else
values <- row.names(data_table)
numeric_col<- paste(valid_colnames[i,1],"_numeric", sep="")
numeric_col<- paste(valid_colnames[,i],"_numeric", sep="")
if( valid_colnames[i,1] %in% categoric_vars ){
if( name %in% categoric_vars ){
#is_numeric <- grepl("numeric",valid_colnames[i,1], fixed=TRUE)
has_numeric <- numeric_col %in% valid_colnames$`valid_data_colnames(data_colnames)`
......@@ -427,11 +411,7 @@ check_valid_values <- function(){
missing_numeric <- c(missing_numeric, valid_colnames[i,1])
if (data_table[[1]] == "All studies failed for reasons identified below"){
cannot_analyse_list <- c(cannot_analyse_list,valid_colnames[i,1])
}else if(!check_values_categoric(values,valid_colnames[i,1])){
if(!check_values_categoric(values,valid_colnames[i,1])){
print("Wrong categoric value:")
print(valid_colnames[i,1])
......@@ -458,10 +438,10 @@ check_valid_values <- function(){
}
}
}
}
missing_numeric
......@@ -505,10 +485,11 @@ columns_not_valid <- check_valid_columns$not_colnames
valid_colnames <- as.data.frame(check_valid_columns$colnames)
names(valid_colnames) = c("valid_colnames")
valid_colnames_with_data <- subset(harmonized_data , select = valid_colnames$valid_colnames)
result <- ""
result<-check_valid_values()
result<-check_valid_values(valid_colnames_with_data)
print(check_valid_columns)
#datashield.logout(connections)
cat(result)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment