# survival_curve.R
# Last committed by pxp9.


source("dependency_installer.R")
source("required_folder_checker.R")
source("argument_hasher.R")

dep_list = c("survival", "lubridate", "survminer", "stringr", "DSI", "DSOpal", "DSLite", "fields", "hrbrthemes", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient")
install_dependencies(dep_list)


# Image file extension.
image_format <- ".png"

# Trailing command-line arguments handed to the script.
args <- commandArgs(trailingOnly = TRUE)

# Connection settings for every known site; position k in each vector
# belongs to the same hospital.
# NOTE(review): the Opal user and password are stored here in plain text;
# consider supplying them from outside the source file.
site_settings <- list(
  hospital_names = c("HM", "Princesa"),
  project_names  = c("RESOURCE_GUIDE", "RESOURCE_GUIDE"),
  resource_names = c("HM_rs", "Princesa_rs"),
  urls           = c("https://192.168.1.50:8844", "https://192.168.1.50:8844"),
  users          = c("opal_admin", "opal_admin"),
  pass           = c("5f%R!&wfbUF*7gZ14mg", "5f%R!&wfbUF*7gZ14mg")
)

# Only the second site is kept for the rest of the script.
active_site <- 2

hospital_names <- site_settings[["hospital_names"]][active_site]
project_names  <- site_settings[["project_names"]][active_site]
resource_names <- site_settings[["resource_names"]][active_site]
urls           <- site_settings[["urls"]][active_site]
users          <- site_settings[["users"]][active_site]
pass           <- site_settings[["pass"]][active_site]

# project_names_o <- args[1]
# project_names <- str_split(project_names_o, ";")[[1]]
# 
# resource_names_o <- args[2]
# resource_names <- str_split(resource_names_o, ";")[[1]]
# 
# urls_o <- args[3]
# urls <- str_split(urls_o, ";")[[1]]
# 
# users_o <- args[4]
# users <- str_split(users_o, ";")[[1]]
# 
# pass_o <- args[5]
# pass <- str_split(pass_o, ";")[[1]]
# 
# hospital_name <- args[6]
# extra_filter <- args[7]




json_output <- c()

# Build one DataSHIELD login entry per configured server and open the
# connections. Servers are named "study0", "study1", ... in the order of
# `urls`.
builder <- DSI::newDSLoginBuilder()

# Fail early instead of silently indexing past the end of a shorter vector
# (which would pass NA as user/password/resource to the login builder).
stopifnot(
  length(urls) > 0,
  length(users) == length(urls),
  length(pass) == length(urls),
  length(project_names) == length(urls),
  length(resource_names) == length(urls)
)

url_ctr <- 0
for (i in seq_along(urls)) {
  print(paste("Connecting to Server with URL:", urls[i], sep=" "))
  # NOTE(review): the options string sets ssl_verifyhost and ssl_verifypeer
  # to 0, i.e. TLS certificate checks appear to be switched off -- confirm
  # this is intended for the target servers.
  builder$append(server = paste0("study", url_ctr), url = urls[i],
                 user = users[i], password = pass[i],
                 resource = paste(project_names[i], resource_names[i], sep="."),
                 driver = "OpalDriver", options="list(ssl_verifyhost=0,ssl_verifypeer=0)")

  url_ctr <- url_ctr + 1
}

logindata <- builder$build()
# Log in and assign each server's resource to the server-side symbol "D".
connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")

# Name of the server-side data frame built from the resource held in "D".
datastructure_name <- "data"
datashield.assign.expr(
  connections,
  symbol = datastructure_name,
  expr = quote(as.resource.data.frame(D))
)

# Column names of the server-side data frame.
ds.colnames(x = datastructure_name, datasources = connections)

# Rows and columns of the data pooled over all studies.
data_dim <- ds.dim(x = datastructure_name, datasources = connections)
combined_dims <- data_dim[["dimensions of data in combined studies"]]
data_dim_rows <- combined_dims[1]
data_dim_cols <- combined_dims[2]


#' Reconstruct a synthetic sample of a server-side numeric variable
#'
#' Queries `ds.quantileMean()` for `df$var` and draws `size` values that are
#' uniformly distributed inside the four consecutive intervals delimited by
#' elements 1, 3, 4, 5 and 7 of its result (rounded to whole numbers).
#' NOTE(review): per the dsBaseClient documentation these positions should be
#' the 5%, 25%, 50%, 75% and 95% quantiles, so the outer bounds are not the
#' true minimum/maximum -- confirm against the installed version.
#'
#' @param df Name of the server-side data frame (character).
#' @param var Name of the column inside `df` (character).
#' @param size Number of values to generate (non-negative whole number).
#' @param seed RNG seed set before sampling so repeated calls are
#'   reproducible; `NULL` leaves the RNG state untouched. Defaults to 1.
#' @param datasources DataSHIELD connections to query. Defaults to the
#'   script-level `connections` object.
#' @return Numeric vector of length `size`, ordered interval by interval.
get_reconstructed_population <- function(df, var, size, seed = 1,
                                         datasources = connections) {
  size <- as.integer(size)
  stopifnot(length(size) == 1L, !is.na(size), size >= 0L)

  quantile_data <- ds.quantileMean(
    x = paste(df, var, sep = "$"),
    datasources = datasources
  )

  # Interval boundaries, rounded to whole units.
  breaks <- round(unlist(quantile_data[c(1, 3, 4, 5, 7)], use.names = FALSE))

  # Spread `size` over the four intervals. The remainder of size / 4 goes to
  # the first intervals so the total is exactly `size`; a plain
  # runif(size / 4, ...) truncates and silently drops up to three values.
  per_interval <- size %/% 4L + (seq_len(4L) <= size %% 4L)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # runif() is vectorised over min/max, so a single call draws every value
  # in interval order.
  runif(
    size,
    min = rep(breaks[1:4], times = per_interval),
    max = rep(breaks[2:5], times = per_interval)
  )
}

# Split the server-side data frame by outcome: rows with DSXOS_numeric == 1
# go to "OutFilteredDEATH", rows with DSXOS_numeric == 0 to
# "OutFilteredALIVE". The connections are passed explicitly, like in every
# other ds.* call of this script.
outcome_column <- paste0(datastructure_name, "$DSXOS_numeric")
ds.dataFrameSubset(df.name = datastructure_name, V1.name = outcome_column,
                   V2.name = "1", Boolean.operator = "==",
                   newobj = "OutFilteredDEATH", datasources = connections)
ds.dataFrameSubset(df.name = datastructure_name, V1.name = outcome_column,
                   V2.name = "0", Boolean.operator = "==",
                   newobj = "OutFilteredALIVE", datasources = connections)

# Row count of a server-side data frame, taken from the last element of the
# ds.dim() result (presumably the combined-studies entry).
combined_row_count <- function(df_name) {
  dims <- ds.dim(x = df_name, datasources = connections)
  dims[[length(dims)]][1]
}

# Wrap reconstructed times in a data frame with a constant status label.
# rep() keeps this valid for an empty group, where assigning a scalar to a
# 0-row data frame would fail.
outcome_frame <- function(times, label) {
  data.frame(out_time = times,
             status = rep(label, length(times)),
             stringsAsFactors = FALSE)
}

data_dim_DEATH <- combined_row_count("OutFilteredDEATH")
data_dim_ALIVE <- combined_row_count("OutFilteredALIVE")

# Synthetic DATLGT values per outcome group, sized like the real groups.
reconstr_pop_time_outcome_death <- get_reconstructed_population("OutFilteredDEATH", "DATLGT", data_dim_DEATH)
df_death <- outcome_frame(reconstr_pop_time_outcome_death, "death")

reconstr_pop_time_outcome_alive <- get_reconstructed_population("OutFilteredALIVE", "DATLGT", data_dim_ALIVE)
df_alive <- outcome_frame(reconstr_pop_time_outcome_alive, "alive")

# Alive rows first, then death rows.
full_status_df <- rbind(df_alive, df_death)

# Name of the extra stratification column. Its only assignment in this file
# (from the 7th command-line argument) is commented out, so resolve it here:
# keep an existing value, else use the 7th argument, else fall back to "sex".
# NOTE(review): "sex" is a placeholder chosen to match the MALE/FEMALE
# labels below -- confirm the intended column name.
if (!exists("extra_filter")) {
  extra_filter <- if (length(args) >= 7 && !is.na(args[7])) args[7] else "sex"
}

# Random placeholder label per row: even draws become "MALE", odd "FEMALE".
samplenum <- sample(0:100000, nrow(full_status_df), replace = TRUE)
filtercol <- ifelse(samplenum %% 2 == 0, "MALE", "FEMALE")
full_status_df[extra_filter] <- filtercol

# Event indicator for Surv(): 1 = death (event), 0 = alive (censored).
full_status_df[["status_surv"]] <- as.numeric(full_status_df$status != "alive")


#filename <- paste0(hospital_name, "survival_curve", sep="")
#filename <- paste(filename, "outcome", sep="_")
#filename <- paste(filename, image_format, sep="")

# Render the overall survival curve to ./survAlberto/survival_curve.png and
# then close the DataSHIELD session.
# The file is addressed by path instead of calling setwd(), so the working
# directory of the calling session is left untouched.
output_dir <- "./survAlberto"
dir.create(output_dir, showWarnings = FALSE)

print("survival_curve.png")

tryCatch({
  png(file.path(output_dir, "survival_curve.png"), width = 750, height = 500)
  tryCatch({
    survival_fit <- survfit(Surv(out_time, status_surv) ~ 1, data = full_status_df)
    # Pass `data` explicitly so ggsurvplot does not have to recover it from
    # the fit's call.
    survplot <- ggsurvplot(
      fit = survival_fit,
      data = full_status_df,
      xlab = "Days",
      ylab = "Overall survival probability")
    # Explicit print(): autoprinting only happens at top level, so under
    # source() the bare object would leave the PNG empty.
    print(survplot)
  }, finally = dev.off())  # always close the graphics device
}, finally = datashield.logout(connections))  # always release the connections