Commit 3570d2a0 authored by pxp9's avatar pxp9

scripts de harmonizacion de David

parents
empty_cell_fixer.r + add_missing_values.r
Cambia las casillas vacías, puntos, "NA" y "Unknown" por "Missing", y en las columnas "_numeric" correspondientes lo cambia por un 9999
dora_test.r
Ejecuta la función personalizada que calcula el score derivado de las variables
full_quality_report.r
Genera un documento Word con los hospitales y las variables especificadas, donde se ordenan de mayor a menor varianza entre los hospitales
survival_curve.r
Genera una curva de supervivencia
table1_script.r + necessary_functions_table1_v2_subset.r
Muestra una tabla que muestra la información sobre los hospitales y las variables indicadas.
valid_variables_script.r
Comprueba que los valores de las variables del hospital indicado estén dentro de los rangos
(Se puede cambiar para que refleje lo que se desea obtener)
treatment_heatmap.r
Devuelve un mapa de calor que indica el porcentaje de pacientes que han recibido cada tratamiento, separados por olas.
ressourceCleaner.r
Deja los datos con Yes/No Male/Female etc
ressourceHarmonizer.r
Deriva las variables "_numeric"
\ No newline at end of file
# Load the harmonisation codebook and group its variable names by declared type.
# NOTE(review): the working directory is hard-coded to one user's machine.
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
codebook <- read.csv("new_harmon.csv", sep = ",")
# Harmonised variable names of the codebook rows whose Variable.type is
# "Binary", "Categorical" and "Continuous" respectively.
binary <- unlist(codebook[codebook["Variable.type"] == "Binary",]["Harmonised.variable.name"])
categorical <- unlist(codebook[codebook["Variable.type"] == "Categorical",]["Harmonised.variable.name"])
continuous <- unlist(codebook[codebook["Variable.type"] == "Continuous",]["Harmonised.variable.name"])
# Binary and categorical variables are handled together by add_missing_values().
categoric_vars <- c(binary, categorical)
# Replace missing-like entries of a categorical column with the label "Missing".
#
# An entry counts as missing when it is NA, the empty string, "NA" or "Unknown".
#
# column: atomic vector holding the values of one column.
# Returns the column with every missing-like entry set to "Missing"; a column
# without such entries (including an empty one) is returned unchanged.
replace_with_Missing_categoric <- function(column) {
  # `%in%` never yields NA, so NA entries are caught separately by is.na().
  is_missing <- is.na(column) | column %in% c("", "NA", "Unknown")
  # Assign only when needed: a sub-assignment with a character value would
  # otherwise coerce a clean numeric column to character.
  if (any(is_missing)) {
    column[is_missing] <- "Missing"
  }
  column
}
# Replace missing-like entries of a "_numeric" categorical column with 9999.
#
# An entry counts as missing when it is NA, the empty string, "NA" or "Unknown".
#
# column: atomic vector holding the values of one column.
# Returns the column with every missing-like entry set to 9999 (stored as
# "9999" when the column is character); a column without such entries
# (including an empty one) is returned unchanged.
replace_with_Missing_num_categoric <- function(column) {
  # `%in%` never yields NA, so NA entries are caught separately by is.na().
  is_missing <- is.na(column) | column %in% c("", "NA", "Unknown")
  # Assign only when needed so an untouched column keeps its original type.
  if (any(is_missing)) {
    column[is_missing] <- 9999
  }
  column
}
# Blank out missing-like entries of a continuous column.
#
# An entry counts as missing when it is NA, "NA" or "Unknown"; it is replaced
# by the empty string (which turns a numeric column into character).
#
# column: atomic vector holding the values of one column.
# Returns the column with every missing-like entry set to ""; a column without
# such entries (including an empty one) is returned unchanged.
replace_with_Missing_continuous <- function(column) {
  # `%in%` never yields NA, so NA entries are caught separately by is.na().
  is_missing <- is.na(column) | column %in% c("NA", "Unknown")
  # Assign only when needed so a clean numeric column stays numeric.
  if (any(is_missing)) {
    column[is_missing] <- ""
  }
  column
}
# Read one harmonised CSV and normalise its missing values column by column.
#
# Columns named in the global `categoric_vars` go through
# replace_with_Missing_categoric(), columns whose name contains "_numeric" go
# through replace_with_Missing_num_categoric(), and columns named in the global
# `continuous` go through replace_with_Missing_continuous().
#
# path_to_file: CSV file name, resolved relative to the current_db directory.
# Returns the data frame with the replacements applied.
add_missing_values <- function(path_to_file) {
  # NOTE(review): this changes the working directory to a hard-coded path and
  # leaves it there after the call.
  setwd("C:/Users/victor/Documents/TFG/r-analytics-master/ressources/current_db")
  data <- read.csv(path_to_file, sep = ",")
  data_colnames <- colnames(data)
  num_categoric <- data_colnames[grepl("_numeric", data_colnames)]
  n_cols <- length(data_colnames)
  # seq_along() instead of 1:n so a table without columns is skipped cleanly.
  for (i in seq_along(data_colnames)) {
    colname <- data_colnames[i]
    progress <- round((100 * i / n_cols), digits = 0)
    print(paste(progress, "%", sep = ""))
    if (colname %in% categoric_vars) {
      column <- unlist(data[colname])
      data[colname] <- replace_with_Missing_categoric(column)
    }
    if (colname %in% num_categoric) {
      column <- unlist(data[colname])
      data[colname] <- replace_with_Missing_num_categoric(column)
    }
    if (colname %in% continuous) {
      column <- unlist(data[colname])
      data[colname] <- replace_with_Missing_continuous(column)
    }
  }
  return(data)
}
# Admissions analysis script: loads the helper files, connects to the selected
# servers and calls obtain_admissions_graph().
# NOTE(review): rm(list=ls()) clears the whole workspace and the path below is
# specific to one machine.
rm(list=ls())
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
source("dependency_installer.R")
source("connection_parameters.R")
source("necessary_functions_graph.R")
# Packages handed to install_dependencies(), which installs the missing ones
# and attaches them all.
dep_list = c( "stringr","DSI","DSOpal","DSLite", "fields", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient")
install_dependencies(dep_list)
# Work from ./admissions_analysis (created if absent).
dir.create("./admissions_analysis", showWarnings = FALSE)
setwd("./admissions_analysis")
# connect() is expected to return a two-element list: the connections and the
# user's hospital selection.
aux <- connect()
connections <- aux[[1]]
inp <- aux[[2]]
obtain_admissions_graph(connections,inp)
# Display names of the data sources. The position of each name is the number
# shown in the connect() menu and is shared with project_names,
# resource_names, urls, users and pass.
# TODO (original note): add the Baskent and Sacrocuore ones.
hospital_names <- c(
  # 1-10
  "Princesa", "CIPH", "UMF_Iasis", "SMUC", "HM",
  "Porto", "FJD", "Coimbra", "UNAV", "TU",
  # 11-16
  "Ankara Impatient", "Konya Impatient", "Istambul Impatient",
  "Izmir Impatient", "Alanya Impatient", "Adana Impatient",
  # 17-21
  "Ankara Outpatient", "Konya Outpatient", "Istambul Outpatient",
  "Izmir Outpatient", "Alanya Outpatient",
  # 22-25
  "Sacrocuore Emergency", "Sacrocuore Employees",
  "Sacrocuore Verona", "Sacrocuore Isaric",
  # 26-31
  "TUDublin", "UMF_Cluj", "UdeA", "Inantro", "UNSA", "UZA"
)
# Project part of each resource identifier ("<project>.<resource>"),
# index-aligned with hospital_names.
project_names <- c(
  "FIBHULP", "CIPH_unCoVer", "umfiasi", "SMUC", "FiHM",
  "uncover-up", "IISFJD", "IPC", "unCOVer-UNAV", "TU_Uncover",
  rep("BU", 11),        # entries 11-21
  rep("S_uncover", 4),  # entries 22-25
  "TUDublin", "UMF_Cluj", "INS_Data", "INANTRO", "UnCoVer-BiH-Final", "UZA"
)
# Resource part of each resource identifier ("<project>.<resource>"),
# index-aligned with hospital_names.
resource_names <- c(
  # 1-10
  "Harmonized_variables_2", "CIPH_numeric_derivated",
  "20220719_HarmonisedUMFIasi", "SMUC_resource", "20220720_HarmonisedHM",
  "Resource_derived", "IISFJD_Harmonized_1", "IPC_Harmonized",
  "UNAV_rsc", "TU_Harmonized",
  # 11-16
  "inpatient_ankara", "inpatient_konya", "inpatient_istanbul",
  "inpatient_izmir", "inpatient_alanya", "inpatient_adana",
  # 17-21
  "outpatient_ankara", "outpatient_konya", "outpatient_istanbul",
  "outpatient_izmir", "outpatient_alanya",
  # 22-25
  "emergency", "employees", "verona", "isaric",
  # 26-31
  "TUDublin_harmonised", "Romania", "colombia_all", "Inantro",
  "20220722_HarmonizedUNSA", "UZA_prelim"
)
# Server URL of each data source, index-aligned with hospital_names.
urls <- c(
  "https://192.168.1.200:8001", "https://192.168.1.200:8002",
  "https://192.168.1.200:8003", "https://192.168.1.200:8006",
  "https://192.168.1.50:9002", "https://192.168.1.102",
  rep("https://uncover.itg.be", 2),        # entries 7-8
  "https://192.168.1.50:9001", "https://192.168.1.200:8004",
  rep("https://192.168.1.101:8443", 11),   # entries 11-21
  rep("https://192.168.1.50:8890", 4),     # entries 22-25
  "https://uncover.itg.be", "https://192.168.1.200:8005",
  "https://fenfisdi.udea.edu.co/opal", "https://192.168.1.200:8007",
  "https://192.168.1.200:8008", "https://uncover.itg.be"
)
# Login user of each data source, index-aligned with hospital_names: every
# entry is "user_analisis" except entries 7, 8, 26 and 31.
users <- rep("user_analisis", 31)
users[c(7, 8, 26, 31)] <- "emertens"
# Password of each data source, index-aligned with `users`.
#
# Credentials must not live in version control, so they are read from the
# environment: OPAL_ANALYSIS_PASSWORD for the "user_analisis" accounts and
# OPAL_ITG_PASSWORD for the "emertens" accounts (entries 7, 8, 26 and 31).
# NOTE(review): the passwords previously committed here remain visible in the
# repository history and should be rotated.
pass <- rep(Sys.getenv("OPAL_ANALYSIS_PASSWORD"), 31)
pass[c(7, 8, 26, 31)] <- Sys.getenv("OPAL_ITG_PASSWORD")
if (any(!nzchar(pass))) {
  warning(
    "Opal passwords are not set: define the environment variables ",
    "OPAL_ANALYSIS_PASSWORD and OPAL_ITG_PASSWORD before connecting.",
    call. = FALSE
  )
}
# Install (when missing) and attach every package named in `dep_list`.
#
# dsBaseClient and DSI are installed from GitHub through `remotes` (DSI pinned
# to ref 1.3.3); every other package is installed from CRAN.
# ToDo: exception control for unexistent/misspelled packages.
#
# dep_list: character vector of package names.
# Returns NULL invisibly; the function is called for its side effects.
install_dependencies <- function(dep_list) {
  # Deprecated since executing server is Ubuntu.
  #if(Sys.info()["sysname"] == "Windows"){
  # dep_list[length(dep_list)+1] = "Rtools"
  #}
  cran_repo <- "https://cran.us.r-project.org"
  for (p in dep_list) {
    # requireNamespace() only probes for the package; attaching happens below.
    if (!requireNamespace(p, quietly = TRUE)) {
      if (p %in% c("dsBaseClient", "DSI")) {
        # `remotes` itself may be absent on a fresh machine.
        if (!requireNamespace("remotes", quietly = TRUE)) {
          install.packages("remotes", repos = cran_repo)
        }
        if (p == "dsBaseClient") {
          remotes::install_github("datashield/dsBaseClient", dependencies = TRUE)
        } else {
          remotes::install_github("datashield/DSI", ref = "1.3.3", dependencies = TRUE)
        }
      } else {
        install.packages(p, dependencies = TRUE, repos = cran_repo)
      }
    }
    # "Rtools" (see the deprecated block above) is never attached.
    if (p != "Rtools") {
      library(p, character.only = TRUE)
    }
  }
  invisible(NULL)
}
# DORA score test script: connects to the selected servers, asks them to build
# a DORA table from `data` and tabulates the resulting DORA_class column.
# NOTE(review): rm(list=ls()) clears the whole workspace and the path below is
# specific to one machine.
rm(list=ls())
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
source("dependency_installer.R")
dep_list = c("magrittr","officer","dplyr","stringr","DSI","DSOpal","DSLite","dsBaseClient")
install_dependencies(dep_list)
#,"DSI","DSOpal","DSLite"
source("connection_parameters.R")
source("necessary_functions_connection.R")
# connect() is expected to return a two-element list: the connections and the
# user's hospital selection.
auxConnections <- connect()
connections <- auxConnections[[1]]
inp <- auxConnections[[2]]
# Build the call DORA_scoresDS("data") and assign its server-side result to
# the symbol "DORA_table" on every connection.
calltext <- call("DORA_scoresDS", "data")
newobj <- "DORA_table"
datashield.assign(connections, newobj, calltext)
# Inspect the derived table: its column names and the DORA_class frequencies.
ds.colnames("DORA_table")
ds.table("DORA_table$DORA_class")
datashield.logout(connections)
# Normalise the missing values of every file in current_db and write each
# result, under the same file name, into current_db/ready.
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
source("add_missing_values.R")
# NOTE(review): the sixth directory entry is dropped by position; presumably it
# is the "ready" output folder or a file to skip. Confirm, since the index
# shifts whenever entries are added or removed.
list_of_files <- list.files("C:/Users/victor/Documents/TFG/r-analytics-master/ressources/current_db")[-6]
#list_of_files <- "Harmonized_LIH.csv"
# Iterating over the names directly avoids the 1:length() trap, which would
# run the body with an NA file name when the directory listing is empty.
for (file_name in list_of_files) {
  print(paste ("Fixing file:", file_name))
  setwd("C:/Users/victor/Documents/TFG/r-analytics-master/ressources/current_db")
  ready_file <- add_missing_values(file_name)
  setwd("C:/Users/victor/Documents/TFG/r-analytics-master/ressources/current_db/ready")
  write.csv(x = ready_file, file = file_name, row.names = FALSE)
}
This diff is collapsed.
# Demographic analysis script: connects to the selected servers, runs
# obtainData() and captures whatever it draws into "<name>.png".
# NOTE(review): rm(list=ls()) clears the whole workspace and the path below is
# specific to one machine.
rm(list=ls())
setwd("C:/Users/victor/Desktop/TFG/r-analytics-master")
source("required_folder_checker.R")
source("argument_hasher.R")
source("dependency_installer.R")
source("connection_parameters.R")
source("necessary_function_table1s.R")
dep_list = c("jsonlite", "stringr","DSI","DSOpal","DSLite", "fields", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient")
install_dependencies(dep_list)
# Work from ./demographic_analisis (created if absent).
dir.create("./demographic_analisis", showWarnings = FALSE)
setwd("./demographic_analisis")
# connect() is expected to return a two-element list: the connections and the
# user's hospital selection.
aux <- connect()
connections <- aux[[1]]
inp <- aux[[2]]
hasOutput <- checkHasOutput(inp)
name <- getHname(inp)
# Everything plotted between png() and dev.off() goes to the image file.
png(paste(name,".png",sep=""),width=800, height=400)
res <- obtainData(connections, hasOutput, inp)
print(res)
dev.off()
datashield.logout(connections)
res
This diff is collapsed.
# Interactively select one or more hospitals and open DataSHIELD connections.
#
# Prints the numbered hospital menu, reads the selection from the console with
# scan(), builds one login entry per selected hospital from the global vectors
# hospital_names, project_names, resource_names, urls, users and pass, logs in
# and assigns each resource as the data frame `data` on the servers.
#
# Returns list(connections, inp): the DSI connections and the selected indices.
# Stops with an error when the selection is empty or out of range.
connect <- function (){
  cat("\n\n\n----------------------------------------------------------------------------------------------------------")
  cat("\nPlease select the number corresponding to the hospital you want to analyse, if you want to do a combined analysis select multiple hospitals")
  cat("\n
Princesa -> 1
CIPH -> 2
UMF_Iasis -> 3
SMUC -> 4
HM -> 5
Porto -> 6
FJD -> 7
Coimbra -> 8
UNAV -> 9
TU -> 10
Baskent:
Ankara Impatient -> 11
Konya Impatient -> 12
Istambul Impatient -> 13
Izmir Impatient -> 14
Alanya Impatient -> 15
Adana Impatient -> 16
Ankara Outpatient -> 17
Konya Outpatient -> 18
Istambul Outpatient -> 19
Izmir Outpatient -> 20
Alanya Outpatient -> 21
Sacrocuore:
Emergency database -> 22
Employees database -> 23
Verona database -> 24
Isaric -> 25
TU Dublin -> 26
UMF Cluj -> 27
UdeA -> 28
Inantro -> 29
UNSA -> 30
UZA -> 31
")
  inp <- scan()
  # Fail early on an unusable selection instead of building logins full of NA.
  if (length(inp) == 0 || anyNA(inp) || any(inp < 1 | inp > length(hospital_names))) {
    stop("Invalid hospital selection: enter numbers between 1 and ",
         length(hospital_names), call. = FALSE)
  }
  builder <- DSI::newDSLoginBuilder()
  # Local copies restricted to the selection; the globals are left untouched.
  hospital_names <- hospital_names[inp]
  project_names <- project_names[inp]
  resource_names <- resource_names[inp]
  urls <- urls[inp]
  users <- users[inp]
  pass <- pass[inp]
  # Echo the selection for the operator. Passwords are deliberately not
  # printed: they would end up in console logs.
  print(hospital_names)
  print(project_names)
  print(resource_names)
  print(urls)
  print(users)
  for (i in seq_along(urls)) {
    print(paste("Connecting to Server with URL:", urls[i], sep=" "))
    builder$append(server = hospital_names[i], url = urls[i],
                   user = users[i], password = pass[i],
                   resource = paste(project_names[i], resource_names[i], sep="."),
                   driver = "OpalDriver", options="list(ssl_verifyhost=0,ssl_verifypeer=0)")
  }
  logindata <- builder$build()
  connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D", failSafe = TRUE)
  # Turn the assigned resource D into a server-side data frame named `data`.
  datashield.assign.expr(connections, symbol = 'data', expr = quote(as.resource.data.frame(D)))
  #datashield.assign.expr(connections, symbol = 'auxDf', expr = quote(as.resource.data.frame(D)))
  print("Successful connection to servers.")
  return(list(connections,inp))
}
This diff is collapsed.
This diff is collapsed.
# Resource cleaner, part 1: load the raw hospital file and the codebook sheets.
# NOTE(review): rm(list=ls()) clears the whole workspace and the paths below
# are specific to one machine.
rm(list=ls())
# rm(list=ls()) does not attach packages: dplyr provides %>%, select() and
# contains() used below, stringr provides str_replace() used by the helpers.
library(dplyr)
library(stringr)
# The earlier setwd() to ".../ressources" was immediately overridden by this
# absolute path, so only the effective one is kept.
setwd(("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient"))
# Change this to the name of the resource to be cleaned
hospital <- data.frame(read.csv("konya_outpatient.csv", sep=","))
# Drop every column whose name contains "numeric".
hospital <- hospital %>% select(-contains("numeric"))
# hospital["NOT.HARMONISED"] <- NULL
#
# names <- colnames(hospital)
# for (i in 1:length(names)){
#
# if(grepl("NOT.HARMONISED", names[i])){
# hospital[names[i]] <- NULL
# print(paste("quito ", names[i]))
# }
#
# }
# hospital <- hospital[-1,]
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/harmonised_data")
# Each codebook sheet is cut to a fixed number of rows and its first five
# columns so that the sheets can be stacked with rbind().
ComAndRF <- data.frame(read.csv("Com&RF.csv", sep=","))[1:64,1:5]
Complications <- data.frame(read.csv("Complications.csv", sep=";"))[1:20,1:5]
Dates <- data.frame(read.csv("Dates.csv", sep=";"))[1:12,1:5]
Demographics <- data.frame(read.csv("Demographics.csv", sep=";"))[1:9,1:5]
Home_med <- data.frame(read.csv("Home_med.csv", sep=";"))[1:13,1:5]
Imaging_data <- data.frame(read.csv("Imaging_data.csv", sep=";"))[1:11,1:5]
Labo <- data.frame(read.csv("Labo.csv", sep=";"))[1:143,1:5]
SiAndSympt <- data.frame(read.csv("Si&Sympt.csv", sep=";"))[1:50,1:5]
Treatment <- data.frame(read.csv("Treatment.csv", sep=";"))[1:32,1:5]
LifestyleAndDiet <- data.frame(read.csv("Lifestyle&Diet.csv", sep=";"))[1:165,1:5]
harmonised_data <- rbind(SiAndSympt,ComAndRF)
harmonised_data <- rbind(harmonised_data,Treatment)
harmonised_data <- rbind(harmonised_data,Dates)
harmonised_data <- rbind(harmonised_data,Demographics)
harmonised_data <- rbind(harmonised_data,Home_med)
harmonised_data <- rbind(harmonised_data,Imaging_data)
harmonised_data <- rbind(harmonised_data,Complications)
harmonised_data <- rbind(harmonised_data,Labo)
harmonised_data <- rbind(harmonised_data,LifestyleAndDiet)
# Drop the per-sheet tables now that they are merged. The original list named
# "Complications" twice (which makes rm() warn) and never removed Treatment.
rm(list=c("SiAndSympt",
          "Complications",
          "ComAndRF",
          "Dates",
          "Demographics",
          "Home_med",
          "Imaging_data",
          "Treatment",
          "Labo",
          "LifestyleAndDiet"))
# Variables whose codebook format is "No/Yes / missing" (either capitalisation
# of "missing"), extended with a few names added by hand.
noYesValues <- subset(harmonised_data, harmonised_data$Harmonised.data.format.unit == "No/Yes / missing" | harmonised_data$Harmonised.data.format.unit == "No/Yes / Missing")
noYesValues <- noYesValues$Harmonised.variable.name
noYesValues <- c(noYesValues,"CSXCOTAB","CSXCOTAG","IMDIT","RFXHIV_RFXAIDS", "SMXASAH", "CMXATH", "CMXNO")
# Variables treated as categorical: the cleaning loop does not run
# fixNonCategoric() on them.
categoric_vars = c("CMXATH", "CMXNO", "SMXASAH","CSXCOTAB","CSXCOTAG","IMDIT","RFXHIV_RFXAIDS","DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU", "DSXOS", "DSXHO", "DSXIC", "TRXAV","TRXRIB","TRXLR","TRXRM","TRXIA","TRXIB","TRXCH","TRXAB","TRXCS","TRXHEP","TRXAF","TRXCP","TRXOT","TRXECM","TRXIV","TRXNIV","TRXNO","TRXOX","TRXRR","TRXTR","TRXVA","TRXPE","TRXPV","TRXIT","TRXNMB","TRXAC","TRXINA","TRXIS","TRXIM","TRXVC","TRXVD","TRXZN", "CSXCOT","CSXCTR","SMXASAH","SMXFEA","SMXCOA","SMXSTA","SMXSBA","SMXRNA","SMXMYA","SMXARA","SMXCPA","SMXAPA","SMXINA","SMXNAA","SMXDIA","SMXFAA","SMXHEA","SMXCNA","SMXACA","SMXSLA","SMXTLA","SMXSYA","SMXWHA","SMXLYA","SMXANA","SMXIWA","SMXSRA","SMXBLA","CMXPRG","CMXCVD","CMXCMP","CMXHT","CMXDI","CMXCKD","CMXCLD","CMXCPD","CMXASM","CMXCND","CMXRHE","CMXCCI","CMXCBD","CMXDE","CMXPU","CMXST","CMXLY","CMXAP","RFXSM","RFXFSM","RFXOB","RFXTB","RFXIMD","RFXHIV","RFXAIDS","RFXUI","RFXHC","RFXONC","RFXMN", "HMRACI","HMRARB","HMRAHO","HMRNS","HMROS","HMRCS","HMRIS","HMRAV","HMRAB","HMRCOV","IMDXCT","IMDXCTCR","IMDXCTTE","IMDXCTAB","IMDXXR","IMDXPN", "COXRD","COXAR","COXPM","COXMOD","COXPT","COXEC","COXSH","COXIO","COXPE","COXST","COXDIC","COXRIO","COXKF","COXHF","COXBC")
# Variables that the cleaning loop recodes through personalizedFun().
personalized <- c("DMRGENDR", "DSXOS", "CSXCTR", "SMXFEA", "CSXCOT")
# Tell whether a single value can be read as a number.
#
# A decimal comma is accepted: the first "," is replaced by "." before parsing.
#
# x: a single value (or a zero-length vector).
# Returns TRUE when x parses as a number and FALSE otherwise (including NA,
# "" and zero-length input).
is_number <- function(x) {
  if (length(x) == 0) {
    return(FALSE)
  }
  # Base sub() keeps this helper independent of stringr; fixed = TRUE treats
  # the comma literally and, like the original, replaces only the first one.
  candidate <- sub(",", ".", x, fixed = TRUE)
  # Non-numeric text is an expected input here, so the "NAs introduced by
  # coercion" warning would only be noise repeated for every such cell.
  parsed <- suppressWarnings(as.numeric(candidate))
  !is.na(parsed)
}
# Normalise a single No/Yes cell.
#
# x: a single cell value.
# Returns "No" for 0, "No", " No" or "NO"; "Yes" for 1, "Yes", " Yes" or "SI";
# and "" for NA or anything else.
replaceNoYesValues <- function(x) {
  if (is.na(x)) {
    return("")
  }
  means_no <- x == 0 | x == "No" | x == " No" | x == "NO"
  if (means_no) {
    return("No")
  }
  means_yes <- x == 1 | x == "Yes" | x == " Yes" | x == "SI"
  if (means_yes) {
    return("Yes")
  }
  ""
}
# Clean a single cell of a non-categorical column.
#
# x: a single cell value.
# Returns x with its first "," replaced by "." when is_number(x) accepts it,
# and "" otherwise.
fixNonCategoric <- function(x) {
  if (is_number(x)) {
    return(str_replace(x, ",", "."))
  }
  ""
}
# Recode a single cell of one of the variables that need a bespoke mapping.
#
# Handled variables: DMRGENDR, CSXCTR, SMXFEA, DMRRETH1, DMROCCU, DMRHREDU,
# DSXOS and CSXCOT. For these, NA becomes "". Values without a mapping are
# returned unchanged, except for DSXOS and CSXCOT where they become "".
#
# x: a single cell value.
# colname: name of the column the cell belongs to.
# Returns the recoded value, or x itself when colname is not handled.
personalizedFun <- function(x, colname) {
  recoded <- c("DMRGENDR", "CSXCTR", "SMXFEA", "DMRRETH1",
               "DMROCCU", "DMRHREDU", "DSXOS", "CSXCOT")
  if (!(colname %in% recoded)) {
    return(x)
  }
  if (is.na(x)) {
    return("")
  }
  # Map the codes 1..length(labels) to their labels; `otherwise` is returned
  # when x is none of those codes.
  from_code <- function(labels, otherwise = x) {
    for (code in seq_along(labels)) {
      if (x == code) {
        return(labels[code])
      }
    }
    otherwise
  }
  switch(colname,
    DMRGENDR = {
      if (x == 1 | x == "F" | x == "f" | x == "Female") {
        "Female"
      } else if (x == 0 | x == "M" | x == "m" | x == "Male") {
        "Male"
      } else {
        x
      }
    },
    CSXCTR = {
      # The source data contains mis-encoded spellings of POSITIVE/NEGATIVE.
      # "NMGAT?VM" follows the same garbling as the other NEGATIVE variants
      # and used to be listed, by mistake, among the positive spellings.
      if (x == 1 | x == "positive" | x == "PositivM" | x == "POSITIVM" |
          x == "POS?T?VM") {
        "Positive"
      } else if (x == 0 | x == "negative" | x == "negativM" | x == "NMGATIVM" |
                 x == "NAGATIVM" | x == "NMGATIV" | x == "negativeM" |
                 x == "NAGAT?VM" | x == "NMGAT?V" | x == "NMGAT?VM") {
        "Negative"
      } else {
        x
      }
    },
    SMXFEA = {
      if (x == 1) {
        "Yes"
      } else if (x == 0) {
        "No"
      } else if (x == ".") {
        ""
      } else {
        x
      }
    },
    DMRRETH1 = from_code(c("Asian", "Black", "Hispanic", "White",
                           "Multiracial", "Other")),
    # Code 6 is deliberately mapped to the empty string.
    DMROCCU = from_code(c("Unemployed", "Student", "Employed",
                          "Self-employed", "Retired", "")),
    DMRHREDU = from_code(c("High School", "Bachelors", "Postgraduate", "Other")),
    DSXOS = {
      if (x == 0 | x == "Recovered") {
        "Recovered"
      } else if (x == 1 | x == "Deceased") {
        "Deceased"
      } else if (x == 2 | x == "Transferred") {
        "Transferred"
      } else {
        ""
      }
    },
    CSXCOT = from_code(c("PCR", "antigen", "other"), otherwise = "")
  )
}
# Convert a "dd.mm.yyyy" date string into "dd/mm/yyyy".
#
# x: a single value.
# Returns x unchanged when it contains no dot; otherwise x parsed as
# "%d.%m.%Y" and formatted as "%d/%m/%Y".
dotToBar <- function(x) {
  if (!grepl(".", x, fixed = TRUE)) {
    return(x)
  }
  parsed <- as.Date(x, format = "%d.%m.%Y")
  format(parsed, "%d/%m/%Y")
}
# Resource cleaner, part 2: clean `hospital` cell by cell into `newDf` and
# write the result to the "clean" folder.
# Drop a stale newDf left over from an earlier interactive run; a bare
# rm(newDf) warns when the object does not exist.
if (exists("newDf")) rm(newDf)
newDf <- hospital
names <- colnames(hospital)
n_cols <- ncol(hospital)
# seq_len() instead of 1:n so an empty table produces no iterations.
for (j in seq_len(n_cols)) {
  percentage <- trunc(j / n_cols * 100)
  mes <- paste(toString(percentage), "% completed", sep = "")
  print(mes)
  print(names[j])
  # The kind of cleaning depends only on the column, so decide it once here.
  is_no_yes <- names[j] %in% noYesValues
  is_plain_value <- !(names[j] %in% categoric_vars) && names[j] != "DMRBORN" &&
    !grepl("DAT", names[j], fixed = TRUE)
  is_personalized <- names[j] %in% personalized
  for (i in seq_len(nrow(hospital))) {
    if (is_no_yes) {
      newDf[i, j] <- replaceNoYesValues(hospital[i, j])
    } else if (is_plain_value) {
      newDf[i, j] <- fixNonCategoric(hospital[i, j])
    }
    # Bespoke recodings take precedence over the generic cleaning above.
    if (is_personalized) {
      newDf[i, j] <- personalizedFun(hospital[i, j], names[j])
    }
    # Whatever the column type, NA and "." always end up as empty cells.
    if (is.na(hospital[i, j])) {
      newDf[i, j] <- ""
    } else if (hospital[i, j] == ".") {
      newDf[i, j] <- ""
    }
  }
}
# The earlier setwd() to ".../newRessources" was immediately overridden by
# this absolute path, so only the effective one is kept.
setwd(("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient/clean"))
write.csv(x = newDf, file = "konya_outpatient.csv", row.names = FALSE)
# Resource harmonizer, part 1: load the cleaned hospital file and the codebook
# sheets.
# NOTE(review): rm(list=ls()) clears the whole workspace and the paths below
# are specific to one machine.
rm(list=ls())
# rm(list=ls()) does not attach packages: dplyr provides bind_cols() and
# stringr provides str_replace(), both used further down in this script.
library(dplyr)
library(stringr)
# The earlier setwd() to ".../newRessources" was immediately overridden by
# this absolute path, so only the effective one is kept.
setwd(("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient/clean"))
hospital <- data.frame(read.csv("konya_outpatient.csv", sep=","))
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/harmonised_data")
# Each codebook sheet is cut to a fixed number of rows and its first five
# columns so that the sheets can be stacked with rbind(). Com&RF is now cut
# the same way as in the resource cleaner; left whole, rbind() fails as soon
# as the file has more than five columns.
ComAndRF <- data.frame(read.csv("Com&RF.csv", sep=","))[1:64,1:5]
Complications <- data.frame(read.csv("Complications.csv", sep=";"))[1:20,1:5]
Dates <- data.frame(read.csv("Dates.csv", sep=";"))[1:12,1:5]
Demographics <- data.frame(read.csv("Demographics.csv", sep=";"))[1:9,1:5]
Home_med <- data.frame(read.csv("Home_med.csv", sep=";"))[1:13,1:5]
Imaging_data <- data.frame(read.csv("Imaging_data.csv", sep=";"))[1:11,1:5]
Labo <- data.frame(read.csv("Labo.csv", sep=";"))[1:143,1:5]
SiAndSympt <- data.frame(read.csv("Si&Sympt.csv", sep=";"))[1:50,1:5]
Treatment <- data.frame(read.csv("Treatment.csv", sep=";"))[1:32,1:5]
LifestyleAndDiet <- data.frame(read.csv("Lifestyle&Diet.csv", sep=";"))[1:165,1:5]
harmonised_data <- rbind(SiAndSympt,ComAndRF)
harmonised_data <- rbind(harmonised_data,Treatment)
harmonised_data <- rbind(harmonised_data,Dates)
harmonised_data <- rbind(harmonised_data,Demographics)
harmonised_data <- rbind(harmonised_data,Home_med)
harmonised_data <- rbind(harmonised_data,Imaging_data)
harmonised_data <- rbind(harmonised_data,Complications)
harmonised_data <- rbind(harmonised_data,Labo)
harmonised_data <- rbind(harmonised_data,LifestyleAndDiet)
# Drop the per-sheet tables now that they are merged. The original list named
# "Complications" twice (which makes rm() warn) and never removed Treatment.
rm(list=c("SiAndSympt",
          "Complications",
          "ComAndRF",
          "Dates",
          "Demographics",
          "Home_med",
          "Imaging_data",
          "Treatment",
          "Labo",
          "LifestyleAndDiet"))
# Variables whose codebook format is "No/Yes / missing" (either capitalisation
# of "missing"), extended with a few names added by hand.
noYesValues <- subset(harmonised_data, harmonised_data$Harmonised.data.format.unit == "No/Yes / missing" | harmonised_data$Harmonised.data.format.unit == "No/Yes / Missing")
noYesValues <- noYesValues$Harmonised.variable.name
noYesValues <- c(noYesValues,"CSXCOTAB","CSXCOTAG","IMDIT","RFXHIV_RFXAIDS", "SMXASAH", "CMXATH", "CMXNO","SMXFEA")
# List of variables treated as categorical.
# NOTE(review): no use of categoric_vars is visible in the rest of this script.
categoric_vars = c("CMXATH", "CMXNO", "SMXASAH","CSXCOTAB","CSXCOTAG","IMDIT","RFXHIV_RFXAIDS","DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU", "DSXOS", "DSXHO", "DSXIC", "TRXAV","TRXRIB","TRXLR","TRXRM","TRXIA","TRXIB","TRXCH","TRXAB","TRXCS","TRXHEP","TRXAF","TRXCP","TRXOT","TRXECM","TRXIV","TRXNIV","TRXNO","TRXOX","TRXRR","TRXTR","TRXVA","TRXPE","TRXPV","TRXIT","TRXNMB","TRXAC","TRXINA","TRXIS","TRXIM","TRXVC","TRXVD","TRXZN", "CSXCOT","CSXCTR","SMXASAH","SMXFEA","SMXCOA","SMXSTA","SMXSBA","SMXRNA","SMXMYA","SMXARA","SMXCPA","SMXAPA","SMXINA","SMXNAA","SMXDIA","SMXFAA","SMXHEA","SMXCNA","SMXACA","SMXSLA","SMXTLA","SMXSYA","SMXWHA","SMXLYA","SMXANA","SMXIWA","SMXSRA","SMXBLA","CMXPRG","CMXCVD","CMXCMP","CMXHT","CMXDI","CMXCKD","CMXCLD","CMXCPD","CMXASM","CMXCND","CMXRHE","CMXCCI","CMXCBD","CMXDE","CMXPU","CMXST","CMXLY","CMXAP","RFXSM","RFXFSM","RFXOB","RFXTB","RFXIMD","RFXHIV","RFXAIDS","RFXUI","RFXHC","RFXONC","RFXMN", "HMRACI","HMRARB","HMRAHO","HMRNS","HMROS","HMRCS","HMRIS","HMRAV","HMRAB","HMRCOV","IMDXCT","IMDXCTCR","IMDXCTTE","IMDXCTAB","IMDXXR","IMDXPN", "COXRD","COXAR","COXPM","COXMOD","COXPT","COXEC","COXSH","COXIO","COXPE","COXST","COXDIC","COXRIO","COXKF","COXHF","COXBC")
# Variables that, besides the No/Yes ones, receive a derived "_numeric" column.
personalized <- c("DMRGENDR", "DMRRETH1", "DMROCCU", "DMRHREDU", "DSXOS")
# Tell whether a single value can be read as a number.
#
# A decimal comma is accepted: the first "," is replaced by "." before parsing.
#
# x: a single value (or a zero-length vector).
# Returns TRUE when x parses as a number and FALSE otherwise (including NA,
# "" and zero-length input).
is_number <- function(x) {
  if (length(x) == 0) {
    return(FALSE)
  }
  # Base sub() keeps this helper independent of stringr; fixed = TRUE treats
  # the comma literally and, like the original, replaces only the first one.
  candidate <- sub(",", ".", x, fixed = TRUE)
  # Non-numeric text is an expected input here, so the "NAs introduced by
  # coercion" warning would only be noise repeated for every such cell.
  parsed <- suppressWarnings(as.numeric(candidate))
  !is.na(parsed)
}
# Blank out a single cell that is not numeric.
#
# x: a single cell value.
# Returns x unchanged when is_number(x) accepts it, and "" otherwise.
fixNonCategoric <- function(x) {
  if (is_number(x)) {
    return(x)
  }
  ""
}
# Translate a harmonised label into its numeric code.
#
# Columns listed in the global `noYesValues` map "No"/"Yes" to 0/1. DMRGENDR,
# DSXOS, DMRRETH1, DMROCCU, DMRHREDU and CSXCOT have their own label tables.
# For all of these NA becomes "". Labels without a code are returned
# unchanged, except for CSXCOT where they become "".
#
# x: a single cell value.
# colname: name of the column the cell belongs to.
# Returns the numeric code, "" or the unchanged value as described above.
getNumericValue <- function(x, colname) {
  # Look `value` up in `labels` and return the matching entry of `codes`;
  # NA gives "" and an unknown label gives `otherwise`.
  lookup <- function(value, labels, codes, otherwise = value) {
    if (is.na(value)) {
      return("")
    }
    hit <- match(value, labels)
    if (is.na(hit)) otherwise else codes[[hit]]
  }
  if (colname %in% noYesValues) {
    x <- lookup(x, c("No", "Yes"), c(0, 1))
  }
  if (colname == "DMRGENDR") {
    # The original compared x directly and stopped with "missing value where
    # TRUE/FALSE needed" on NA; it is now treated like in the other branches.
    x <- lookup(x, c("Female", "Male"), c(1, 0))
  }
  if (colname == "DSXOS") {
    # "Missing" is a label without a code and maps to the empty string.
    x <- lookup(x, c("Missing", "Recovered", "Deceased", "Transferred"),
                list("", 0, 1, 2))
  }
  if (colname == "DMRRETH1") {
    x <- lookup(x, c("Asian", "Black", "Hispanic", "White", "Multiracial", "Other"),
                c(1, 2, 3, 4, 5, 6))
  }
  if (colname == "DMROCCU") {
    x <- lookup(x, c("Unemployed", "Student", "Employed", "Self-employed", "Retired"),
                c(1, 2, 3, 4, 5))
  }
  if (colname == "DMRHREDU") {
    x <- lookup(x, c("High School", "Bachelors", "Postgraduate", "Other"),
                c(1, 2, 3, 4))
  }
  if (colname == "CSXCOT") {
    x <- lookup(x, c("PCR", "antigen", "other"), c(1, 2, 3), otherwise = "")
  }
  return(x)
}
# Replace a single NA by the empty string.
#
# x: a single value.
# Returns "" when x is NA and x itself otherwise.
noNa <- function(x) {
  if (!is.na(x)) {
    return(x)
  }
  ""
}
# Resource harmonizer, part 2: append a "<variable>_numeric" column for every
# No/Yes or personalized variable, fill it cell by cell and write the result.
# Drop a stale newDf left over from an earlier interactive run; a bare
# rm(newDf) warns when the object does not exist.
if (exists("newDf")) rm(newDf)
newDf <- hospital
names <- colnames(hospital)
# Names of the derived columns, in the order of the source columns. The suffix
# is added before filtering so that no match yields an empty vector.
needs_numeric <- names %in% noYesValues | names %in% personalized
list_numeric <- paste(names, "_numeric", sep = "")[needs_numeric]
numericDf <- data.frame(matrix(NA, nrow = nrow(hospital), ncol = length(list_numeric)))
colnames(numericDf) <- list_numeric
newDf <- bind_cols(newDf, numericDf)
#newDf[,"DMRGENDR_numeric"] <- NA
n_cols <- ncol(hospital)
# seq_len() instead of 1:n so an empty table produces no iterations.
for (j in seq_len(n_cols)) {
  percentage <- trunc(j / n_cols * 100)
  mes <- paste(toString(percentage), "% completed", sep = "")
  print(mes)
  numeric_col <- paste(names[j], "_numeric", sep = "")
  print(names[j])
  # Whether this column has a derived counterpart depends only on the column.
  has_numeric <- numeric_col %in% list_numeric
  for (i in seq_len(nrow(hospital))) {
    if (i %% 10000 == 0) {
      print(i)
    }
    if (has_numeric) {
      newDf[i, numeric_col] <- getNumericValue(hospital[i, j], names[j])
    }
    # NA cells of the source columns are written out as empty cells.
    if (is.na(hospital[i, j])) {
      newDf[i, j] <- ""
    }
  }
}
# The earlier setwd() to ".../newRessources/Numeric_derived" was immediately
# overridden by this absolute path, so only the effective one is kept.
setwd(("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient/clean/harmonized"))
write.csv(x = newDf, file = "konya_outpatient.csv", row.names = FALSE)
# Survival curve script, part 1: load helpers, log in to the server and read
# the dimensions of the remote data frame.
source("dependency_installer.R")
source("required_folder_checker.R")
source("argument_hasher.R")
dep_list <- c("survival", "lubridate", "survminer", "stringr", "DSI", "DSOpal", "DSLite", "fields", "hrbrthemes", "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient")
install_dependencies(dep_list)
image_format <- ".png"
args <- commandArgs(trailingOnly = TRUE)
hospital_names <- c("HM","Princesa")
project_names <- c("RESOURCE_GUIDE","RESOURCE_GUIDE")
resource_names <- c("HM_rs", "Princesa_rs")
urls <- c("https://192.168.1.50:8844","https://192.168.1.50:8844")
users <- c("opal_admin","opal_admin")
# Credentials must not live in version control: the admin password is read
# from the OPAL_ADMIN_PASSWORD environment variable.
# NOTE(review): the password previously committed here remains visible in the
# repository history and should be rotated.
pass <- rep(Sys.getenv("OPAL_ADMIN_PASSWORD"), 2)
if (any(!nzchar(pass))) {
  warning("Opal password is not set: define the environment variable OPAL_ADMIN_PASSWORD before connecting.",
          call. = FALSE)
}
# Only the second entry (Princesa) is used in this run.
hospital_names <- hospital_names[2]
project_names <- project_names[2]
resource_names <- resource_names[2]
urls <- urls[2]
users <- users[2]
pass <- pass[2]
# project_names_o <- args[1]
# project_names <- str_split(project_names_o, ";")[[1]]
#
# resource_names_o <- args[2]
# resource_names <- str_split(resource_names_o, ";")[[1]]
#
# urls_o <- args[3]
# urls <- str_split(urls_o, ";")[[1]]
#
# users_o <- args[4]
# users <- str_split(users_o, ";")[[1]]
#
# pass_o <- args[5]
# pass <- str_split(pass_o, ";")[[1]]
#
# hospital_name <- args[6]
# extra_filter <- args[7]
json_output <- c()
builder <- DSI::newDSLoginBuilder()
# Servers are registered as "study0", "study1", ... in the order of `urls`.
url_ctr <- 0
for (i in seq_along(urls)) {
  print(paste("Connecting to Server with URL:", urls[i], sep=" "))
  builder$append(server = paste("study", url_ctr, sep=""), url = urls[i],
                 user = users[i], password = pass[i],
                 resource = paste(project_names[i], resource_names[i], sep="."),
                 driver = "OpalDriver", options="list(ssl_verifyhost=0,ssl_verifypeer=0)")
  url_ctr <- url_ctr+1
}
logindata <- builder$build()
connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
# Turn the assigned resource D into a server-side data frame named `data`.
datashield.assign.expr(connections, symbol = 'data', expr = quote(as.resource.data.frame(D)))
datastructure_name <- "data"
ds.colnames(x=datastructure_name, datasources= connections)
data_dim <- ds.dim(x=datastructure_name, datasources= connections)
data_dim_rows <- data_dim$`dimensions of data in combined studies`[1]
data_dim_cols <- data_dim$`dimensions of data in combined studies`[2]
# Draw a synthetic sample of `size` values that follows the quantiles of a
# remote variable.
#
# The quantile summary of `df$var` is fetched with ds.quantileMean() through
# the global `connections`. Entries 1, 3, 4, 5 and 7 of that summary, rounded,
# delimit four consecutive intervals, and a quarter of the sample is drawn
# uniformly from each of them.
# NOTE(review): per the dsBaseClient documentation ds.quantileMean() reports
# the 5%, 10%, 25%, 50%, 75%, 90% and 95% quantiles followed by the mean, so
# the outer bounds would be the 5% and 95% quantiles rather than the true
# minimum and maximum. Confirm against the installed version.
#
# df: name of the server-side data frame.
# var: name of the column of df to reconstruct.
# size: number of values to generate.
# Returns a numeric vector of length `size`. The RNG seed is reset to 1 on
# every call, so the result is reproducible.
get_reconstructed_population <- function(df, var, size) {
  quantile_data <- ds.quantileMean(x = paste(df, var, sep = "$"), datasources = connections)
  quantiles <- round(c(quantile_data[[1]], quantile_data[[3]], quantile_data[[4]],
                       quantile_data[[5]], quantile_data[[7]]))
  # runif(size / 4, ...) silently truncates a fractional count, which made the
  # sample shorter than `size` whenever size is not a multiple of 4. The
  # remainder is now spread, one value each, over the first intervals.
  per_interval <- rep(size %/% 4, 4) + (seq_len(4) <= size %% 4)
  set.seed(1)
  reconstructed_population <- c(
    runif(per_interval[1], quantiles[1], quantiles[2]),
    runif(per_interval[2], quantiles[2], quantiles[3]),
    runif(per_interval[3], quantiles[3], quantiles[4]),
    runif(per_interval[4], quantiles[4], quantiles[5]))
  return(reconstructed_population)
}
# Survival curve script, part 2: rebuild synthetic outcome times for deceased
# and alive patients, fit a Kaplan-Meier curve and save it as a PNG.
# Server-side subsets of `data` by outcome code (DSXOS_numeric 1 and 0).
ds.dataFrameSubset(df.name = datastructure_name, V1.name = "data$DSXOS_numeric", V2.name = "1", Boolean.operator = "==", newobj = "OutFilteredDEATH")
ds.dataFrameSubset(df.name = datastructure_name, V1.name = "data$DSXOS_numeric", V2.name = "0", Boolean.operator = "==", newobj = "OutFilteredALIVE")
# Row counts of both subsets (first entry of the last element of ds.dim()).
data_dim_DEATH <- ds.dim(x="OutFilteredDEATH", datasources= connections)
data_dim_DEATH <- data_dim_DEATH[[length(data_dim_DEATH)]][1]
data_dim_ALIVE <- ds.dim(x="OutFilteredALIVE", datasources= connections)
data_dim_ALIVE <- data_dim_ALIVE[[length(data_dim_ALIVE)]][1]
reconstr_pop_time_outcome_death <- get_reconstructed_population("OutFilteredDEATH", "DATLGT", data_dim_DEATH)
df_death <- data.frame(reconstr_pop_time_outcome_death)
df_death["status"] <- "death"
colnames(df_death) <- c("out_time", "status")
reconstr_pop_time_outcome_alive <- get_reconstructed_population("OutFilteredALIVE", "DATLGT", data_dim_ALIVE)
df_alive <- data.frame(reconstr_pop_time_outcome_alive)
df_alive["status"] <- "alive"
colnames(df_alive) <- c("out_time", "status")
full_status_df <- rbind(df_alive, df_death)
# Random MALE/FEMALE label per row (even draw -> MALE, odd draw -> FEMALE).
# The vectorised form also copes with an empty table, where the original
# 1:length() loop stopped with an error.
samplenum <- sample(0:100000, nrow(full_status_df), replace = TRUE)
filtercol <- ifelse(samplenum %% 2 == 0, "MALE", "FEMALE")
# `extra_filter` used to come from the seventh command-line argument, but that
# assignment is commented out, so the next statement failed with "object
# 'extra_filter' not found". Fall back to the argument when present and to a
# fixed column name otherwise.
# NOTE(review): the fallback name is a placeholder; confirm the intended one.
if (!exists("extra_filter")) {
  extra_filter <- if (length(args) >= 7L) args[7] else "extra_filter"
}
full_status_df[extra_filter] <- filtercol
# Event indicator for Surv(): 1 = death, 0 = alive.
full_status_df["status_surv"] <- 1
full_status_df[full_status_df$status == "alive" ,"status_surv"] <- 0
#filename <- paste0(hospital_name, "survival_curve", sep="")
#filename <- paste(filename, "outcome", sep="_")
#filename <- paste(filename, image_format, sep="")
dir.create("./survAlberto", showWarnings = FALSE)
setwd("./survAlberto")
print("survival_curve.png")
png("survival_curve.png", width = 750, height = 500)
survplot <- ggsurvplot(
  fit = survfit(Surv(out_time, status_surv) ~ 1, data = full_status_df),
  xlab = "Days",
  ylab = "Overall survival probability")
# Explicit print(): a bare `survplot` is not drawn when the script is run
# through source(), which leaves an empty image.
print(survplot)
dev.off()
datashield.logout(connections)
# Table 1 script: connects to the selected servers, builds the table for the
# chosen variables with obtain_table1() and renders it to a PNG.
# NOTE(review): rm(list=ls()) clears the whole workspace and the path below is
# specific to one machine.
rm(list=ls())
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
#Obtain the data from the codebook
codebook <- read.csv("harmon.csv", sep = ";")
# Keep columns 1, 3 and 4 and give them short names.
codebook <- codebook[,c(1,3,4)]
colnames(codebook) <- c("variable", "description", "unit")
source("dependency_installer.R")
source("connection_parameters_aux.R")
source("necessary_functions_table1_v2_subset.R")
dep_list = c("DSI","DSOpal","DSLite", "dsBaseClient")
install_dependencies(dep_list)
# Work from ./table_1 (created if absent).
dir.create("./table_1", showWarnings = FALSE)
setwd("./table_1")
#Create the connection
auxConnections <- connect()
connections <- auxConnections[[1]]
inp <- auxConnections[[2]]
#Only do this if you want to subset the data for a specific value of a categoric variable
apply_filters()
#Select the variables to be analized (Add the ones you consider relevant and remove the ones that you do not)
# NOTE(review): the first assignment is overwritten by the second one, so only
# DMRGENDR and DMRAGEYR are analysed.
varToAnalize <- c("DMRGENDR","DMRAGEYR","DSXOS", "RFXSM", "TRXAV", "CMXCVD", "RFXOB")
varToAnalize <- c("DMRGENDR","DMRAGEYR")
table1 <- obtain_table1(connections, inp, varToAnalize)
#datashield.logout(connections)
library(gridExtra)
# Image size scales with the table: 30 px per row and 150 px per column.
png("table1TFGFiltro.png", height = 30*nrow(table1), width = 150*ncol(table1))
grid.table(table1)
dev.off()
# Treatment heatmap script: loads the treatment codebook sheet, connects to
# the selected servers and calls get_treatments_map().
# NOTE(review): rm(list=ls()) clears the whole workspace and the paths below
# are specific to one machine.
rm(list=ls())
library("ggplot2")
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/harmonised_data")
# First 32 rows and first 5 columns of the treatment sheet.
Treatments <- data.frame(read.csv("Treatment.csv", sep=";"))[1:32,1:5]
# Columns 1 and 3 of the sheet, indexed by harmonised variable name.
descriptions <- Treatments[, c(1,3)]
rownames(descriptions) <- descriptions$Harmonised.variable.name
# From here on Treatments is just the vector of harmonised variable names.
Treatments <- Treatments$Harmonised.variable.name
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
source("dependency_installer.R")
source("connection_parameters.R")
source("necessary_functions_treatments_heatmap.R")
dep_list = c("DSI","DSOpal","DSLite", "ggplot2", "dsBaseClient")
install_dependencies(dep_list)
#Create the connection
auxConnections <- connect()
connections <- auxConnections[[1]]
inp <- auxConnections[[2]]
get_treatments_map(connections, inp)
ds.colnames("data")
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment