# diff --git a/UnCover_DS_Workshop0_code.R b/UnCover_DS_Workshop0_code.R
# new file mode 100644, index 0000000000000000000000000000000000000000..104bb3de8314c273f04172c52165c87ee3484f66

############################################################
###         DS ANALYSIS ENVIRONMENT: SET UP              ###
############################################################

# Client-side packages this script needs.
packages <- c("DSI",
              "DSOpal",
              "dsBaseClient",
              "ggplot2")

# Repositories used for installation. dsBaseClient is fetched from the OBiBa
# repository; the CRAN mirror is listed alongside it so that
# `dependencies = TRUE` can still resolve packages hosted on CRAN.
cran_repo <- "http://cran.us.r-project.org"
obiba_repo <- "http://cran.obiba.org"

# Rtools is a stand-alone Windows build toolchain, not an R package, so it
# cannot be installed with install.packages(). Only tell the user about it
# when no `make` is visible on the PATH.
# NOTE(review): a missing `make` is only a heuristic for "Rtools not set up".
if (Sys.info()[["sysname"]] == "Windows" && !nzchar(Sys.which("make"))) {
  message("No 'make' found on PATH. If a package has to be built from ",
          "source, install Rtools from ",
          "https://cran.r-project.org/bin/windows/Rtools/")
}

# Install each required package only when it is missing, then attach it.
for (p in packages) {

  print(sprintf("Checking package %s", p))

  # requireNamespace() reports whether 'p' is installed without attaching it.
  if (!requireNamespace(p, quietly = TRUE)) {

    installation_repo <- cran_repo
    if (p == "dsBaseClient") {
      installation_repo <- c(obiba_repo, cran_repo)
    }

    install.packages(p, repos = installation_repo, dependencies = TRUE)
  }

  # library() stops with an error if the installation above did not succeed.
  library(p, character.only = TRUE)
}


############################################################
############################################################
###         CONNECTING TO 1+ DATA SOURCES                ###
############################################################

# Variables to be set prior to any execution. The vectors are parallel:
# position i of each one describes the i-th server.
# NOTE(review): credentials are stored in plain text in this script; consider
# reading them from the environment before sharing it.

urls <- c("http://192.168.1.50:8880",   # UPM's test server
          "http://192.168.1.101:8888")  # Baskent University

project_names <- c("DRAFT_TEST",
                   "DRAFT_TEST")

view_names <- c("DRAFT_VIEW",
                "DRAFT_TEST")

users <- c("administrator",
           "DRAFT_TEST")

passwords <- c("password",
               "DRAFT_TEST")

# Every server needs one entry in each of the vectors above.
stopifnot(
  length(project_names) == length(urls),
  length(view_names) == length(urls),
  length(users) == length(urls),
  length(passwords) == length(urls)
)

# Name of the symbol to create on the server side for each source's table.
# The data it refers to is not available locally.
myDFName <- "D"

# Object that accumulates the login configuration of every server.
builder <- DSI::newDSLoginBuilder()

# Add one login entry per server. Servers are named "study0", "study1", ...
for (i in seq_along(urls)) {

  print(sprintf("Connecting to Server with URL: %s", urls[i]))

  # NOTE(review): the options string disables SSL host and peer verification.
  builder$append(server = sprintf("study%s", i - 1L), url = urls[i],
                 user = users[i], password = passwords[i],
                 table = sprintf("%s.%s", project_names[i], view_names[i]),
                 driver = "OpalDriver",
                 options = "list(ssl_verifyhost=0,ssl_verifypeer=0)")
}

# Open the connections configured in "builder" and assign each server's table
# to the server-side symbol named by "myDFName".
logindata <- builder$build()
connections <- DSI::datashield.login(logins = logindata, assign = TRUE,
                                     symbol = myDFName)


############################################################
############################################################
###                  CUSTOM QUERIES                      ###
############################################################

# Run the server-side aggregate "TEST" on object `x` in every data source and
# pool the per-study results as if they were dimensions.
#
# x            Name (character) of the server-side data.frame or matrix.
# datasources  List of DSConnection objects; when NULL, the connections are
#              looked up with DSI::datashield.connections_find().
#
# Returns a named list with one entry per study plus a final
# "... in combined studies" entry holding c(sum of first elements, second
# element of the first study).
# NOTE(review): "TEST" has to exist as an aggregate method on the servers and
# is presumably a placeholder - confirm. The pooling assumes each study
# returns a (rows, columns) pair.
mycustom_f <- function(x = NULL, datasources = NULL) {

  if (is.null(datasources)) {
    datasources <- DSI::datashield.connections_find()
  }

  is_connection_list <- is.list(datasources) &&
    all(vapply(datasources,
               function(d) methods::is(d, "DSConnection"),
               logical(1)))
  if (!is_connection_list) {
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call. = FALSE)
  }

  # An empty list passes the class check above but leaves nothing to query.
  if (length(datasources) == 0) {
    stop("The 'datasources' list is empty: no connection to query.", call. = FALSE)
  }

  if (is.null(x)) {
    stop("Please provide the name of a data.frame or matrix!", call. = FALSE)
  }

  cally <- call("TEST", x)

  dimensions <- DSI::datashield.aggregate(datasources, cally)

  # Unnamed connections get an empty study label.
  stdnames <- names(datasources)
  if (is.null(stdnames)) {
    stdnames <- rep("", length(datasources))
  }
  outputnames <- paste0("dimensions of ", x, " in ", stdnames)

  # First elements are summed across studies; the second one is taken from
  # the first study only.
  global.dim1 <- 0
  for (i in seq_along(datasources)) {
    global.dim1 <- global.dim1 + dimensions[[i]][1]
  }
  global.dim2 <- dimensions[[1]][2]
  pooled.dim <- list(c(global.dim1, global.dim2))

  out <- c(dimensions, pooled.dim)
  names(out) <- c(outputnames,
                  paste0("dimensions of ", x, " in combined studies"))

  out
}


############################################################
############################################################
###                   BASIC QUERIES                      ###
############################################################

# Inspect dimensions of each data source
dim_info <- ds.dim(x = myDFName, datasources = connections)

# Inspect variables stored in each data source
colname_info <- ds.colnames(x = myDFName, datasources = connections)

# Get info about data types in each data source. The result is a list named
# by variable; each element keeps the complete ds.class() answer, so no data
# source is dropped when there is more than one connection.
variables_type_inspect <- c("DMRAGEYR", "DMRGENDR")
variable_type_info <- lapply(
  stats::setNames(variables_type_inspect, variables_type_inspect),
  function(var) {
    ds.class(x = sprintf("%s$%s", myDFName, var), datasources = connections)
  }
)

# Numeric-specific functions
ds.mean(x = sprintf("%s$DMRAGEYR", myDFName), datasources = connections)

# Only the first connection is queried here.
ds.quantileMean(x = sprintf("%s$DMRAGEYR", myDFName),
                datasources = connections[1])

# Categorical-specific functions
ds.table(sprintf("%s$DMRGENDR", myDFName), datasources = connections)


############################################################
############################################################
###       BASIC MACHINE LEARNING - LINEAR MODEL          ###
############################################################

ds.scatterPlot(x = sprintf("%s$DMRAGEYR", myDFName),
               y = sprintf("%s$DATLGT", myDFName),
               datasources = connections)
ds.boxPlot(x = sprintf("%s$DMRAGEYR", myDFName), datasources = connections)

# Gaussian GLM of DMRAGEYR on DATLGT, fitted with ds.glmSLMA(); "ws" is
# passed as the name of the new server-side object.
linear_model <- ds.glmSLMA(
  formula = sprintf("%s$DMRAGEYR~%s$DATLGT", myDFName, myDFName),
  family = "gaussian",
  datasources = connections,
  newobj = "ws"
)


############################################################
############################################################
###                      LOG-OUT                         ###
############################################################

# Always execute this command before closing your IDE to free server-side resources.
datashield.logout(connections)

# diff --git a/UnCover_DS_Workshop1_code.R b/UnCover_DS_Workshop1_code.R
# new file mode 100644, index 0000000000000000000000000000000000000000..2d9e77d3aee979e49f946711c3f7c340aaf60445

############################################################
###         DS ANALYSIS ENVIRONMENT: SET UP              ###
############################################################

# Client-side packages this script needs.
packages <- c("DSI",
              "DSOpal",
              "dsBaseClient",
              "ggplot2")

# Repositories used for installation. dsBaseClient is fetched from the OBiBa
# repository; the CRAN mirror is listed alongside it so that
# `dependencies = TRUE` can still resolve packages hosted on CRAN.
cran_repo <- "http://cran.us.r-project.org"
obiba_repo <- "http://cran.obiba.org"

# Rtools is a stand-alone Windows build toolchain, not an R package, so it
# cannot be installed with install.packages(). Only tell the user about it
# when no `make` is visible on the PATH.
# NOTE(review): a missing `make` is only a heuristic for "Rtools not set up".
if (Sys.info()[["sysname"]] == "Windows" && !nzchar(Sys.which("make"))) {
  message("No 'make' found on PATH. If a package has to be built from ",
          "source, install Rtools from ",
          "https://cran.r-project.org/bin/windows/Rtools/")
}

# Install each required package only when it is missing, then attach it.
for (p in packages) {

  print(sprintf("Checking package %s", p))

  # requireNamespace() reports whether 'p' is installed without attaching it.
  if (!requireNamespace(p, quietly = TRUE)) {

    installation_repo <- cran_repo
    if (p == "dsBaseClient") {
      installation_repo <- c(obiba_repo, cran_repo)
    }

    install.packages(p, repos = installation_repo, dependencies = TRUE)
  }

  # library() stops with an error if the installation above did not succeed.
  library(p, character.only = TRUE)
}


############################################################
############################################################
###         CONNECTING TO 1+ DATA SOURCES                ###
############################################################

# Variables to be set prior to any execution. The vectors are parallel:
# position i of each one describes the i-th server. Replace the placeholder
# strings with your own project, resource and password.

urls <- c("https://uncover.itg.be")

project_names <- c("name of your project")

resource_names <- c("name of your resource")

users <- c("administrator")

passwords <- c("your password")


# Variable in which connections to each server will be stored.
builder <- DSI::newDSLoginBuilder()

# Every server needs one entry in each of the configuration vectors.
stopifnot(
  length(project_names) == length(urls),
  length(resource_names) == length(urls),
  length(users) == length(urls),
  length(passwords) == length(urls)
)

# Add one login entry per server. Servers are named "study0", "study1", ...
for (i in seq_along(urls)) {

  print(sprintf("Connecting to Server with URL: %s", urls[i]))

  # NOTE(review): the options string disables SSL host and peer verification.
  builder$append(server = sprintf("study%s", i - 1L), url = urls[i],
                 user = users[i], password = passwords[i],
                 resource = sprintf("%s.%s", project_names[i], resource_names[i]),
                 driver = "OpalDriver",
                 options = "list(ssl_verifyhost=0,ssl_verifypeer=0)")
}

# Open the connections configured in "builder" and assign each server's
# resource to the server-side symbol "D".
logindata <- builder$build()
connections <- DSI::datashield.login(logins = logindata, assign = TRUE,
                                     symbol = "D")

# Coerce the resource held in "D" with as.resource.data.frame() and store the
# result on the server side under the symbol "data".
datashield.assign.expr(connections, symbol = "data",
                       expr = quote(as.resource.data.frame(D)))


# Inspect the column names of "data" and the class of one of its columns.
ds.colnames(x = "data", datasources = connections)

ds.class("data$DMRAGEYR", datasources = connections)

############################################################
############################################################
###                      LOG-OUT                         ###
############################################################

# Always execute this command before closing your IDE to free server-side resources.
datashield.logout(connections)