Initial commit.

parent 54dc6a6d
############################################################
### DS ANALYSIS ENVIRONMENT: SET UP ###
############################################################
# R packages the analysis below depends on.
packages <- c("DSI", "DSOpal", "dsBaseClient", "ggplot2")

# Rtools is the Windows build toolchain. It ships as a stand-alone installer,
# not as an R package, so install.packages() cannot provide it. Only emit a
# hint when its `make` binary is not visible on the PATH.
# NOTE(review): a missing `make` on the PATH is used as a heuristic only.
if (Sys.info()[["sysname"]] == "Windows" && !nzchar(Sys.which("make"))) {
  message(
    "Rtools does not appear to be on the PATH. If a source installation ",
    "fails, install Rtools from https://cran.r-project.org/bin/windows/Rtools/"
  )
}

# Install each package only when it is missing, then attach it.
for (p in packages) {
  print(sprintf("Checking package %s", p))
  # requireNamespace() tests availability without attaching the package.
  if (!requireNamespace(p, quietly = TRUE)) {
    installation_repo <- "http://cran.us.r-project.org"
    # dsBaseClient is served from the OBiBa repository. CRAN is kept as a
    # second repository so that its dependencies can still be resolved.
    if (p == "dsBaseClient") {
      installation_repo <- c("http://cran.obiba.org", installation_repo)
    }
    install.packages(p, repos = installation_repo, dependencies = TRUE)
  }
  # library() stops with an error if the installation above did not succeed.
  library(p, character.only = TRUE)
}
############################################################
############################################################
### CONNECTING TO 1+ DATA SOURCES ###
############################################################
# Connection settings, to be set prior to any execution. The five vectors are
# parallel: position i of each one describes the same server.
# NOTE(review): credentials are written in plain text; consider supplying
# them through Sys.getenv() before sharing this script.
urls <- c(
  "http://192.168.1.50:8880",  # UPM's test server
  "http://192.168.1.101:8888"  # Baskent University
)
project_names <- c("DRAFT_TEST", "DRAFT_TEST")
view_names <- c("DRAFT_VIEW", "DRAFT_TEST")
users <- c("administrator", "DRAFT_TEST")
passwords <- c("password", "DRAFT_TEST")

# Fail early on an empty or inconsistent configuration instead of silently
# building login entries that contain NA values.
stopifnot(
  length(urls) > 0,
  length(project_names) == length(urls),
  length(view_names) == length(urls),
  length(users) == length(urls),
  length(passwords) == length(urls)
)

# Symbol under which the table is assigned on the server side (see the
# `symbol` argument of the login call below).
myDFName <- "D"

# Login builder that collects one connection entry per server.
builder <- DSI::newDSLoginBuilder()
for (i in seq_along(urls)) {
  print(sprintf("Connecting to Server with URL: %s", urls[i]))
  builder$append(
    # Servers are labelled study0, study1, ... in the order of `urls`.
    server = sprintf("study%d", i - 1L),
    url = urls[i],
    user = users[i],
    password = passwords[i],
    table = sprintf("%s.%s", project_names[i], view_names[i]),
    driver = "OpalDriver",
    # Connection options are passed as a string; these set ssl_verifyhost
    # and ssl_verifypeer to 0.
    options = "list(ssl_verifyhost=0,ssl_verifypeer=0)"
  )
}

# Open the connections described by the builder and assign each table to the
# server-side symbol named by `myDFName`.
logindata <- builder$build()
connections <- DSI::datashield.login(
  logins = logindata,
  assign = TRUE,
  symbol = myDFName
)
############################################################
############################################################
### CUSTOM QUERIES ###
############################################################
# Run the server-side aggregate `TEST` on object `x` in every study and pool
# the per-study results.
#
# Arguments:
#   x            Name of the server-side data.frame or matrix.
#   datasources  List of DSConnection objects. When NULL, the connections are
#                looked up with DSI::datashield.connections_find().
#
# Returns a list with one entry per study (the value returned by the
# aggregate call), followed by one pooled entry made of the sum of every
# study's first element and the second element of the first study. Entries
# are named "dimensions of <x> in <study>" / "... in combined studies".
#
# Stops when `datasources` is not a list of DSConnection objects, when `x` is
# NULL, or when `datasources` is an empty list.
mycustom_f <- function(x = NULL, datasources = NULL) {
  if (is.null(datasources)) {
    datasources <- DSI::datashield.connections_find()
  }

  is_connection <- function(d) methods::is(d, "DSConnection")
  if (!(is.list(datasources) &&
        all(vapply(datasources, is_connection, logical(1))))) {
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call. = FALSE)
  }
  if (is.null(x)) {
    stop("Please provide the name of a data.frame or matrix!", call. = FALSE)
  }
  # An empty list passes the class check above but leaves nothing to query.
  if (length(datasources) == 0) {
    stop("The 'datasources' list is empty; at least one DSConnection is required", call. = FALSE)
  }

  cally <- call("TEST", x)
  dimensions <- DSI::datashield.aggregate(datasources, cally)

  # Unnamed connections get an empty study label.
  study_names <- names(datasources)
  if (is.null(study_names)) {
    study_names <- rep("", length(datasources))
  }
  study_labels <- paste0("dimensions of ", x, " in ", study_names)

  # Pooled result: first elements summed over studies, second element taken
  # from the first study.
  pooled_first <- Reduce(function(total, d) total + d[1], dimensions, 0)
  pooled_second <- dimensions[[1]][2]

  out <- c(dimensions, list(c(pooled_first, pooled_second)))
  names(out) <- c(
    study_labels,
    paste0("dimensions of ", x, " in combined studies")
  )
  out
}
############################################################
############################################################
### BASIC QUERIES ###
############################################################
# Inspect dimensions of each data source.
dim_info <- ds.dim(x = myDFName, datasources = connections)

# Inspect variables stored in each data source.
colname_info <- ds.colnames(x = myDFName, datasources = connections)

# Collect the class of selected variables. Each ds.class() result is kept
# whole as one list element, named after the variable it describes, so that
# no study's answer is dropped.
variables_type_inspect <- c("DMRAGEYR", "DMRGENDR")
variable_type_info <- lapply(
  variables_type_inspect,
  function(variable) {
    ds.class(
      x = sprintf("%s$%s", myDFName, variable),
      datasources = connections
    )
  }
)
names(variable_type_info) <- variables_type_inspect

# Numeric-specific functions.
ds.mean(x = sprintf("%s$DMRAGEYR", myDFName), datasources = connections)
# NOTE(review): quantiles are requested from the first connection only;
# confirm this restriction is intended.
ds.quantileMean(
  x = sprintf("%s$DMRAGEYR", myDFName),
  datasources = connections[1]
)

# Categorical-specific functions.
ds.table(sprintf("%s$DMRGENDR", myDFName), datasources = connections)
############################################################
############################################################
### BASIC MACHINE LEARNING - LINEAR MODEL ###
############################################################
# Scatter plot of DMRAGEYR against DATLGT across the connected studies.
ds.scatterPlot(
  x = sprintf("%s$DMRAGEYR", myDFName),
  y = sprintf("%s$DATLGT", myDFName),
  datasources = connections
)

# Box plot of DMRAGEYR.
ds.boxPlot(
  x = sprintf("%s$DMRAGEYR", myDFName),
  datasources = connections
)

# Gaussian GLM of DMRAGEYR on DATLGT; the fit is requested under the
# server-side name "ws".
linear_model <- ds.glmSLMA(
  formula = sprintf("%s$DMRAGEYR~%s$DATLGT", myDFName, myDFName),
  family = "gaussian",
  datasources = connections,
  newobj = "ws"
)
############################################################
############################################################
### LOG-OUT ###
############################################################
# Log out of every connection to free server-side resources. Always run this
# before closing your IDE.
DSI::datashield.logout(conns = connections)
############################################################
### DS ANALYSIS ENVIRONMENT: SET UP ###
############################################################
# R packages the analysis below depends on.
packages <- c("DSI", "DSOpal", "dsBaseClient", "ggplot2")

# Rtools is the Windows build toolchain. It ships as a stand-alone installer,
# not as an R package, so install.packages() cannot provide it. Only emit a
# hint when its `make` binary is not visible on the PATH.
# NOTE(review): a missing `make` on the PATH is used as a heuristic only.
if (Sys.info()[["sysname"]] == "Windows" && !nzchar(Sys.which("make"))) {
  message(
    "Rtools does not appear to be on the PATH. If a source installation ",
    "fails, install Rtools from https://cran.r-project.org/bin/windows/Rtools/"
  )
}

# Install each package only when it is missing, then attach it.
for (p in packages) {
  print(sprintf("Checking package %s", p))
  # requireNamespace() tests availability without attaching the package.
  if (!requireNamespace(p, quietly = TRUE)) {
    installation_repo <- "http://cran.us.r-project.org"
    # dsBaseClient is served from the OBiBa repository. CRAN is kept as a
    # second repository so that its dependencies can still be resolved.
    if (p == "dsBaseClient") {
      installation_repo <- c("http://cran.obiba.org", installation_repo)
    }
    install.packages(p, repos = installation_repo, dependencies = TRUE)
  }
  # library() stops with an error if the installation above did not succeed.
  library(p, character.only = TRUE)
}
############################################################
############################################################
### CONNECTING TO 1+ DATA SOURCES ###
############################################################
# Connection settings, to be set prior to any execution. The five vectors are
# parallel: position i of each one describes the same server.
urls <- c("https://uncover.itg.be")
project_names <- c("name of your project")
resource_names <- c("name of your resource")
users <- c("administrator")
passwords <- c("your password")

# Fail early on an empty or inconsistent configuration instead of silently
# building login entries that contain NA values.
stopifnot(
  length(urls) > 0,
  length(project_names) == length(urls),
  length(resource_names) == length(urls),
  length(users) == length(urls),
  length(passwords) == length(urls)
)

# Login builder that collects one connection entry per server.
builder <- DSI::newDSLoginBuilder()
for (i in seq_along(urls)) {
  print(sprintf("Connecting to Server with URL: %s", urls[i]))
  builder$append(
    # Servers are labelled study0, study1, ... in the order of `urls`.
    server = sprintf("study%d", i - 1L),
    url = urls[i],
    user = users[i],
    password = passwords[i],
    resource = sprintf("%s.%s", project_names[i], resource_names[i]),
    driver = "OpalDriver",
    # Connection options are passed as a string; these set ssl_verifyhost
    # and ssl_verifypeer to 0.
    options = "list(ssl_verifyhost=0,ssl_verifypeer=0)"
  )
}

# Open the connections described by the builder and assign each resource to
# the server-side symbol "D".
logindata <- builder$build()
connections <- DSI::datashield.login(
  logins = logindata,
  assign = TRUE,
  symbol = "D"
)

# Convert the resource held in D to a data frame, stored server-side as
# "data", then inspect its columns and the class of one of them.
datashield.assign.expr(
  connections,
  symbol = "data",
  expr = quote(as.resource.data.frame(D))
)
ds.colnames(x = "data", datasources = connections)
ds.class("data$DMRAGEYR", datasources = connections)
############################################################
############################################################
### LOG-OUT ###
############################################################
# Log out of every connection to free server-side resources. Always run this
# before closing your IDE.
DSI::datashield.logout(conns = connections)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment