options(width = 80)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "Readme_files/"
)

## Helper to determine OPAL version:
## Candidate version strings "major.minor.patch" in ascending order
## (major 4 to 10, minor and patch 0 to 9 each).
v_major = seq(4L, 10L)
v_minor1 = seq_len(10L) - 1
v_minor2 = seq_len(10L) - 1
versions = expand.grid(v_major, v_minor1, v_minor2)
od = order(versions[, 1], versions[, 2], versions[, 3])
versions = versions[od, ]
vstrings = paste(versions[, 1], versions[, 2], versions[, 3], sep = ".")

#' Determine the version of an OPAL server from a vector of candidates
#'
#' Walks through `versions` and returns the first candidate for which
#' `opalr::opal.version_compare()` reports equality (a result of 0). A
#' positive result moves on to the next candidate, a negative result aborts
#' the search, so `versions` must be sorted in ascending order.
#'
#' @param opal OPAL connection object as returned by `opalr::opal.login()`.
#' @param versions Character vector of version strings in ascending order.
#' @return The element of `versions` that matches the server version.
#' Stops with an error if no candidate matches: the server version is below
#' the first candidate, falls between two candidates, or lies above the last
#' candidate.
getOPALVersion = function(opal, versions) {
  for (k in seq_along(versions)) {
    ov = opalr::opal.version_compare(opal, versions[k])
    if (ov == 0) return(versions[k])
    if (ov < 0) {
      if (k == 1L) stop("Version is smaller than the smallest one from vector.")
      # The previous candidate was too small and this one is too large, so
      # the server version is simply not part of the candidates.
      stop("Version lies between '", versions[k - 1L], "' and '", versions[k],
        "' and is not contained in the vector.")
    }
  }
  # Reaching this point means every candidate compared as too small (or there
  # were no candidates at all). Fail explicitly instead of indexing past the
  # end of the vector.
  stop("Version is larger than the largest one from vector (or the vector is empty).")
}

pkgs = c("here", "opalr", "DSI", "DSOpal", "dsBaseClient")
for (pkg in pkgs) {
  if (! requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = c(getOption("repos"), "https://cran.obiba.org"))
  }
}
devtools::install(quiet = TRUE, upgrade = "always")

library(DSI)
library(DSOpal)
library(dsBaseClient)

## Install packages on the DataSHIELD test machine:
surl = "https://opal-demo.obiba.org/"
username = "administrator"
password = "password"

opal = opalr::opal.login(username = username, password = password, url = surl)

# `finally` guarantees that the OPAL session is closed even if determining the
# version, installing, or publishing fails. Assignments inside the braces
# still happen in the calling environment, so `opal_version` stays available
# to the rest of the document.
tryCatch({
  opal_version = getOPALVersion(opal, vstrings)

  check1 = opalr::dsadmin.install_github_package(opal = opal, pkg = "dsBinVal",
    username = "difuture-lmu", ref = "main")
  if (! check1) {
    stop("[", Sys.time(), "] Was not able to install dsBinVal!", call. = FALSE)
  }

  check2 = opalr::dsadmin.publish_package(opal = opal, pkg = "dsBinVal")
  if (! check2) {
    stop("[", Sys.time(), "] Was not able to publish methods of dsBinVal!", call. = FALSE)
  }
}, finally = opalr::opal.logout(opal))

# Build model for the example, therefore download the CNSIM data sets from:
# https://github.com/datashield/DSLite/tree/master/data
# This part is never executed during the build (guarded by `if (FALSE)`); it
# only documents how `Readme_files/mod.rda` was created.
if (FALSE) {
  dpath = "~/Downloads"
  dnames = paste0(dpath, "/CNSIM", seq_len(3), ".rda")
  # Load one .rda file into a private environment and return the object
  # whose name contains "CNSIM".
  dLoader = function(n) {
    env = new.env()
    dn = load(n, envir = env)
    dn = dn[grep("CNSIM", dn)]
    get(dn, envir = env)
  }
  CNSIM = na.omit(do.call(rbind, lapply(dnames, dLoader)))
  mod = glm(DIS_DIAB ~ ., data = CNSIM, family = binomial())
  save(mod, file = here::here("Readme_files/mod.rda"))
}
The package provides functionality to conduct and visualize ROC analysis and calibration on decentralized data. The basis is the DataSHIELD infrastructure for distributed computing. This package provides the calculation of the ROC-GLM with AUC confidence intervals as well as calibration curves and the Brier score. In order to calculate the ROC-GLM or assess calibration, it is necessary to push models to the servers and compute their predictions there, which is also provided by this package. Note that DataSHIELD uses privacy filters from DataSHIELD v5 onwards, which are also used in this package. Additionally, this package uses the old option datashield.privacyLevel
(to indicate the minimal amount of values required to allow sharing an aggregation) as fallback. Instead of setting the option, we directly retrieve the fallback privacy level from the DESCRIPTION
file each time a function calls for it. This option is set to 5 by default. The methodology of the package is explained in detail here.
At the moment, there is no CRAN version available. Install the development version from GitHub:
# Install the development version of dsBinVal from its GitHub repository:
remotes::install_github("difuture-lmu/dsBinVal")
It is necessary to register the assign and aggregate methods in the OPAL administration. These methods are registered automatically when publishing the package on OPAL (see DESCRIPTION
).
Note that the package needs to be installed at both locations: the server and the analyst's machine.
There are two options. The first option is to use the Opal API:

- Switch to the `Administration/DataSHIELD/` tab.
- Click the `Add DataSHIELD package` button.
- Select `GitHub` as source, and use `difuture-lmu` as user, `dsBinVal` as name, and `main` as Git reference.

The second option is to use the `opalr` package to install `dsBinVal` directly from `R`:
### Credentials of the opal test server:
surl = "https://opal-demo.obiba.org/"
username = "administrator"
password = "password"

### Log in, install the package from GitHub, publish its methods, log out:
pkg_name = "dsBinVal"
opal = opalr::opal.login(url = surl, username = username, password = password)

opalr::dsadmin.install_github_package(
  opal = opal,
  pkg = pkg_name,
  username = "difuture-lmu",
  ref = "main"
)
opalr::dsadmin.publish_package(opal = opal, pkg = pkg_name)

opalr::opal.logout(opal)
A more sophisticated example is available here.
library(dsBinVal)
surl = "https://opal-demo.obiba.org/"
username = "administrator"
password = "password"

# Server name -> table that is assigned on that server at login.
server_tables = c(
  ds1 = "CNSIM.CNSIM1",
  ds2 = "CNSIM.CNSIM2",
  ds3 = "CNSIM.CNSIM3"
)

# Register one login entry per server; all entries share URL and credentials.
builder = newDSLoginBuilder()
for (server_name in names(server_tables)) {
  builder$append(
    server = server_name,
    url = surl,
    user = username,
    password = password,
    table = server_tables[[server_name]]
  )
}

connections = datashield.login(logins = builder$build(), assign = TRUE)
# Restore the locally fitted model `mod` from disk. It was calculated by:
#> glm(DIS_DIAB ~ ., data = CNSIM, family = binomial())
model_file = here::here("Readme_files/mod.rda")
load(model_file)

# Make the model available on the DataSHIELD servers:
pushObject(connections, mod)

# Server-side copy of the data without rows containing NAs:
ds.completeCases("D", newobj = "D_complete")

# Compute the scores on the servers and store them there as "pred":
predict_call = "predict(mod, newdata = D, type = 'response')"
predictModel(
  connections,
  mod,
  "pred",
  "D_complete",
  predict_fun = predict_call
)

datashield.symbols(connections)
# The ROC-GLM is secured with differential privacy. Its privacy parameters
# have to be chosen according to the l2-sensitivity, which is therefore
# assessed first:
l2s = dsL2Sens(connections, "D_complete", "pred")
print(l2s)

# Due to the results presented in https://arxiv.org/abs/2203.10828, we set
# the privacy parameters to
#
#   l2-sensitivity          epsilon   delta
#   l2s <= 0.01             0.2       0.1
#   0.01 < l2s <= 0.03      0.3       0.4
#   0.03 < l2s <= 0.05      0.5       0.3
#   0.05 < l2s <= 0.07      0.5       0.5
#   0.07 < l2s              0.5       0.5   (BUT results may not be good!)
# Encode the response as integer/numeric vector "truth" on the servers:
ds.asInteger("D_complete$DIS_DIAB", "truth")

# Fit the ROC-GLM on the distributed data, then print and plot the result:
roc_glm = dsROCGLM(
  connections,
  truth_name = "truth",
  pred_name = "pred",
  dat_name = "D_complete",
  seed_object = "pred"
)
print(roc_glm)

plot(roc_glm)
### Calculate the Brier score:
dsBrierScore(connections, "truth", "pred")

### Calculate and plot calibration curve:
cc = dsCalibrationCurve(connections, "truth", "pred")
print(cc)

plot(cc)
Built by r Sys.info()[["login"]]
(r Sys.info()[["sysname"]]
) on r as.character(Sys.time())
.
This readme is built automatically after each push to the repository and weekly on Monday. The automatic build installs the package on the DataSHIELD test server and therefore also tests whether the functionality of the package works on DataSHIELD servers. Additionally, the functionality is tested using GH Actions with tests/testthat/test_on_active_server.R
. The system information of the local and remote machines is:
# Packages whose versions are reported for the local client and for the
# DataSHIELD servers.
client_pkgs = c("DSI", "DSOpal", "dsBaseClient", "dsBinVal")
remote_pkgs = c("dsBase", "resourcer", "dsBinVal")

# Session information of the local R session ...
ri_l = sessionInfo()

# ... and of every connected server, obtained via `getDataSHIELDInfo()`.
ri_ds = datashield.aggregate(
  connections,
  quote(getDataSHIELDInfo())
)
R
version: r ri_l$R.version$version.string
# Two-column table (package name, installed version) for the client packages.
installed = installed.packages()[client_pkgs, ]
dfv = data.frame(
  Package = rownames(installed),
  Version = unname(installed[, "Version"])
)
knitr::kable(dfv)
r opal_version
R
version of r names(ri_ds)[1]
: r ri_ds[[1]]$session$R.version$version.string
R
version of r names(ri_ds)[2]
: r ri_ds[[2]]$session$R.version$version.string
# "Version" column of the remote packages for the server `nm`, with the
# column name prefixed by the server name.
server_versions = function(nm) {
  out = ri_ds[[nm]]$pcks[remote_pkgs, "Version", drop = FALSE]
  colnames(out) = paste0(nm, ": ", colnames(out))
  as.data.frame(out)
}

# One version column per server, side by side, with the package names moved
# from the row names into a leading column.
per_server = lapply(names(ri_ds), server_versions)
dfv = do.call(cbind, per_server)
dfv = cbind(Package = rownames(dfv), dfv)
rownames(dfv) = NULL

knitr::kable(dfv)
# Close the DataSHIELD connections once they are no longer needed:
datashield.logout(connections)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.