R/access_utils.R

Defines functions error summarize_file_access

Documented in summarize_file_access

# Utils for querying and setting access

# -- QUERY ACCESS --------------------------------------------------------------#

#' Summarize file access for files within some view
#'
#' Some portion adapted from `@allaway`'s original script.
#' Common usages are:
#' - Check numbers of files are viewable and downloadable for registered Synapse users:
#' `summarize_file_access(principal_id = 273948, access_type = "DOWNLOAD", "syn16858331")`
#' - Check that files within the portal purview are actually editable for the NF-OSI Sage Team,
#'  because we need at least edit permissions for updating annotations (historically, some issues with hackathon projects, etc.):
#'  `summarize_file_access(principal_id = 3378999, access_type = "UPDATE", "syn16858331")`
#'
#' More complex usage:
#' - Check access for _multiple_ teams.
#'
#' This summarizes file access by using the smaller set of benefactors,
#' and returns a `data.table` with columns `benefactorId`, `principalId`, `access`, and `N`, which describes whether
#' `principalId` has _all_ the specified `access` to `benefactorId` and its `N` files.
#' This data can be used for further aggregation as needed.
#'
#' @inheritParams check_access
#' @param fileview_id Syn id of the view. View must include `benefactorId` and `type`.
#' @import data.table
#' @export
summarize_file_access <- function(principal_id, # 3378999 for NF-OSI
                                  access_type,
                                  fileview_id # "syn16858331"
                        ) {

  .check_login()
  tryCatch({
    view <- .syn$tableQuery(glue::glue("SELECT id,type,benefactorId FROM {fileview_id}"))
  }, error = function(e) stop("Could not query view!"))
  view <- as.data.table(view$asDataFrame())
  files_by_benefactor <- view[type == "file", .N, by = .(benefactorId)]
  access <- view[, check_access(benefactorId, principal_id, access_type), by = .(benefactorId)]
  # files_by_benefactor can be smaller than access because there are folders without files
  access <- merge(access, files_by_benefactor, all.x = TRUE)
  access[is.na(N), N := 0]
  return(access)
}

#' Check access
#'
#' @param id The benefactor entity.
#' @param principal_id Group(s) for which to check the access type.
#' @param access_type Which access type(s) to check for; result summarizes whether there are permissions for _all_ types specified.
#' @export
check_access <- function(id,
                         principal_id,
                         access_type = c("CREATE", "UPDATE", "CHANGE_SETTINGS", "DOWNLOAD", "MODERATE", "READ", "CHANGE_PERMISSIONS", "DELETE")) {

  access_type <- match.arg(access_type, several.ok = TRUE)
  stopifnot(is.numeric(principal_id))

  acl_result <- tryCatch({
    .syn$restGET(glue::glue("https://repo-prod.prod.sagebase.org/repo/v1/entity/{id}/acl"))$resourceAccess %>%
    rbindlist(.)
  }, error = function(e) stop(glue::glue("Error for {id}: {e$message}")))

  setkey(acl_result, "principalId")
  # returns NA in accessType if principal does not have that access
  access_result <- acl_result[.(principal_id)][, .(access = all(access_type %in% accessType)), by = .(principalId)]
  return(access_result)
}


# -- SETTING ACCESS -------------------------------------------------------------#

#' Set public access to VIEW (READ) only for an entity 
#' 
#' Set both registered users and non-registered users to have VIEW-only permissions. 
#' See code{link{make_public}} for more permissive permissions to download (for registered users), which is usually set later at data release time.
#' 
#' @param id Synapse entity id.
#' @export
make_public_viewable <- function(id) {
  .check_login()
  ALL_REGISTERED_SYNAPSE_USERS_GROUP <- "273948"
  PUBLIC_GROUP <- "273949"
  # set registered synapse users to view, download
  .syn$setPermissions(entity = id,
                      principalId = ALL_REGISTERED_SYNAPSE_USERS_GROUP,
                      accessType = list("READ"))
  
  # set public to view
  .syn$setPermissions(entity = id,
                      principalId = PUBLIC_GROUP,
                      accessType = list("READ"))
}


#' Make public
#'
#' Sets READ/DOWNLOAD permissions for web and registered users equivalently to the "Make Public" button in Synapse UI.
#' TODO: For regular users this can be a one-and-done action, but for the DCC admin this likely entails some other actions,
#' such as updating a project tracking table, so a wrapper or "callback" functionality might be needed.
#'
#' @param id Synapse entity id.
#' @export
make_public <- function(id) {
  .check_login()
  ALL_REGISTERED_SYNAPSE_USERS_GROUP <- "273948"
  PUBLIC_GROUP <- "273949"
  # set registered synapse users to view, download
  .syn$setPermissions(entity = id,
                      principalId = ALL_REGISTERED_SYNAPSE_USERS_GROUP,
                      accessType = list("READ","DOWNLOAD"))

  # set public to view
  .syn$setPermissions(entity = id,
                      principalId = PUBLIC_GROUP,
                      accessType = list("READ"))
}


#' Provide access to a specific set of files using a query result.
#'
#' Sets READ/DOWNLOAD permissions for a specific user or team, provided a vector of entity IDs. Generally, we
#' do not set permissions this way, as it can create many, many ACLs/"local sharing settings" which will need to be removed at the
#' time of data publication. However, at the time of writing, one project (JHU Biobank) shares embargoed data but is
#' required to share only specific subsets of the files as needed by the data requestor (e.g. only MPNST tumor data, or only RNA-seq data).
#'
#' @param principal_id Synapse team or user id.
#' @param entity_ids Vector of entity ids.
#' @param create_dataset Optionally, create a dataset of the entity_ids, so that the user can easily retrieve them.
#' @param project_id If create_dataset=T, which project to create it in.
#' @param dataset_name Optional name for dataset to be created
#' @export
grant_specific_file_access <- function(principal_id, entity_ids, create_dataset = F, project_id = NULL, dataset_name = NULL) {
  # .check_login()

  if(create_dataset & is.null(project_id)){
    stop("project_id must be provided if create_dataset = T")
  }

  # set registered synapse users to view, download
  sapply(entity_ids, function(id){
    .syn$setPermissions(entity = id,
                      principalId = principal_id,
                      accessType = list("READ","DOWNLOAD"))
   })

  ##need to grab the current versions for dataset creation
  dataset_items <- lapply(entity_ids, function(id){
    vsn <- .syn$get(id, downloadFile = F)$versionNumber
    list(entityId = id, versionNumber = vsn)
  })

  if(is.null(dataset_name)){
    dataset_name <- glue::glue("Dataset {Sys.Date()} for {principal_id}")
  }

  if(create_dataset){
    tryCatch({
      # First attempt with addAnnotationColumns = TRUE
      dataset <- .syn$store(synapseclient$Dataset(name = dataset_name, 
                                                  parent = project_id, dataset_items = dataset_items, addAnnotationColumns = TRUE))
      message(glue::glue("{emoji::emoji(\"thumbsup\")} Dataset created with annotation columns at {dataset$properties$id}"))
    }, error = function(e) {
      # If error, retry with addAnnotationColumns = FALSE
      dataset <- .syn$store(synapseclient$Dataset(name = dataset_name, 
                                                  parent = project_id, dataset_items = dataset_items, addAnnotationColumns = FALSE))
      .syn$setPermissions(entity = dataset$properties$id, principalId = principal_id, 
                          accessType = list("READ", "DOWNLOAD"))
      message(glue::glue("{emoji::emoji(\"warning\")} Dataset created without annotation columns at {dataset$properties$id}. Annotation columns will need to be added manually."))
    })
  }

  message(glue::glue('{emoji::emoji("astonished")} Principal {principal_id} added to {length(entity_ids)} entities'))

  #TODO: set schema programmatically? might be easier to add annotations to schema in web client as needed to support principal_id...
  ## Note Dec 2023; schema is automatically defined unless there is an error caused by the way synapse detects annotation schemas, e.g. a type collision that causes duplicate columns with the same name. 

}
nf-osi/nfportalutils documentation built on Sept. 29, 2024, 3 a.m.