# R/attachment_get.R

# Defines functions prepend_uuid strip_uuid

# Documented in prepend_uuid strip_uuid

#' Remove the "uuid:" prefix from a UUID hash.
#'
#' `r lifecycle::badge("stable")`
#'
#' This is a helper function used by \code{\link{attachment_get}}.
#' The pattern is not anchored to the start of the string: every occurrence of
#' the text "uuid:" is removed, wherever it appears.
#'
#' @param uuid A string which may contain any number of "uuid:"
#' @return The string with every occurrence of "uuid:" deleted.
#' @family utilities
#' @keywords internal
#' @examples
#' \dontrun{
#' strip_uuid("uuid:1234")
#' strip_uuid("uuid:d3bcefea-32a8-4dbc-80ca-4ecb0678e2b0")
#' }
strip_uuid <- function(uuid) {
  stringr::str_replace_all(uuid, pattern = "uuid:", replacement = "")
}

#' Add a leading "uuid:" to any string, e.g. an md5 hash.
#'
#' `r lifecycle::badge("stable")`
#'
#' This is the inverse of the helper function \code{\link{strip_uuid}}.
#' The input is not inspected: the prefix is added even if the string already
#' starts with "uuid:".
#'
#' @param md5hash A string, e.g. an md5 hash.
#' @return The string with a prepended "uuid:", as a plain character vector.
#' @family utilities
#' @keywords internal
#' @examples
#' \dontrun{
#' prepend_uuid("1234")
#' prepend_uuid("d3bcefea-32a8-4dbc-80ca-4ecb0678e2b0")
#' }
prepend_uuid <- function(md5hash) {
  prefixed <- glue::glue("uuid:{md5hash}")
  # Drop the glue class so callers receive a bare character vector.
  as.character(prefixed)
}

#' Build the download URL for one or many submission UUIDs and filenames.
#'
#' `r lifecycle::badge("stable")`
#'
#' This is a helper function used by \code{\link{attachment_get}}.
#' This function is vectorised and accepts single values or vectors for uuid and
#' fn.
#'
#' Only the form ID is percent-encoded. The submission UUID and the filename
#' are inserted into the URL exactly as given.
#'
#' @param uuid The UUID of one form submission, or a vector of UUIDs.
#' @param fn The attachment filename, as per ODK form submission, or a vector of
#'           attachment filenames.
#' @template param-pid
#' @template param-fid
#' @template param-url
#' @return The inferred download URL.
#' @family utilities
#' @keywords internal
#' @examples
#' # See vignette("setup") for setup and authentication options
#' # ruODK::ru_setup(svc = "....svc", un = "me@email.com", pw = "...")
#'
#' ruODK:::attachment_url(
#'   "uuid:d3bcefea",
#'   "filename.jpg",
#'   pid = 1,
#'   fid = "form1",
#'   url = "https://my.odkcentral.org"
#' )
attachment_url <- function(uuid,
                           fn,
                           pid = get_default_pid(),
                           fid = get_default_fid(),
                           url = get_default_url()) {
  # Assemble the template first, then interpolate it in a single glue() call.
  url_template <- paste0(
    "{url}/v1/projects/{pid}/forms/{URLencode(fid, reserved = TRUE)}",
    "/submissions/{uuid}/attachments/{fn}"
  )
  glue::glue(url_template)
  # nolint start
  # See https://github.com/ropensci/ruODK/issues/66
  # Building the URL with httr::modify_url() instead breaks the
  # attachment_get tests:
  # httr::modify_url(
  #   url,
  #   path = glue::glue(
  #     "v1/projects/{pid}/forms/{fid}/submissions/{uuid}/attachments/{fn}"
  #   )
  # )
  # nolint end
}

#' Download one media attachment.
#'
#' `r lifecycle::badge("stable")`
#'
#' This is a helper function used by \code{\link{attachment_get}}.
#' This function is not vectorised, but mapped by \code{\link{attachment_get}}
#' to a tibble of input parameters.
#'
#' An existing local file is kept and not downloaded again, unless it is
#' exactly 76 bytes in size. That is the size of the JSON error message which a
#' failed download (HTTP 404.1) leaves behind, so such a file is fetched again.
#'
#' @param pth A local file path to save the attachment to.
#' @param fn The attachment filename, as per ODK form submission. If NA, no file
#'   will be downloaded, but NA will be returned.
#'   If the file does not exist in ODK Central, a warning will be emitted.
#' @param src The attachment's download URL, generated by
#'   \code{\link{attachment_url}}. The src must contain the `uuid:` prefix.
#'   Note that the main Submissions table contains the submission id in the
#'   field `id`, whereas nested sub-tables contain the submission id in the
#'   field `submissions_id`.
#' @template param-url
#' @template param-auth
#' @template param-retries
#' @template param-verbose
#' @return The relative local path to the downloaded attachment or NA.
#' @family utilities
#' @export
#' @examples
#' \dontrun{
#' # Step 1: Setup ruODK with OData Service URL (has url, pid, fid)
#' # See vignette("setup") for setup and authentication options
#' # ruODK::ru_setup(svc = "....svc", un = "me@email.com", pw = "...")
#'
#' # Step 2: Construct attachment_url
#' att_url <- ruODK:::attachment_url(
#'   "uuid:d3bcefea-32a8-4dbc-80ca-4ecb0678e2b0",
#'   "filename.jpg"
#' )
#'
#' # Step 3: Get one attachment
#' local_fn <- get_one_attachment("media/filename.jpg", "filename.jpg", att_url)
#'
#' # In real life: done in bulk behind the scenes during odata_submission_get()
#' }
get_one_attachment <- function(pth,
                               fn,
                               src,
                               url = get_default_url(),
                               un = get_default_un(),
                               pw = get_default_pw(),
                               retries = get_retries(),
                               verbose = get_ru_verbose()) {
  # Early exit if there's nothing to download
  if (is.na(fn)) {
    "Filename is NA, skipping download.\n" %>%
      glue::glue() %>%
      ru_msg_noop(verbose = verbose)
    return(NA)
  }

  # HTTP 404.1 on file download saves the JSON error message as 76B file.
  # A file of exactly that size is therefore not kept but downloaded again.
  if (fs::file_exists(pth) && fs::file_size(pth) != 76) {
    'File already downloaded, keeping "{pth}".\n' %>%
      glue::glue() %>%
      ru_msg_noop(verbose = verbose)
    return(pth %>% as.character())
  }

  yell_if_missing(url, un, pw)

  # The response body is streamed straight to pth. A 404 is not retried.
  httr::RETRY(
    "GET",
    src,
    httr::authenticate(un, pw),
    httr::write_disk(pth, overwrite = TRUE),
    times = retries,
    terminate_on = c(404)
  ) %>%
    httr::warn_for_status(
      # If the download fails, inspect src and try to curl the plain API call.
      # nolint start
      # https://docs.getodk.org/central-api-submission-management/#downloading-an-attachment
      # nolint end
      task = glue::glue(
        "download media attachment {fn}.\n",
        "Troubleshooting tips:\n",
        "* Does the file resource {fn} exist? Run in a Terminal:\n",
        "  curl -ipu {un} {src} | cat\n",
        "* Is {fn} an expected attachment of this submission? Run:\n",
        # The filename is removed from src as literal text. Treated as a regex,
        # a filename containing e.g. "(", "[" or "+" would fail to match or
        # raise an error while this warning is being built.
        '  curl -ipu {un} {stringr::str_replace(src, stringr::fixed(fn), "")}\n'
      )
    )

  # NOTE(review): per the 76B comment above, a failed download appears to leave
  # the error message at pth, so this branch may also be taken after an HTTP
  # error. The warning above is then the only failure signal - confirm.
  if (fs::file_exists(pth)) {
    'File saved to "{pth}".\n' %>%
      glue::glue() %>%
      ru_msg_success(verbose = verbose)
    return(pth %>% as.character())
  } else {
    # nocov start
    # nolint start
    # This is hard to test, as it requires a form with a missing attachment.
    # This only ever happens on exotic upload errors.
    # nolint end
    "File not found.\n" %>%
      glue::glue() %>%
      ru_msg_success(verbose = verbose)
    return(NA)
    # nocov end
  }
}

#' Download attachments and return the local path.
#'
#' `r lifecycle::badge("stable")`
#'
#' @details This function is the workhorse for
#' \code{\link{handle_ru_attachments}}.
#' This function is vectorised and can handle either one or many records.
#' Parameters submission_uuid and attachment_filename accept single or exactly
#' the same number of multiple values.
#' The other parameters are automatically repeated.
#'
#' The media attachments are downloaded into a folder given by `local_dir`:
#'
#' workdir/media/filename1.jpg
#'
#' workdir/media/filename2.jpg
#'
#' workdir/media/filename3.jpg
#'
#' @param sid One or many ODK submission UUIDs, an MD5 hash.
#' @param fn One or many ODK form attachment filenames,
#'   e.g. "1558330537199.jpg".
#' @param local_dir The local folder to save the downloaded files to,
#'   default: "media".
#' @param separate (logical) Whether to separate locally downloaded files into
#'   a subfolder named after the submission uuid within `local_dir`,
#'   default: FALSE.
#'   The defaults mirror the behaviour of \code{\link{submission_export}}, which
#'   keeps all attachment files together in a folder `media`.
#'   Enable this option if downloaded files collide on identical names. This can
#'   happen if two data collection devices by chance generate the same filename
#'   for two respective media files, e.g. `DCIM0001.jpg`.
#' @template param-pid
#' @template param-fid
#' @template param-url
#' @template param-auth
#' @template param-retries
#' @template param-verbose
#' @return A character vector with the relative file path for each downloaded
#'   attachment, and `NA_character_` where no file was downloaded.
#' @family utilities
# nolint start
#' @seealso \url{https://docs.getodk.org/central-api-form-management/#downloading-a-form-attachment}
#' @seealso \url{https://docs.getodk.org/central-api-submission-management/#downloading-an-attachment}
# nolint end
#' @export
#' @examples
#' \dontrun{
#' # Step 1: Setup ruODK
#' # See vignette("setup") for setup and authentication options
#' # ruODK::ru_setup(svc = "....svc", un = "me@email.com", pw = "...")
#'
#' a_local_dir <- here::here()
#'
#' # Step 2: Get unparsed submissions
#' fresh_raw <- odata_submission_get(parse = FALSE)
#'
#' # Step 3: Get attachment field "my_photo"
#' fresh_parsed <- fresh_raw %>%
#'   odata_submission_rectangle() %>%
#'   dplyr::mutate(
#'     my_photo = attachment_get(id,
#'       my_photo,
#'       local_dir = a_local_dir,
#'       verbose = TRUE
#'     )
#'     # Repeat for all other attachment fields
#'   )
#' }
attachment_get <- function(sid,
                           fn,
                           local_dir = "media",
                           separate = FALSE,
                           pid = get_default_pid(),
                           fid = get_default_fid(),
                           url = get_default_url(),
                           un = get_default_un(),
                           pw = get_default_pw(),
                           retries = get_retries(),
                           verbose = get_ru_verbose()) {
  yell_if_missing(url, un, pw, pid = pid, fid = fid)
  if (separate == TRUE) {
    # One subfolder per submission, named after the uuid without "uuid:".
    dest_dir <- fs::path(local_dir, strip_uuid(sid))
  } else {
    dest_dir <- fs::path(local_dir)
  }
  "Using local directory \"{dest_dir}\".\n" %>%
    glue::glue() %>%
    ru_msg_info(verbose = verbose)

  fs::dir_create(dest_dir)

  # One row per attachment. The column names match the argument names of
  # get_one_attachment(), so pmap() passes each row as one call.
  downloads <- tibble::tibble(
    pth = fs::path(dest_dir, fn),
    fn = fn,
    src = attachment_url(
      sid,
      fn,
      pid = pid,
      fid = fid,
      url = url
    ),
    url = url,
    un = un,
    pw = pw,
    verbose = verbose,
    retries = retries
  ) %>%
    purrr::pmap(get_one_attachment)

  # Coerce element by element. as.character() on the list itself deparses a
  # logical NA element into the string "NA", which is.na() does not detect.
  vapply(
    downloads,
    function(x) as.character(x)[1L],
    character(1),
    USE.NAMES = FALSE
  )
}

# usethis::use_test("attachment_get") # nolint
# dbca-wa/ruODK documentation built on March 20, 2024, 12:19 p.m.