R/utils-file.R

Defines functions write_json set_config read_json get_config config_paths user_config_path default_config_path ensure_dirs src_data_avail auto_load_src_names install_data_pkgs data_pkg_avail src_data_dir data_dir

Documented in auto_load_src_names config_paths data_dir get_config set_config src_data_avail src_data_dir

#' File system utilities
#'
#' Determine the location where to place data meant to persist between
#' individual sessions.
#'
#' For data, the default location depends on the operating system as
#'
#' | **Platform** | **Location**                         |
#' | ------------ | -------------------------------------|
#' | Linux        | `~/.local/share/ricu`                |
#' | macOS        | `~/Library/Application Support/ricu` |
#' | Windows      | `%LOCALAPPDATA%/ricu`                |
#'
#' If the default storage directory does not exists, it will only be created
#' upon user consent (requiring an interactive session).
#'
#' The environment variable `RICU_DATA_PATH` can be used to overwrite the
#' default location. If desired, this variable can be set in an R startup file
#' to make it apply to all R sessions. For example, it could be set within:
#'
#' - A project-local `.Renviron`;
#' - The user-level `.Renviron`;
#' - A file at `$(R RHOME)/etc/Renviron.site`.
#'
#' Any directory specified as environment variable will recursively be created.
#'
#' Data source directories typically are sub-directories to `data_dir()` named
#' the same as the respective dataset. For demo datasets corresponding to
#' `mimic` and `eicu`, file location however deviates from this scheme. The
#' function `src_data_dir()` is used to determine the expected data location
#' of a given dataset.
#'
#' Configuration files used both for data source configuration, as well as for
#' dictionary definitions potentially involve multiple files that are read and
#' merged. For that reason, `get_config()` will iterate over directories
#' passed as `cfg_dirs` and look for the specified file (with suffix `.json`
#' appended and might be missing in some of the queried directories). All
#' found files are read by [jsonlite::read_json()] and the resulting lists are
#' combined by reduction with the binary function passed as `combine_fun`.
#'
#' With default arguments, `get_config()` will simply concatenate lists
#' corresponding to files found in the default config locations as returned by
#' `config_paths()`: first the directory specified by the environment variable
#' `RICU_CONFIG_PATH` (if set), followed by the directory at
#'
#' ```
#' system.file("extdata", "config", package = "ricu")
#' ```
#'
#' Further arguments are passed to [jsonlite::read_json()], which is called
#' with slightly modified defaults: `simplifyVector = TRUE`,
#' `simplifyDataFrame = FALSE` and `simplifyMatrix = FALSE`.
#'
#' The utility function `set_config()` writes the list passed as `x` to file
#' `dir/name.json`, using [jsonlite::write_json()] also with slightly modified
#' defaults (which can be overridden by passing arguments as `...`): `null =
#' "null"`, `auto_unbox = TRUE` and `pretty = TRUE`.
#'
#' Whenever the package namespace is attached, a summary of dataset
#' availability is printed using the utility functions `auto_load_src_names()`
#' and `src_data_avail()`. While the former simply returns a character vector
#' of data sources that are configures for automatically being set up on
#' package loading, the latter returns a summary of the number of available
#' tables per dataset.
#'
#' @param subdir A string specifying a directory that will be made sure to
#' exist below the data directory.
#' @param create Logical flag indicating whether to create the specified
#' directory
#'
#' @rdname file_utils
#'
#' @return Functions `data_dir()`, `src_data_dir()` and `config_paths()` return
#' file paths as character vectors, `auto_load_src_names()` returns a character
#' vector of data source names and `src_data_avail()` a `data.frame` describing
#' availability of data sources. Configuration utilitites `get_config()` and
#' `set_config()` read and write list objects to/from JSON format.
#'
#' @examples
#' Sys.setenv(RICU_DATA_PATH = tempdir())
#' identical(data_dir(), tempdir())
#'
#' dir.exists(file.path(tempdir(), "some_subdir"))
#' some_subdir <- data_dir("some_subdir")
#' dir.exists(some_subdir)
#'
#' cfg <- get_config("concept-dict")
#'
#' identical(
#'   cfg,
#'   get_config("concept-dict",
#'              system.file("extdata", "config", package = "ricu"))
#' )
#'
#' @export
#'
data_dir <- function(subdir = NULL, create = TRUE) {

  assert_that(is.flag(create))

  res <- Sys.getenv("RICU_DATA_PATH", unset = NA_character_)

  if (is.na(res)) {

    res <- switch(
      Sys.info()[["sysname"]],
      Darwin  = Sys.getenv("XDG_DATA_HOME", "~/Library/Application Support"),
      Windows = Sys.getenv("LOCALAPPDATA", Sys.getenv("APPDATA")),
      Sys.getenv("XDG_DATA_HOME", "~/.local/share")
    )

    res <- file.path(res, "ricu")

  }

  if (!is.null(subdir)) {
    assert_that(is.string(subdir))
    res <- file.path(res, subdir)
  }

  if (create) {
    res <- ensure_dirs(res)
  }

  res
}

#' @param src,srcs Character vector of data source names
#' @rdname file_utils
#' @export
src_data_dir <- function(srcs) {

  if (!is.character(srcs)) {
    srcs <- src_name(srcs)
  }

  if (length(srcs) > 1L) {
    return(chr_ply(srcs, src_data_dir))
  }

  assert_that(is.string(srcs))

  pkg <- sub("_", ".", srcs)

  if (is_pkg_installed(pkg)) {
    system.file("extdata", package = pkg)
  } else if (data_pkg_avail(pkg)) {
    file.path(.libPaths()[1L], pkg, "extdata")
  } else {
    data_dir(srcs, create = FALSE)
  }
}

#' @importFrom utils packageDescription available.packages
data_pkg_avail <- function(src) {

  if (!is.string(src)) {
    src <- src_name(src)
  }

  repos <- packageDescription("ricu", fields = "Additional_repositories")
  pkgs  <- available.packages(repos = repos)

  sub("_", ".", src) %in% pkgs[, "Package"]
}

#' @importFrom utils install.packages
install_data_pkgs <- function(srcs = c("mimic_demo", "eicu_demo")) {

  if (!is.character(srcs)) {
    srcs <- src_name(srcs)
  }

  repos <- packageDescription("ricu", fields = "Additional_repositories")

  install.packages(sub("_", ".", srcs), repos = repos)
}

#' @rdname file_utils
#' @export
auto_load_src_names <- function() {

  res <- Sys.getenv("RICU_SRC_LOAD", unset = NA_character_)

  if (is.na(res)) {
    c("mimic", "mimic_demo", "eicu", "eicu_demo", "hirid")
  } else {
    strsplit(res, ",")[[1L]]
  }
}

#' @rdname file_utils
#' @export
src_data_avail <- function(src = auto_load_src_names()) {

  src_stats <- function(x, env) {

    src_env <- tryCatch(
      get0(x, envir = env, inherits = FALSE),
      miss_tbl_msg = function(msg) attr(msg, "tbl_ok"),
      message = function(msg) NULL,
      warning = function(warn) NULL,
      error = function(err) NULL
    )

    if (is.null(src_env)) {
      c(NA_integer_, NA_integer_)
    } else if (is_src_env(src_env)) {
      rep(length(src_env), 2L)
    } else {
      c(sum(src_env), length(src_env))
    }
  }

  if (identical(length(src), 0L)) {
    return(NULL)
  }

  assert_that(is.character(src))

  env <- data_env()
  res <- int_ply(src, src_stats, env, length = 2L)

  data.frame(
    name = src,
    available = is_true(src %in% ls(envir = env) & res[1L, ] == res[2L, ]),
    tables = res[1L, ], total = res[2L, ]
  )
}

ensure_dirs <- function(paths) {

  uq_paths <- unique(paths)

  is_dir <- file.info(uq_paths, extra_cols = FALSE)[["isdir"]]
  is_no_dir <- lgl_ply(is_dir, identical, FALSE)

  if (any(is_no_dir)) {
    stop_ricu(
      c("The following {qty(sum(is_no_dir))} path{?s} {?exists/exist} but not
         as director{?y/ies}:", bullet(uq_paths[is_no_dir])),
      class = "path_exists_not_dir", exdent = c(0L, rep(2L, sum(is_no_dir)))
    )
  }

  dirs_to_create <- uq_paths[is.na(is_dir)]

  if (length(dirs_to_create) > 0L) {

    res <- lgl_ply(dirs_to_create, dir.create, recursive = TRUE)

    if (!all(res)) {
      stop_ricu(
        c("The following {qty(sum(!res))} director{?y/ies} could not be
           created:", bullet(dirs_to_create[!res])),
        class = "dir_create_fail", exdent = c(0L, rep(2L, sum(!res)))
      )
    }
  }

  invisible(paths)
}

default_config_path <- function() {
  system.file("extdata", "config", package = pkg_name())
}

user_config_path <- function() {

  res <- Sys.getenv("RICU_CONFIG_PATH", unset = NA_character_)

  if (is.na(res)) {
    NULL
  } else {
    res
  }
}

#' @rdname file_utils
#' @export
config_paths <- function() c(user_config_path(), default_config_path())

#' @param name File name of the configuration file (`.json` will be appended)
#' @param cfg_dirs Character vector of directories searched for config files
#' @param combine_fun If multiple files are found, a function for combining
#' returned lists
#' @param ... Passed to [jsonlite::read_json()] or [jsonlite::write_json()]
#'
#' @rdname file_utils
#'
#' @export
#'
get_config <- function(name, cfg_dirs = config_paths(), combine_fun = c, ...) {

  read_if_exists <- function(x, ...) {

    if (isTRUE(file.exists(x))) {
      read_json(x, ...)
    } else {
      NULL
    }
  }

  assert_that(is.string(name), has_length(cfg_dirs), all_fun(cfg_dirs, is.dir),
              null_or(combine_fun, is.function))

  res <- lapply(file.path(cfg_dirs, paste0(name, ".json")),
                read_if_exists, ...)

  if (is.null(combine_fun)) {
    res
  } else {
    Reduce(combine_fun, res, NULL)
  }
}

read_json <- function(path, simplifyVector = TRUE, simplifyDataFrame = FALSE,
                      simplifyMatrix = FALSE, ...) {

  assert_that(file.exists(path))

  jsonlite::read_json(path, simplifyVector = simplifyVector,
                      simplifyDataFrame = simplifyDataFrame,
                      simplifyMatrix = simplifyMatrix, ...)
}

#' @param x Object to be written
#' @param dir Directory to write the file to (created if non-existent)
#'
#' @rdname file_utils
#'
#' @export
#'
set_config <- function(x, name, dir = file.path("inst", "extdata", "config"),
                       ...) {

  assert_that(is.string(name))

  file <- paste0(name, ".json")

  if (!is.null(dir)) {

    assert_that(is.string(dir))

    file <- file.path(ensure_dirs(dir), file)
  }

  write_json(x, file, ...)
}

write_json <- function(x, path, null = "null", auto_unbox = TRUE,
                       pretty = TRUE, ...) {

  jsonlite::write_json(x, path, null = null, auto_unbox = auto_unbox,
                       pretty = pretty, ...)
}
septic-tank/ricu documentation built on Jan. 30, 2021, 8:40 p.m.