# R/read.R
#
# Defines functions: DownloadBrenda, CleanECNumber, ReadBrenda
#
# Documented in: CleanECNumber, DownloadBrenda, ReadBrenda

#' @title Read BRENDA text file into a tibble.
#'
#' @inherit SeparateEntries return description
#'
#' @inheritParams ReadBrendaFile
#' @param clean Boolean; if TRUE, run [CleanECNumber()] after reading the file.
#'
#' @return A `tibble` with the columns `ID`, `field` and `description`, with
#' duplicated rows removed. If `clean` is TRUE, the table has additionally been
#' passed through [CleanECNumber()].
#'
#' @examples
#' brenda_txt <- system.file("extdata", "brenda_download_test.txt",
#'                           package = "brendaDb")
#' df <- ReadBrenda(brenda_txt)
#'
#' @importFrom magrittr %>%
#' @importFrom tibble as_tibble
#' @importFrom dplyr distinct
#' @export
ReadBrenda <- function(filepath, clean = TRUE) {
  # Reject unusable paths up front so the caller gets a readable error instead
  # of a failure from inside the file parser.
  if (!is.character(filepath) || length(filepath) != 1L || is.na(filepath)) {
    stop("`filepath` must be a single, non-missing string.", call. = FALSE)
  }

  message("Reading BRENDA text file...")
  filepath <- path.expand(filepath)
  if (!file.exists(filepath)) {
    stop("BRENDA text file not found: ", filepath, call. = FALSE)
  }

  # src/read_brenda
  df <- ReadBrendaFile(filepath)

  message("Converting text into a list. This might take a while...")
  df <- SeparateEntries(df)
  # The three elements are named here so that they become the tibble columns.
  names(df) <- c("ID", "field", "description")

  message("Converting list to tibble and removing duplicated entries...")
  df <- df %>%
    as_tibble() %>%
    distinct()
  if (clean) {
    df <- CleanECNumber(df)
  }
  message("If you're going to use this data again, consider saving this table ",
          "using data.table::fwrite().\n")
  df
}


#' @title Remove deleted and transferred EC numbers.
#'
#' @description Some EC numbers carry a comment wrapped in parentheses. Most of
#' these comments mark the entry as deleted (such entries are removed from the
#' regular rows) or as transferred (the comment points to the new entry).
#'
#' @param df A `tibble` generated by [ReadBrenda()].
#'
#' @return A `tibble` in which the regular entries come first and the deleted
#' and transferred entries are appended at the bottom, one row per ID, with
#' columns:
#' - ID being the deleted/transferred ID,
#' - field being "TRANSFERRED_DELETED", and
#' - description being the information included in the original ID column.
#'
#' @examples
#' df <- ReadBrenda(system.file("extdata", "brenda_download_test.txt",
#'                           package = "brendaDb"))
#' brendaDb:::CleanECNumber(df)
#'
#' @importFrom dplyr mutate filter bind_rows
#' @importFrom rlang .data
#' @import stringr
#'
#' @keywords internal
CleanECNumber <- function(df) {
  # Remove the first literal " ()" from every ID, so that an empty comment does
  # not make the ID look like a commented one below.
  cleaned <- mutate(df, ID = str_remove(.data$ID, fixed(" ()")))

  # Regular entries: IDs without an opening parenthesis.
  regular <- filter(cleaned, !str_detect(.data$ID, fixed("(")))

  # Commented entries: keep only the ID column, one row per distinct ID.
  commented <- filter(cleaned, str_detect(.data$ID, fixed("(")))
  commented <- distinct(commented, .data$ID)

  # The description must be taken from the raw ID before the ID itself is
  # reduced to the bare EC number: everything from the first "(" to the end of
  # the string, minus its first and last character.
  commented <- mutate(
    commented,
    field = "TRANSFERRED_DELETED",
    description = str_sub(str_extract(.data$ID, "\\(.*$"), 2, -2)
  )
  commented <- mutate(
    commented,
    ID = str_extract(.data$ID, "^(\\d+\\.){3}\\d+")
  )

  # IDs that do not start with four dot-separated numbers are dropped.
  commented <- filter(commented, !is.na(.data$ID))

  bind_rows(regular, commented)
}


#' @title Download and unzip the BRENDA text file.
#'
#' @description By default, the function downloads a zipped BRENDA text file to
#' a local cache directory, and extracts a `brenda_download.txt` file. The
#' download is skipped when the cached zip file is known to be up to date.
#'
#' @param force.download Boolean value. If TRUE, ignore the cache and force
#' re-download of the BRENDA text file. Default is FALSE.
#'
#' @return A string of the path to the downloaded BRENDA text file. An error is
#' raised if that file is not present after extraction.
#' @export
#'
#' @examples \dontrun{DownloadBrenda()}
#'
#' @importFrom utils unzip
#' @importFrom rappdirs user_cache_dir
#' @import BiocFileCache
DownloadBrenda <- function(force.download = FALSE) {
  message(
    "Please read the license agreement in the link below.\n\n",
    "https://www.brenda-enzymes.org/download_brenda_without_registration.php\n"
  )
  cache.dir <- rappdirs::user_cache_dir(appname = "brendaDb")
  bfc <- BiocFileCache(cache.dir)

  # Check for brenda zip file
  rid <- bfcquery(bfc, "brenda_zip", "rname")$rid
  if (length(rid) == 0L) {
    # Register the file if it's not in the cache; the download happens below
    message("File not found in cache. Downloading now...")
    brenda.zip.url <- paste0("https://s3.us-east-2.amazonaws.com/",
                             "brendadb-r-package/brenda_download.zip")
    rid <- names(bfcadd(bfc, "brenda_zip", brenda.zip.url, download = FALSE))
  }
  # The query can match more than one record; the scalar condition and the
  # single-archive unzip() below need exactly one resource id.
  rid <- rid[[1L]]

  # bfcneedsupdate() may return NA when the status cannot be determined, which
  # would make a plain `if` fail. Anything other than a definite FALSE triggers
  # a download. `force.download` is tested first so that a forced download
  # does not need the update check at all.
  if (force.download || !isFALSE(bfcneedsupdate(bfc, rid))) {
    bfcdownload(bfc, rid, ask = FALSE)
  } else {
    message("Found zip file in cache.")
  }

  message("Extracting zip file...")
  unzip(bfcpath(bfc, rids = rid), exdir = cache.dir)

  brenda.txt <- file.path(cache.dir, "brenda_download.txt")
  if (!file.exists(brenda.txt)) {
    # Do not hand back a path that does not exist (e.g. failed extraction).
    stop("Extraction did not produce the expected file: ", brenda.txt,
         call. = FALSE)
  }
  brenda.txt
}
# y1zhou/brendaDb documentation built on Dec. 12, 2022, 3:43 a.m.