R/02-getData.R
In Pandora: Retrieve Data using the API of the 'Pandora' Data Platform

Documented in dataOptions filterResourceByName filterValidFileType getData getNrow isOldROnWindows loadData selectSingleFile validateResource

#' Get Data
#'
#' @param name (character) name of a resource, e.g. an entry of the output from
#'  \code{getResources()$name}
#' @param verbose Logical, indicating whether to display processing messages.
#'   If TRUE, messages will be displayed; if FALSE, messages will be suppressed.
#'   Default is TRUE.
#' @param options (list) a list of extra options for \code{read.csv()} or \code{openxlsx::read.xlsx()} and
#'  \code{readxl::read_excel}
#' @inheritParams getResources
#'
#' @return (data.frame) return data from the Pandora API
#' @export
getData <- function(name,
                    repository = "",
                    verbose = TRUE,
                    options = dataOptions()) {
  resource <- try({
    getResources(repository = repository) %>%
      validateResource(repository = repository) %>%
      filterResourceByName(name = name) %>%
      filterValidFileType(name = name) %>%
      selectSingleFile()
  }, silent = TRUE)

  data <- try({
    loadData(
      path = resource[["url"]],
      type = resource[["format"]],
      nrows = options$nrows,
      sep = options$text$sep,
      dec = options$text$dec,
      fileEncoding = options$text$fileEncoding,
      colNames = options$colNames,
      sheet = options$xlsx$sheet,
      verbose = verbose
    )
  }, silent = TRUE)
  
  # catch possible errors directly inside this function
  res <- list()
  
  if (inherits(resource, "try-error")) {
    stop(resource[[1]])
  } else if (length(resource) == 0 && nrow(resource) == 0) {
    stop(sprintf("An error occurred for resource with name '%s'", name))
  }
  
  if (inherits(data, "try-error")) {
    msg <- if (resource[["format"]] == "csv") {
      "Please check dataOptions() for this resource."
    } else {
      ""
    }
    msg <- sprintf("%s for resource with name '%s', %s", data[[1]], name, msg)
    stop(msg)
  } else if (length(data) == 0 && !is.null(attr(data, "error"))) {
    msg <- sprintf("%s for resource with name '%s'", attr(data, "error"), name)
    stop(msg)
  } else if (length(data) > 0 && nrow(data) > 0) {
    res <- data
  } else {
    msg <- sprintf("An error occurred for resource with name '%s'", name)
    stop(msg)
  }
  
  return(res)
}

#' Data Options
#'
#' Set options for \code{utils::read.csv()}, \code{openxlsx::read.xlsx()} or 
#' \code{readxl::read_excel}. Choose delimiter and decimal separator as well
#' as sheetnumbner and number of rows to read.
#'
#' @inheritParams utils::read.csv
#' @inheritParams openxlsx::read.xlsx
#'
#' @return a list of extra options for \code{utils::read.csv()} or \code{openxlsx::read.xlsx()} or
#'  \code{readxl::read_excel}, respectively
#' @export
dataOptions <- function(nrows = NA_integer_,
                        colNames = TRUE,
                        sep = ",",
                        dec = ".",
                        fileEncoding = "",
                        sheet = 1) {
  list(
    nrows = nrows,
    colNames = colNames,
    text = list(sep = sep,
                dec = dec,
                fileEncoding = fileEncoding),
    xlsx = list(sheet = sheet)
  )
}

#' Validate Resource
#'
#' @param resource (data.frame) resources data frame
#' @inheritParams getResources
#'
#' @return (data.frame) resource, or error if empty
validateResource <- function(resource, repository) {
  if (nrow(resource) == 0) {
    stop(sprintf("No resource found for repository '%s'", repository))
  }
  return(resource)
}

#' Filter Resource by Name
#'
#' @param resource (data.frame) resources data frame
#' @param name (character) name of a resource
#'
#' @return (data.frame) filtered resource
filterResourceByName <- function(resource, name) {
  resource <- resource[resource[["name"]] == name, ]
  if (nrow(resource) == 0) {
    stop(sprintf("No resource found with name '%s'", name))
  }
  return(resource)
}
  
#' Filter Resource by Valid File Type
#'
#' @inheritParams filterResourceByName
#'
#' @return (data.frame) filtered resource
filterValidFileType <- function(resource, name) {
  validFileTypes <- config()$fileTypes
  resource <- resource[resource[["format"]] %in% validFileTypes, ]
  if (nrow(resource) == 0) {
    stop(sprintf(
      "No resource found with name '%s' and with valid file type (%s)",
      name,
      paste(validFileTypes, collapse = ", ")
    ))
  }
  return(resource)
}
  
#' Select Single File from Resources
#'
#' @inheritParams filterResourceByName
#'
#' @return (data.frame) selected resource
selectSingleFile <- function(resource) {
  if (nrow(resource) > 1) {
    orderVec <- na.omit(match(config()$fileTypes, resource[["format"]]))
    resource <- resource[orderVec, ]
    resource <- resource[1, ]
  }
  return(resource)
}

#' Load Data
#'
#' @param path path to the file
#' @param type (character) type of file, one of \code{c("xlsx", "xls", "odt", "csv", "txt")}
#' @inheritParams utils::read.csv
#' @inheritParams openxlsx::read.xlsx
#' @inheritParams getData
#'
#' @return (data.frame) data loaded from the file at path
#' @export
loadData <-
  function(path,
           type = c("xlsx", "xls", "odt", "csv", "txt"),
           nrows = NA_integer_,
           sep = ",",
           dec = ".",
           fileEncoding = "",
           colNames = TRUE,
           sheet = 1,
           verbose = TRUE) {
    type <- match.arg(type)
    # empty result if an error occurs
    res <- list()
    
    # if(type == "csv" | type == "txt"){
    #   codepages <- setNames(iconvlist(), iconvlist())
    #   x <- lapply(codepages, function(enc) try(suppressWarnings({read.csv(path,
    #                                                     fileEncoding=enc,
    #                                                     sep = sep, dec = dec,
    #                                                     stringsAsFactors = FALSE,
    #                                                     row.names = NULL,
    #                                                     nrows=3, header=TRUE)}),
    #                                            silent = TRUE)) # you get lots of errors/warning here
    #   x <- x[!sapply(x, function(y) class(y) %in% "try-error")]
    #   maybe_ok <- which(sapply(x, function(y) isTRUE(all.equal(dim(y)[1], c(3)))))
    #   if(length(maybe_ok) > 0){
    #     fileEncoding <- names(maybe_ok[1])
    #   } else {
    #     fileEncoding <- ""
    #   }
    # }
    
    if (fileEncoding == "") {
      fileEncoding <- as.character(guess_encoding(path)[1, 1])
    }
    
    if (type %in% c("csv", "txt")) {
      if (verbose) message(sprintf("Encoding: '%s'.\n", fileEncoding))
      isOldROnWindows()
    }
    
    if (type == "xlsx") {
      xlsSplit <- strsplit(path, split = "\\.")[[1]]
      if (xlsSplit[length(xlsSplit)] == "xls") {
        type <- "xls"
      }
    }
    
    data <- switch(
      type,
      csv = suppressWarnings({
        read.csv(
          path,
          header = colNames,
          sep = sep,
          dec = dec,
          stringsAsFactors = FALSE,
          row.names = NULL,
          fileEncoding = fileEncoding,
          nrows = getNrow(type = type, nrows = nrows)
        )
      }),
      txt = suppressWarnings({
        read.csv(
          path,
          header = colNames,
          sep = sep,
          dec = dec,
          stringsAsFactors = FALSE,
          row.names = NULL,
          fileEncoding = fileEncoding,
          nrows = getNrow(type = type, nrows = nrows)
        )
      }),
      xlsx = read.xlsx(
        path,
        sheet = sheet,
        colNames = colNames,
        rows = getNrow(type = type, nrows = nrows)
      ),
      xls = suppressWarnings({
        readxl::read_excel(
          path,
          sheet = sheet,
          col_names = colNames,
          n_max = getNrow(type = type, nrows = nrows)
        )
      }),
      ods = readODS::read_ods(
        path,
        sheet = sheet,
        col_names = colNames,
        range = getNrow(type = type, nrows = nrows)
      )
    )
    
    if (type %in% c("csv", "txt")) {
      errorInfo <- sprintf("Encoding: '%s', seperator: '%s', dec character: '%s'.", 
                           fileEncoding, sep, dec)
    } else {
      errorInfo <- ""
    }
    
    if (is.null(dim(data))) {
      stop(paste("Could not determine dimensions of data", errorInfo))
    }
    
    if (any(dim(data) == 1)) {
      stop(paste("Number of rows or columns equal to 1.", errorInfo))
    }
    
    if (any(dim(data) == 0)) {
      stop(paste("Number of rows or columns equal to 0", errorInfo))
    }
    
    return(data)
  }

#' get nRow
#'
#' @param type (character) file type
#' @inheritParams utils::read.csv
getNrow <- function(type, nrows = NA_integer_) {
  if (!is.null(nrows) && !is.na(nrows) &&
      is.numeric(nrows) && (nrows > 0) && nrows == round(nrows)) {
    if (type == "xlsx")
      return(1:nrows)
    else
      if (type == "ods")
        return(paste0("A1:C", nrows))
    else
      return(nrows)
  } else {
    if (type %in% c("xlsx", "ods"))
      return(NULL)
    else
      if (type == "xls")
        return(Inf)
    else
      return(-999)
  }
}

#' Is old windows
#' 
#' Checks if package is used with an older R version which possibly leads to encryption errors on Windows.
#' Gives a warning in that case.
#' 
#' @return (logical) TRUE if system is Windows and R version is < 4.2.0
isOldROnWindows <- function() {
  if (Sys.info()["sysname"] == "Windows" && 
      ((as.numeric(R.Version()$major) < 4) ||
       (as.numeric(R.Version()$major) == 4 && as.numeric(R.Version()$minor) < 2))) {
    warning("Please upgrade to R version >= 4.2.0 in order to prevent possible encryption issues when loading text files.")
    return(TRUE)
  } else {
    return(FALSE)
  }
}

Any scripts or data that you put into this service are public.

Pandora documentation built on June 22, 2024, 6:48 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Pandora
Retrieve Data using the API of the 'Pandora' Data Platform

R/02-getData.R
In Pandora: Retrieve Data using the API of the 'Pandora' Data Platform

Defines functions isOldROnWindows getNrow loadData selectSingleFile filterValidFileType filterResourceByName validateResource dataOptions getData

Documented in dataOptions filterResourceByName filterValidFileType getData getNrow isOldROnWindows loadData selectSingleFile validateResource

Try the Pandora package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Pandora Retrieve Data using the API of the 'Pandora' Data Platform

R/02-getData.R In Pandora: Retrieve Data using the API of the 'Pandora' Data Platform

Defines functions isOldROnWindows getNrow loadData selectSingleFile filterValidFileType filterResourceByName validateResource dataOptions getData

Documented in dataOptions filterResourceByName filterValidFileType getData getNrow isOldROnWindows loadData selectSingleFile validateResource

Try the Pandora package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Pandora
Retrieve Data using the API of the 'Pandora' Data Platform

R/02-getData.R
In Pandora: Retrieve Data using the API of the 'Pandora' Data Platform