R/source_XlsxData.R

#' Download an Excel data set
#'
#' \code{source_XlsxData} loads Excel data stored at a URL (both http
#' and https) into R.
#' @param url character string of the Excel files's URL.
#' @param sheet character string of number of representing the sheet in the
#' workbook to return. Only one sheet at a time can currently be returned
#' @param sha1 Character string of the file's SHA-1 hash, generated by
#' \code{source_data}. Note if you are using data stored using Git, this is not
#' the file's commit SHA-1 hash.
#' @param cache logical. Whether or not to cache the data so that it is not
#' downloaded every time the function is called.
#' @param clearCache logical. Whether or not to clear the downloaded data from
#' the cache.
#' @param ... arguments to pass to \code{read.xlsx} from the xlsx package.
#'
#' @return a data frame
#'
#' @seealso \code{read.xlsx}, \link{httr}, \code{\link{source_data}}
#'
#' @export

source_XlsxData <- function(url, sheet, sha1 = NULL, cache = FALSE,
                            clearCache = FALSE, ...)
{
    if (!requireNamespace('xlsx', quietly = TRUE)) {
        stop("xlsx package needed for this function to work. Please install it. \n\nNote: this may involve installing rJava and Java.\n",
             call. = FALSE)
    }
    
    stopifnot(is.character(url), length(url) == 1)

    if (missing(sheet)) {
        sheet = 1
        message('The first sheet will be returned.\n')
    }
    
    if (length(sheet) != 1){
        sheet <- sheet[1]
        warning(
            'Only one sheet can be returned per call. The first sheet will be returned.\n',
            call. = FALSE)
    }

    temp_file <- tempfile()
    on.exit(unlink(temp_file))

    key <- list(url, sheet)
    if (isTRUE(clearCache)){
        Found <- findCache(key = key)
        if (is.null(Found)){
            message('Data not in cache. Nothing to remove.\n')
        }
        else if (!is.null(Found)){
            message('Clearing data from cache.\n')
            file.remove(Found)
        }
    }

    if (isTRUE(cache)){
        data <- loadCache(key)
        if (!is.null(data)){
            message('Loading cached data.\n')
            message('Use clearCache = TRUE if you would like to have different arguments passed to read.xlsx.\n')
            return(data);
        }
        fullData <- download_data_intern(url = url, sha1 = sha1,
                                        temp_file = temp_file)
        if (class(sheet) == 'character'){
            data <- xlsx::read.xlsx(fullData, sheetName = sheet, ...)
        }
        else if (class(sheet) != 'character'){
            data <- xlsx::read.xlsx(fullData, sheetIndex = sheet, ...)
        }
        saveCache(data, key = key)
        data;
    }
    else if (!isTRUE(cache)){
        fullData <- download_data_intern(url = url, sha1 = sha1,
                                        temp_file = temp_file)
        if (class(sheet) == 'character'){
            data <- xlsx::read.xlsx(fullData, sheetName = sheet, ...)
        }
        else if (class(sheet) != 'character'){
            data <- xlsx::read.xlsx(fullData, sheetIndex = sheet, ...)
        }
        return(data)
    }
}
christophergandrud/repmis documentation built on May 13, 2019, 7:03 p.m.