# R/dl_drug_es.R
#
# Defines functions: load_drug_es, dl_drug_es
#
# Documented in: dl_drug_es, load_drug_es

#' Download drug effect size data.
#'
#' Downloads the requested files from the dseqr S3 bucket into the
#' \code{extdata} folder of the installed \code{dseqr.data} package. Files that
#' are already present are not downloaded again. The \code{timeout} option is
#' raised to at least 120 seconds for the duration of the call and restored
#' afterwards.
#'
#' @param files Character vector of file names to download.
#' @param check Check that existing drug effect size data is loadable? Default
#'   is FALSE. If TRUE, existing files that cannot be read with
#'   \code{qs::qread} are deleted and downloaded again.
#'
#' @return Called for its side effect of downloading drug effect size data
#'   into the package folder. Invisibly returns \code{NULL}. An error is raised
#'   if a download fails; the partially downloaded file is removed first.
#' @export
#' @examples
#'
#' dl_drug_es('example.qs')
#'
dl_drug_es <- function(
    files = c("cmap_es_ind.qs", "l1000_drugs_es.qs", "l1000_genes_es.qs"),
    check = FALSE) {

    # files are large: allow at least 120 seconds per download. on.exit
    # restores the caller's setting on early return and on error as well.
    old_opts <- options(timeout = max(120, getOption("timeout", default = 60)))
    on.exit(options(old_opts), add = TRUE)

    dest_dir <- system.file(package = "dseqr.data", mustWork = TRUE)
    dest_dir <- file.path(dest_dir, "extdata")
    dir.create(dest_dir, showWarnings = FALSE)

    # files that are already present don't need to be downloaded
    exist_files <- files[file.exists(file.path(dest_dir, files))]

    if (length(exist_files) > 0 && check) {
        message(paste(exist_files, collapse = " and "), " already exists.")

        # keep only the existing files that can actually be read
        can_load <- vapply(exist_files, function(exist_file) {
            message("Checking that ", exist_file, " can be loaded.")

            tryCatch(
                {
                    # .qs files are written with qs, not saveRDS
                    qs::qread(file.path(dest_dir, exist_file))
                    TRUE
                },
                error = function(err) {
                    message("Couldn't load ", exist_file, ". Will download.")
                    unlink(file.path(dest_dir, exist_file))
                    FALSE
                }
            )
        }, logical(1))

        exist_files <- exist_files[can_load]
    }

    need_files <- setdiff(files, exist_files)
    if (length(need_files) == 0) {
        return(invisible(NULL))
    }

    for (need_file in need_files) {
        message("downloading: ", need_file)
        dl_url <- paste0("https://s3.us-east-2.amazonaws.com/dseqr/",
                         need_file)
        dest_file <- file.path(dest_dir, need_file)

        # mode = "wb": .qs files are binary and are otherwise corrupted by
        # text-mode translation on Windows
        status <- tryCatch(
            utils::download.file(dl_url, dest_file, mode = "wb"),
            error = function(err) {
                # don't leave a partial file that would later look valid
                unlink(dest_file)
                stop(err)
            }
        )

        # download.file signals some failures through a non-zero status only
        if (!identical(as.integer(status), 0L)) {
            unlink(dest_file)
            stop("Failed to download ", need_file, call. = FALSE)
        }
    }

    invisible(NULL)
}


#' Load drug effect size data
#'
#' Downloads requested file if not done so previously. If the file is missing
#' or cannot be read it is removed and downloaded again, up to
#' \code{max_tries} times.
#'
#' @param file Character vector of drug effect size datasets to load. One of
#'   \code{'cmap_es_ind.qs'} (CMAP02),
#'   \code{'l1000_drugs_es.qs'} (L1000 compounds), or
#'   \code{'l1000_genes_es.qs'} (L1000 genetic perturbations). Only the first
#'   element is used.
#' @param max_tries Maximum number of download attempts before giving up.
#'   Default is 3.
#'
#' @return data.frame of expression values. Rows are genes, columns are
#'   perturbations. An error is raised if the file still cannot be loaded
#'   after \code{max_tries} download attempts.
#' @export
#'
#' @examples
#'
#' # dummy example (actual files are large)
#' load_drug_es("example.qs")
load_drug_es <- function(
    file = c("cmap_es_ind.qs", "l1000_drugs_es.qs", "l1000_genes_es.qs"),
    max_tries = 3L) {

    stopifnot(
        is.character(file), length(file) >= 1,
        is.numeric(max_tries), length(max_tries) == 1, max_tries >= 0
    )

    # a single dataset is loaded: the default vector resolves to its first
    # element, so only that file is ever downloaded or reported on
    file <- file[1]
    dest_dir <- system.file(package = "dseqr.data", mustWork = TRUE)
    fpath <- file.path(dest_dir, "extdata", file)

    tries <- 0L
    repeat {
        drug_es <- NULL

        if (file.exists(fpath)) {
            drug_es <- tryCatch(
                qs::qread(fpath),
                error = function(err) {
                    message("Couldn't load ", file)
                    # remove the unreadable file so that it is replaced
                    unlink(fpath)
                    NULL
                }
            )
        }

        if (!is.null(drug_es)) {
            return(drug_es)
        }

        # bounded retries: without this a persistent download failure
        # (e.g. no network) would loop forever
        if (tries >= max_tries) {
            stop(
                "Couldn't load ", file, " after ", tries,
                " download attempt(s).",
                call. = FALSE
            )
        }
        tries <- tries + 1L

        tryCatch(
            dl_drug_es(file),
            error = function(err) {
                message(
                    "Couldn't download ", file, ": ", conditionMessage(err)
                )
            }
        )
    }
}
# hms-dbmi/drugseqr.data documentation built on Oct. 23, 2024, 10:28 p.m.