R/available.genomes.R

Defines functions getBSgenome .getBSgenomeObjectFromInstalledPkgname .getInstalledPkgnameFromGenome .stopOnMoreThanOneAvailablePkg .stopOnAvailablePkg available.genomes installed.genomes .splitNameParts .has_BSgenome_prefix

Documented in available.genomes getBSgenome installed.genomes

### =========================================================================
### available.genomes() and related utilities
### -------------------------------------------------------------------------
###


.BSGENOME_PREFIX <- "BSgenome."

.has_BSgenome_prefix <- function(x)
    substr(x, 1L, nchar(.BSGENOME_PREFIX)) == .BSGENOME_PREFIX


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### installed.genomes() and available.genomes()
###

.splitNameParts <- function(pkgs)
{
    if (length(pkgs) == 0L) {
        ans <- data.frame(pkgname=character(0),
                          organism=factor(),
                          provider=factor(),
                          genome=character(0),
                          masked=logical(0),
                          stringsAsFactors=FALSE)
        return(ans)
    }
    ## Remove ".masked" suffix.
    pkgs2 <- pkgs
    is_masked <- substr(pkgs2, nchar(pkgs2) - 6L, nchar(pkgs2)) == ".masked"
    idx <- which(is_masked)
    pkgs2[idx] <- substr(pkgs2[idx], 1L, nchar(pkgs2)[idx] - 7L)

    parts <- strsplit(pkgs2, ".", fixed=TRUE)
    nparts <- elementNROWS(parts)
    ## Some packages like BSgenome.Tgondii.ToxoDB.7.0 don't follow the rules
    ## and are made of more than 4 parts.
    has_more_than_4_parts <- which(nparts > 4L)
    for (i in has_more_than_4_parts) {
        tmp <- parts[[i]]
        parts[[i]] <- c(tmp[1:3], paste(tmp[4:length(tmp)], collapse="."))
    }
    ## And just in case one day we start to see some packages with less than
    ## 4 parts.
    has_less_than_4_parts <- which(nparts < 4L)
    for (i in has_less_than_4_parts) {
        tmp <- parts[[i]]
        parts[[i]] <- c(tmp, rep.int(NA_character_, 4L - length(tmp)))
    }
    ## From now, any top-level element in 'parts' is guaranteed to be a
    ## character vector of length 4.
    uparts <- unlist(parts)
    idx4 <- (1:length(parts)) * 4L
    data.frame(pkgname=pkgs,
               organism=factor(uparts[idx4 - 2L]),
               provider=factor(uparts[idx4 - 1L]),
               genome=uparts[idx4],
               masked=is_masked,
               stringsAsFactors=FALSE)
}

installed.genomes <- function(splitNameParts=FALSE)
{
    if (!isTRUEorFALSE(splitNameParts))
        stop("'splitNameParts' must be TRUE or FALSE")
    pkgs <- installed.packages()[ , "Package"]
    pkgs <- pkgs[.has_BSgenome_prefix(pkgs)]
    names(pkgs) <- NULL
    if (splitNameParts)
        pkgs <- .splitNameParts(pkgs)
    pkgs
}

available.genomes <- function(splitNameParts=FALSE, type=getOption("pkgType"))
{
    if (!isTRUEorFALSE(splitNameParts))
        stop("'splitNameParts' must be TRUE or FALSE")
    contrib_url <- get_data_annotation_contrib_url(type)
    pkgs <- available.packages(contrib_url)[, "Package"]
    pkgs <- pkgs[.has_BSgenome_prefix(pkgs)]
    names(pkgs) <- NULL
    if (splitNameParts)
        pkgs <- .splitNameParts(pkgs)
    pkgs
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### getBSgenome()
###

.stopOnAvailablePkg <- function(genome, is.source=FALSE)
    stop(genome, " package is not currently installed.\n",
         "  You first need to install it, which you can do with:\n",
         "      library(BiocManager)\n",
         "      install(\"", genome, "\"",
         ifelse(is.source, ", type=\"source\"", ""), ")")

.stopOnMoreThanOneAvailablePkg <- function(genome, masked, is.source=FALSE)
    stop("Looks like there is more than one available ",
         ifelse(masked, "masked ", ""), "BSgenome data package\n",
         "  that matches genome: ",
         genome, "\n",
         "  You first need to choose one (use 'available.genomes(",
         ifelse(is.source, "type=\"source\"", ""), ")' to get\n  the list). ",
         "Then install it, which you can do with:\n",
         "      library(BiocManager)\n",
         "      install(\"<pkgname>\"",
         ifelse(is.source, ", type=\"source\"", ""), ")")

.getInstalledPkgnameFromGenome <- function(genome, masked=FALSE)
{
    ## 1) Search installed packages.
    inst_pkgs <- installed.genomes(splitNameParts=TRUE)
    inst_pkgs <- inst_pkgs[inst_pkgs[ , "masked"] == masked, , drop=FALSE]
    idx <- which(genome == inst_pkgs[ , "genome"])
    if (length(idx) == 1L)
        return(inst_pkgs[idx , "pkgname"])
    if (length(idx) >= 2L)
        stop("Looks like you have more than one installed ",
             ifelse(masked, "masked ", ""), "BSgenome data package\n",
             "  that matches genome: ",
             genome, "\n",
             "  Please disambiguate by specifying the full name of the ",
             "package you want\n  to use (use 'installed.genomes()' to ",
             "get the list).")

    ## 2) Search available packages.
    av_pkgs <- available.genomes(splitNameParts=TRUE)
    av_pkgs <- av_pkgs[av_pkgs[ , "masked"] == masked, ]
    idx <- which(genome == av_pkgs[ , "genome"])
    if (length(idx) == 1L) {
        genome <- av_pkgs[idx , "pkgname"]
        .stopOnAvailablePkg(genome)
    }
    if (length(idx) >= 2L)
        .stopOnMoreThanOneAvailablePkg(genome, masked)

    ## 3) Search available source packages.
    if (getOption("pkgType") != "source") {
        av_srcpkgs <- available.genomes(splitNameParts=TRUE, type="source")
        av_srcpkgs <- av_srcpkgs[av_srcpkgs[ , "masked"] == masked, ]
        idx <- which(genome == av_srcpkgs[ , "genome"])
        if (length(idx) == 1L) {
            genome <- av_srcpkgs[idx , "pkgname"]
            .stopOnAvailablePkg(genome, is.source=TRUE)
        }
        if (length(idx) >= 2L)
            .stopOnMoreThanOneAvailablePkg(genome, masked, is.source=TRUE)
    }

    ## All searches have failed.
    stop("Couldn't find a ",
         ifelse(masked, "masked ", ""), "BSgenome data package ",
         "that matches genome assembly:\n  ",
         genome, "\n\n",
         "  Please use 'available.genomes()' ",
         "(or 'available.genomes(type=\"source\")')\n",
         "  to check the list of BSgenome data packages that are available ",
         "in the\n  Bioconductor repositories for your version of ",
         "R/Bioconductor.\n  If you don't find what you are looking for, ",
         "please use the BSgenomeForge package to forge a ",
         ifelse(masked, "masked ", ""), "BSgenome\n  data package ",
         "for your organism of interest.")
}

.getBSgenomeObjectFromInstalledPkgname <- function(genome, load.only=FALSE)
{
    if (!isTRUEorFALSE(load.only))
        stop("'load.only' must be TRUE or FALSE")
    if (load.only) {
        ## Try to load name space of package 'genome'.
        pkgenvir <- try(loadNamespace(genome), silent=TRUE)
        ok <- !inherits(pkgenvir, "try-error")
    } else {
        ## Try to load and attach package 'genome'.
        ok <- suppressWarnings(require(genome, quietly=TRUE,
                                       character.only=TRUE))
        if (ok)
            pkgenvir <- as.environment(paste("package", genome, sep=":"))
    }
    if (!ok) {
        av_pkgs <- available.genomes()
        if (genome %in% av_pkgs)
            .stopOnAvailablePkg(genome)
        if (getOption("pkgType") != "source") {
            av_srcpkgs <- available.genomes(type="source")
            if (genome %in% av_srcpkgs)
                .stopOnAvailablePkg(genome, is.source=TRUE)
        }
        stop(c("Package ", genome, " is not available.\n  ",
               wmsg("Please use the BSgenomeForge package to forge a ",
                    "BSgenome data package for your organism of interest.")))
    }
    ## Package 'genome' was successfully loaded.
    ans <- try(get(genome, envir=pkgenvir, inherits=FALSE), silent=TRUE)
    if (!is(ans, "BSgenome"))
        stop(genome, " doesn't look like a valid BSgenome data package")
    ans
}

getBSgenome <- function(genome, masked=FALSE, load.only=FALSE)
{
    if (!isTRUEorFALSE(masked))
        stop("'masked' must be TRUE or FALSE")
    if (is(genome, "BSgenome"))
        return(genome)
    if (!isSingleString(genome))
        stop("'genome' must be a BSgenome object, or the full name of an ",
             "installed\n  BSgenome data package, or a short string ",
             "specifying a genome assembly that\n  refers unambiguously",
             "to an installed BSgenome data package")
    if (!.has_BSgenome_prefix(genome)) {
        genome <- .getInstalledPkgnameFromGenome(genome, masked=masked)
    } else if (masked) {
        warning("'masked' is ignored when 'genome' is supplied as ",
                "a full package name")
    }
    .getBSgenomeObjectFromInstalledPkgname(genome, load.only=load.only)
}
Bioconductor/BSgenome documentation built on Oct. 31, 2024, 10:46 p.m.