azimuth: A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data

Documented in ConvertGeneNames GetTranscripts LoadBridgeReference LoadFileInput LoadH5AD LoadH5ADobs LoadReference NNTransform SaveAzimuthReference

#' Converts gene names of query to match type/species of reference names (human
#' or mouse).
#'
#' @param object Object to convert, must contain only RNA counts matrix
#' @param reference.names Gene names of reference
#' @param homolog.table Location of file (or URL) containing table with
#' human/mouse homologies
#' @return query object with converted feature names, likely subsetted
#'
#' @importFrom stringr str_match
#' @importFrom shiny isRunning
#' @useDynLib Azimuth
#' @export
#'
ConvertGeneNames <- function(object, reference.names, homolog.table) {
  uri <- httr::build_url(url = httr::parse_url(url = homolog.table))
  if (grepl(pattern = '^://', x = uri)) {
    if (!file.exists(homolog.table)) {
      stop("Homolog file doesn't exist at path provided: ", homolog.table)
    }
  } else {
    if (!Online(url = homolog.table)) {
      stop("Cannot find the homolog table at the URL given: ", homolog.table)
    }
    homolog.table <- url(description = homolog.table)
    on.exit(expr = {
      close(con = homolog.table)
    })
  }
  linked <- readRDS(file = homolog.table)
  query.names <- rownames(x = object)
  # remove version numbers
  ensembl.id <- '(?:ENSG|ENSMUS)'
  capture.nonversion <- paste0('(', ensembl.id, '.*)\\.(?:.*)')
  pattern <- paste0('(?:', capture.nonversion,')|(.*)')
  query.names <- Filter(
    f = function(x) !is.na(x = x),
    x = as.vector(x = t(x = str_match(string = query.names, pattern = pattern)[, 2:3]))
  )
  # determine idtype and species of query
  # 5000 because sometimes ensembl IDs are mixed with regular gene names
  query.names.sub = sample(
    x = query.names,
    size = min(length(x = query.names), 5000)
  )
  idtype <- names(x = which.max(x = apply(
    X = linked,
    MARGIN = 2,
    FUN = function(col) {
      length(x = intersect(x = col, y = query.names.sub))
    }
  )))
  species <- ifelse(test = length(x = grep(pattern = '\\.mouse', x = idtype)) > 0, 'mouse', 'human')
  idtype <- gsub(pattern = '\\.mouse|\\.human', replacement = '', x = idtype)
  message('detected inputs from ', toupper(x = species), ' with id type ', idtype)
  totalid <- paste0(idtype, '.', species)

  # determine idtype and species of ref
  reference.names.sub <- sample(x = reference.names, size = min(length(x = reference.names), 5000))
  idtype.ref <- names(x = which.max(x = apply(
    X = linked,
    MARGIN = 2,
    FUN = function(col) {
      length(x = intersect(x = col, y = reference.names))
    }
  )))
  species.ref <- ifelse(test = length(x = grep(pattern = '\\.mouse', x = idtype.ref)) > 0, 'mouse', 'human')
  idtype.ref <- gsub(pattern = '\\.mouse|\\.human', replacement = '', x = idtype.ref)
  message('reference rownames detected ', toupper(x = species.ref),' with id type ', idtype.ref)
  totalid.ref <- paste0(idtype.ref, '.', species.ref)

  if (totalid == totalid.ref) {
    return(object)
  } else {
    display.names <- setNames(
      c('ENSEMBL gene', 'ENSEMBL transcript', 'gene name', 'transcript name'),
      nm = c('Gene.stable.ID', 'Transcript.stable.ID','Gene.name','Transcript.name')
    )
    if (isRunning()) {
      showNotification(
        paste0(
          "Converted ",species," ",display.names[idtype]," IDs to ",
          species.ref," ",display.names[idtype.ref]," IDs"
        ),
        duration = 3,
        type = 'warning',
        closeButton = TRUE,
        id = 'no-progress-notification'
      )
    }
    # set up table indexed by query ids (totalid)
    linked.unique <- linked[!duplicated(x = linked[[totalid]]), ]
    new.indices <- which(query.names %in% linked.unique[[totalid]])
    message("Found ", length(x = new.indices), " out of ",
            length(x = query.names), " total inputs in conversion table")
    query.names <- query.names[new.indices]
    rownames(x = linked.unique) <- linked.unique[[totalid]]
    linked.unique <- linked.unique[query.names, ]
    # get converted totalid.ref
    new.names <- linked.unique[, totalid.ref]
    # remove duplicates
    notdup <- !duplicated(x = new.names)
    new.indices <- new.indices[notdup]
    new.names <- new.names[notdup]
    # subset/rename object accordingly
    counts <- GetAssayData(object = object[["RNA"]], slot = "counts")[rownames(x = object)[new.indices], ]
    rownames(x = counts) <- new.names
    reductions <- slot(object = object, name = "reductions")
    object <- CreateSeuratObject(
      counts = counts,
      meta.data = object[[]]
    )
    slot(object = object, name = "reductions") <- reductions
    return(object)
  }
}

ConvertEnsembleToSymbol <- function(
    mat,
    species = c('human', 'mouse')
) {
  species <- match.arg(arg = species)
  if (species == 'human') {
    database <- 'hsapiens_gene_ensembl'
    symbol <- 'hgnc_symbol'

  } else if (species == 'mouse') {
    database <- 'mmusculus_gene_ensembl'
    symbol <- 'mgi_symbol'

  } else {
    stop('species name not found')
  }

  library("biomaRt")
  library("dplyr")

  name_df <- data.frame(gene_id = c(rownames(mat)))
  name_df$orig.id <- name_df$gene_id
  #make this a character, otherwise it will throw errors with left_join
  name_df$gene_id <- as.character(name_df$gene_id)
  # in case it's gencode, this mostly works
  #if ensembl, will leave it alone
  name_df$gene_id <- sub("[.][0-9]*","",name_df$gene_id)
  mart <- useDataset(dataset = database, useMart("ensembl"))
  genes <-  name_df$gene_id
  gene_IDs <- getBM(filters= "ensembl_gene_id",
                    attributes= c("ensembl_gene_id", symbol),
                    values = genes,
                    mart= mart)
  gene.df <- left_join(name_df, gene_IDs, by = c("gene_id"="ensembl_gene_id"))
  rownames(gene.df) <- make.unique(gene.df$orig.id)
  gene.df <- gene.df[rownames(mat),]
  gene.df <-gene.df[gene.df[,symbol] != '',]
  gene.df <- gene.df[ !is.na(gene.df$orig.id),]
  mat.filter <- mat[gene.df$orig.id,]
  rownames(mat.filter) <- make.unique(gene.df[,symbol])
  return(mat.filter)
}

#' Connect to a single-cell HDF5 dataset
#'
#' @param filename Name of on-disk file
#' @param type Type of single-cell dataset to connect as; choose from:
#' \itemize{
#'  \item h5seurat
#' }
#' Leave as \code{NULL} to guess type from file extension
#' @param mode Mode to connect to data as; choose from:
#' \describe{
#'  \item{r}{Open existing dataset in read-only mode}
#'  \item{r+}{Open existing dataset in read/write mode}
#' }
#' @param force Force a connection if validation steps fail; returns a
#' \code{\link[hdf5r]{H5File}} object
#'
#' @return An object of class \code{type}, opened in mode \code{mode}
#'
#' @importFrom hdf5r H5File
#'
#' @export
#'
Connect <- function(
  filename,
  type = NULL,
  mode = c('r', 'r+'),
  force = FALSE
) {
  type <- type %||% FileType(file = filename)
  mode <- match.arg(arg = mode)
  if (!file.exists(filename)) {
    stop("Cannot find ", type, " file ", filename, call. = FALSE)
  }
  return(tryCatch(
    expr = {
      cls <- GetSCDisk(r6class = type)
      cls$new(filename = filename, mode = mode)
    },
    error = function(err) {
      if (!isTRUE(x = force)) {
        stop(err$message, call. = FALSE)
      }
      warning(err$message, call. = FALSE, immediate. = TRUE)
      return(H5File$new(filename = filename, mode = mode))
    }
  ))
}

#' Determine a filetype based on its extension
#'
#' @param file Name of file
#'
#' @return The extension, all lowercase
#'
#' @importFrom tools file_ext
#'
FileType <- function(file) {
  ext <- file_ext(x = file)
  ext <- ifelse(test = nchar(x = ext), yes = ext, no = basename(path = file))
  return(tolower(x = ext))
}

# Return CSS styling for hover box on interactive plots
#
# @param x X hover position (hover$coords_css$x)
# @param y Y hover position (hover$coords_css$y)
#
# @return Returns a string of CSS to pass to style param
#
HoverBoxStyle <- function(x, y) {
  xpad <- 20 # important to avoid collisions between cursor and hover panel
  ypad <- 20
  paste0(
    "position:absolute; background-color:rgba(245, 245, 245, 0.85); ",
    "left:", (x + xpad), "px; top:",
    (y - ypad), "px;",
    "padding: 5px; margin-bottom: 0px;"
  )
}

#' Load file input into a \code{Seurat} object
#'
#' Take a file and load it into a \code{\link[Seurat]{Seurat}} object. Supports
#' a variety of file types and always returns a \code{Seurat} object
#'
#' @details \code{LoadFileInput} supports several file types to be read in as
#' \code{Seurat} objects. File type is determined by extension, matched in a
#' case-insensitive manner See sections below for details about supported
#' filtypes, required extension, and specifics for how data is loaded
#'
#' @param path Path to input data
#'
#' @return A \code{\link[Seurat]{Seurat}} object
#'
#' @importFrom tools file_ext
#' @importFrom SeuratDisk Connect
#' @importFrom SeuratObject CreateSeuratObject Assays GetAssayData
#' DefaultAssay<-
#' @importFrom Seurat Read10X_h5 as.sparse Assays DietSeurat as.Seurat
#'
#' @section 10X H5 File (extension \code{h5}):
#' 10X HDF5 files are supported for all versions of Cell Ranger; data is read
#' in using \code{\link[Seurat]{Read10X_h5}}. \strong{Note}: for multi-modal
#' 10X HDF5 files, only the \emph{first} matrix is read in
#'
#' @section Rds File (extension \code{rds}):
#' Rds files are supported as long as they contain one of the following data
#' types:
#' \itemize{
#'  \item A \code{\link[Seurat]{Seurat}} V3 object
#'  \item An S4 \code{\link[Matrix]{Matrix}} object
#'  \item An S3 \code{\link[base]{matrix}} object
#'  \item A \code{\link[base]{data.frame}} object
#' }
#' For S4 \code{Matrix}, S3 \code{matrix}, and \code{data.frame} objects, a
#' \code{Seurat} object will be made with
#' \code{\link[Seurat]{CreateSeuratObject}} using the default arguments
#'
#' @section h5Seurat File (extension \code{h5seurat}):
#' h5Seurat files and all of their features are fully supported. They are read
#' in via \code{\link[SeuratDisk]{LoadH5Seurat}}. \strong{Note}: only the
#' \dQuote{counts} matrices are read in and only the default assay is kept
#'
#' @inheritSection LoadH5AD AnnData H5AD File (extension \code{h5ad})
#' @export
#'
LoadFileInput <- function(path, bridge = FALSE) {
  # TODO: add support for loom files
  on.exit(expr = gc(verbose = FALSE))
  type <- tolower(x = tools::file_ext(x = path))
  return(switch(
    EXPR = type,
    'h5' = {
      mat <- Read10X_h5(filename = path)
      if (is.list(x = mat)) {
        mat <- mat[[1]]
      }
      object <- CreateSeuratObject(counts = mat, min.cells = 1, min.features = 1)
      if (inherits(x = object[["RNA"]], what = "Assay5")) {
        object[["RNA"]]$data <- object[["RNA"]]$counts
      }
      object
    },
    'rds' = {
      object <- readRDS(file = path)
      if (inherits(x = object, what = c('Matrix', 'matrix', 'data.frame'))) {
        object <- CreateSeuratObject(counts = as.sparse(x = object), min.cells = 1, min.features = 1)
      } else if (inherits(x = object, what = 'Seurat')) {
        if (isTRUE(x = bridge)){
          if (!'ATAC' %in% Assays(object = object)) {
            stop("No ATAC assay provided", call. = FALSE)
          } else if (Seurat:::IsMatrixEmpty(x = GetAssayData(object = object, slot = 'counts', assay = 'ATAC'))) {
            stop("No ATAC counts matrix present", call. = FALSE)
          }
          assay <- "ATAC"
        } else{
          if (!'RNA' %in% Assays(object = object)) {
            stop("No RNA assay provided", call. = FALSE)
          } else if (Seurat:::IsMatrixEmpty(x = GetAssayData(object = object, slot = 'counts', assay = 'RNA'))) {
            stop("No RNA counts matrix present", call. = FALSE)
          }
          assay <- "RNA"
        }
        object <- tryCatch({
          CreateSeuratObject(
            counts = GetAssayData(object = object[[assay]], slot = "counts"),
            min.cells = 1,
            min.features = 1,
            meta.data = object[[]]
          )
        }, error = function(e){
          object <- UpdateSeuratObject(object)
          CreateSeuratObject(
            counts = GetAssayData(object = object[[assay]], slot = "counts"),
            min.cells = 1,
            min.features = 1,
            meta.data = object[[]]
          )
        })
        if (inherits(x = object[["RNA"]], what = "Assay5")) {
          object[["RNA"]]$data <- object[["RNA"]]$counts
        }
      } else {
        stop("The RDS file must be a Seurat object", call. = FALSE)
      }
      object
    },
    'h5ad' = LoadH5AD(path = path),
    'h5seurat' = {
      if (!requireNamespace("SeuratDisk", quietly = TRUE)) {
        stop("Loading h5Seurat files requires SeuratDisk", call. = FALSE)
      }
      hfile <- suppressWarnings(expr = SeuratDisk::Connect(filename = path))
      on.exit(expr = hfile$close_all())
      if (isTRUE(x = bridge)){
        if (!'ATAC' %in% names(x = hfile[['assays']])) {
          stop("Cannot find the ATAC assay in this h5Seurat file", call. = FALSE)
        } else if (!'counts' %in% names(x = hfile[['assays/ATAC']])) {
          stop("No ATAC counts matrix provided", call. = FALSE)
        }
        object <- as.Seurat(
          x = hfile,
          assays = list(ATAC = 'counts'),
          reductions = FALSE,
          graphs = FALSE,
          images = FALSE
        )
        assay <- 'ATAC'
      } else {
        if (!'RNA' %in% names(x = hfile[['assays']])) {
          stop("Cannot find the RNA assay in this h5Seurat file", call. = FALSE)
        } else if (!'counts' %in% names(x = hfile[['assays/RNA']])) {
          stop("No RNA counts matrix provided", call. = FALSE)
        }
        object <- as.Seurat(
          x = hfile,
          assays = list(RNA = 'counts'),
          reductions = FALSE,
          graphs = FALSE,
          images = FALSE
        )
        assay <- 'RNA'
      }
      if (inherits(x = object[[assay]], what = "Assay5")) {
        if (length(Layers(object, search = "counts")) > 1) {
          object[[assay]] <- JoinLayers(object[[assay]], 
                                        layers = "counts", new = "counts")
        }
      }
      object <- CreateSeuratObject(
        counts = GetAssayData(object = object[[assay]], slot = "counts"),
        min.cells = 1,
        min.features = 1,
        meta.data = object[[]]
      )
      if (inherits(x = object[[assay]], what = "Assay5")) {
        object[[assay]]$data <- object[[assay]]$counts
      }
    },
    stop("Unknown file type: ", type, call. = FALSE)
  ))
}

#' Load a diet H5AD file
#'
#' Read in only the counts matrix and (if present) metadata of an H5AD file and
#' return a \code{Seurat} object
#'
#' @inheritParams LoadFileInput
#'
#' @return A \code{Seurat} object
#'
#' @importFrom hdf5r H5File h5attr
#' @importFrom SeuratObject AddMetaData CreateSeuratObject
#'
#' @keywords internal
#'
#' @section AnnData H5AD File (extension \code{h5ad}):
#' Only H5AD files from AnnData v0.7 or higher are supported. Data is read from
#' the H5AD file in the following manner
#' \itemize{
#'  \item The counts matrix is read from \dQuote{/raw/X}; if \dQuote{/raw/X} is
#'  not present, the matrix is read from \dQuote{/X}
#'  \item Feature names are read from feature-level metadata. Feature level
#'  metadata must be an HDF5 group, HDF5 compound datasets are \strong{not}
#'  supported. If counts are read from \code{/raw/X}, features names are looked
#'  for in \dQuote{/raw/var}; if counts are read from \dQuote{/X}, features
#'  names are looked for in \dQuote{/var}. In both cases, feature names are read
#'  from the dataset specified by the \dQuote{_index} attribute, \dQuote{_index}
#'  dataset, or \dQuote{index} dataset, in that order
#'  \item Cell names are read from cell-level metadata. Cell-level metadata must
#'  be an HDF5 group, HDF5 compound datasets are \strong{not} supported.
#'  Cell-level metadata is read from \dQuote{/obs}. Cell names are read from the
#'  dataset specified by the \dQuote{_index} attribute, \dQuote{_index} dataset,
#'  or \dQuote{index} dataset, in that order
#'  \item Cell-level metadata is read from the \dQuote{/obs} dataset. Columns
#'  will be returned in the same order as in the \dQuote{column-order}, if
#'  present, or in alphabetical order. If a dataset named \dQuote{__categories}
#'  is present, then all datasets in \dQuote{__categories} will serve as factor
#'  levels for datasets present in \dQuote{/obs} with the same name (eg. a
#'  dataset named \dQuote{/obs/__categories/leiden} will serve as the levels for
#'  \dQuote{/obs/leiden}). Row names will be set as cell names as described
#'  above. All datasets in \dQuote{/obs} will be loaded except for
#'  \dQuote{__categories} and the cell names dataset
#' }
#'
LoadH5AD <- function(path) {
  if (!requireNamespace("hdf5r", quietly = TRUE)) {
    stop("Loading H5AD files requires hdf5r", call. = FALSE)
  }
  adata <- hdf5r::H5File$new(filename = path, mode = 'r')
  on.exit(expr = adata$close_all())
  Exists <- function(name) {
    name <- unlist(x = strsplit(x = name[1], split = '/', fixed = TRUE))
    hpath <- character(length = 1L)
    exists <- TRUE
    for (i in seq_along(along.with = name)) {
      hpath <- paste(hpath, name[i], sep = '/')
      exists <- adata$exists(name = hpath)
      if (isFALSE(x = exists)) {
        break
      }
    }
    return(exists)
  }
  GetIndex <- function(md) {
    return(
      if (adata[[md]]$attr_exists(attr_name = '_index')) {
        h5attr(x = adata[[md]], which = '_index')
      } else if (adata[[md]]$exists(name = '_index')) {
        '_index'
      } else if (adata[[md]]$exists(name = 'index')) {
        'index'
      } else {
        stop("Cannot find the rownames for '", md, "'", call. = FALSE)
      }
    )
  }
  GetRowNames <- function(md) {
    return(adata[[md]][[GetIndex(md = md)]][])
  }
  LoadMetadata <- function(md) {
    factor.cols <- if (adata[[md]]$exists(name = '__categories')) {
      names(x = adata[[md]][['__categories']])
    } else {
      NULL
    }
    index <- GetIndex(md = md)
    col.names <- names(x = adata[[md]])
    if (adata[[md]]$attr_exists(attr_name = 'column-order')) {
      tryCatch(
        expr = {
          col.order <- hdf5r::h5attr(x = adata[[md]], which = 'column-order')
          col.names <- c(
            intersect(x = col.order, y = col.names),
            setdiff(x = col.names, y = col.order)
          )
        },
        error = function(...) {
          return(invisible(x = NULL))
        }
      )
    }
    col.names <- col.names[!col.names %in% c('__categories', index)]
    df <- sapply(
      X = col.names,
      FUN = function(i) {
        x <- adata[[md]][[i]][]
        if (i %in% factor.cols) {
          x <- factor(x = x, levels = adata[[md]][['__categories']][[i]][])
        }
        return(x)
      },
      simplify = FALSE,
      USE.NAMES = TRUE
    )
    return(as.data.frame(x = df, row.names = GetRowNames(md = md)))
  }
  if (Exists(name = 'raw/X')) {
    md <- 'raw/var'
    x <- adata[['raw/X']]
  } else if (Exists(name = 'X')) {
    md <- 'var'
    x <- adata[['X']]
  } else {
    stop("Cannot find counts matrix", call. = FALSE)
  }
  # check different possible attributes to try and get matrix shape
  if (isTRUE(x$attr_exists(attr_name = 'h5sparse_shape'))) {
    mtx.shape <- h5attr(x, 'h5sparse_shape')
  } else if (isTRUE(x$attr_exists(attr_name = 'shape'))) {
    mtx.shape <- h5attr(x, 'shape')
  } else {
    warning("Could not determine matrix shape")
  }
  # check different attributes to try and deduce matrix type
  if (isTRUE(x$attr_exists(attr_name = 'h5sparse_format'))) {
    mtx.type <- h5attr(x, 'h5sparse_format')
  } else if (isTRUE(x$attr_exists(attr_name = 'encoding-type'))) {
    mtx.type <- substr(h5attr(x, 'encoding-type'), 0, 3)
  } else {
    mtx.type <- 'csr' # assume matrix is csr
    warning("Could not determine matrix format")
  }
  if (mtx.type != 'csr') {
    p <- as.integer(x[['indptr']][])
    i <- as.integer(x[['indices']][])
    data <- as.double(x[['data']][])
    # csc -> csr
    converted.mtx <- csc_tocsr(
      n_row = as.integer(mtx.shape[1]),
      n_col = as.integer(mtx.shape[2]),
      Ap = p,
      Ai = i,
      Ax = data
    )
    # csr -> dgC
    counts <- new(
      Class = 'dgCMatrix',
      p = converted.mtx$p,
      i = converted.mtx$i,
      x = converted.mtx$x,
      Dim = c(mtx.shape[2], mtx.shape[1])
    )
  } else {
    # x must be a CSR matrix
    counts <- as.matrix(x = x)
  }
  metadata <- LoadMetadata(md = 'obs') # gather additional cell-level metadata
  rownames <- GetRowNames(md = md)
  colnames <- rownames(metadata)
  rownames(x = counts) <- rownames
  colnames(x = counts) <- colnames
  options(Seurat.object.assay.calcn = TRUE)
  object <- CreateSeuratObject(counts = counts)
  if (ncol(x = metadata)) {
    object <- AddMetaData(object = object, metadata = metadata)
  }
  object <- subset(object, subset = nCount_RNA > 0)
  return(object)
}


#' Load obs from a H5AD file
#'
#' Read in only the metadata of an H5AD file and
#' return a data.frame object
#' @section AnnData H5AD File (extension \code{h5ad}):
#' @importFrom rhdf5 h5read
#' @export
#'
LoadH5ADobs <- function(path, cell.groups = NULL) {
  cell.var <- rhdf5::h5readAttributes(file = path, name = 'obs')$`_index`
  cells.index <- h5read(file = path, name = 'obs')[[cell.var]]
  suppressWarnings(expr = hfile <- Connect(filename = path, force = TRUE))
  hfile_obs <- hfile[['obs']]
  #cell.groups <- cell.groups %||% intersect(names(hfile_obs), c('_index', 'cell', 'cell_id'))
  obs_groups <- setdiff(names(hfile_obs), c('__categories',  c('_index', 'cell', 'cell_id')))
  matrix <- as.data.frame(
    x = matrix(data = NA,
               nrow = length(cells.index),
               ncol = length(obs_groups))
  )
  colnames(matrix) <- obs_groups
  rownames(matrix) <- cells.index
  if ('__categories' %in% names(x = hfile_obs)) {
    hfile_cate <- hfile_obs[['__categories']]
    for (i in seq_along(obs_groups)) {
      obs.i <- obs_groups[i]
      obs_value_i <- hfile_obs[[obs.i]][]
      if (obs.i %in% names(x = hfile_cate)){
        obs_value_i <- factor(x = obs_value_i, labels =  hfile_cate[[obs.i]][])
      }
      matrix[,i] <- obs_value_i
    }
  } else {
    for (i in seq_along(obs_groups)) {
      obs.i <- obs_groups[i]
      if (all(names(hfile_obs[[obs.i]]) == c("categories", "codes"))) {
        if (
          length(unique(hfile_obs[[obs.i]][['codes']][])) == length(hfile_obs[[obs.i]][['categories']][])
        ) {
          obs_value_i <- factor(
            x = hfile_obs[[obs.i]][['codes']][],
            labels =  hfile_obs[[obs.i]][['categories']][]
          )
        } else {
          obs_value_i <- hfile_obs[[obs.i]][['codes']][]
        }
      } else {
        # list of list, skip for now
        obs_value_i <- tryCatch(expr = hfile_obs[[obs.i]][],
                                error = function(e) {
          return('unknown')
        })
      }
      matrix[,i] <- obs_value_i
    }
  }
  hfile$close_all()
  return(matrix)
}


#' Load the reference RDS files
#'
#' Read in a reference \code{\link[Seurat]{Seurat}} object and annoy index. This
#' function can read either from URLs or a file path. In order to read properly,
#' there must be the following files:
#' \itemize{
#'  \item \dQuote{ref.Rds} for the downsampled reference \code{Seurat}
#'  object (for mapping)
#'  \item \dQuote{idx.annoy} for the nearest-neighbor index object
#' }
#'
#' @param path Path or URL to the two RDS files
#' @param seconds Timeout to check for URLs in seconds
#'
#' @return A list with two entries:
#' \describe{
#'  \item{\code{map}}{
#'   The downsampled reference \code{\link[Seurat]{Seurat}}
#'   object (for mapping)
#'  }
#'  \item{\code{plot}}{The reference \code{Seurat} object (for plotting)}
#' }
#'
#' @importFrom SeuratObject Idents<- Loadings<- 
#' @importFrom Seurat LoadAnnoyIndex Loadings
#' @importFrom httr build_url parse_url status_code GET timeout
#' @importFrom utils download.file
#' @importFrom Matrix sparseMatrix
#'
#' @export
#' @examples
#' \dontrun{
#' # Load from a URL
#' ref <- LoadReference("https://seurat.nygenome.org/references/pbmc")
#' # Load from a directory
#' ref2 <- LoadReference("/var/www/html")
#' }
#'
LoadReference <- function(path, seconds = 10L) {
  op <- options(Seurat.object.assay.calcn = FALSE)
  on.exit(expr = options(op), add = TRUE)
  ref.names <- list(
    map = 'ref.Rds',
    ann = 'idx.annoy'
  )
  if (substr(x = path, start = nchar(x = path), stop = nchar(x = path)) == '/') {
    path <- substr(x = path, start = 1, stop = nchar(x = path) - 1)
  }
  uri <- httr::build_url(url = httr::parse_url(url = path))
  if (grepl(pattern = '^://', x = uri) | grepl(pattern = '^[a-zA-Z]{1}://', x = uri)) {
    if (!dir.exists(paths = path)) {
      stop("Cannot find directory ", path, call. = FALSE)
    }
    mapref <- file.path(path, ref.names$map)
    annref <- file.path(path, ref.names$ann)
    exists <- file.exists(c(mapref, annref))
    if (!all(exists)) {
      stop(
        "Missing the following files from the directory provided: ",
        Oxford(unlist(x = ref.names)[!exists], join = 'and')
      )
    }
  } else {
    ref.uris <- paste(uri, ref.names, sep = '/')
    names(x = ref.uris) <- names(x = ref.names)
    online <- vapply(
      X = ref.uris,
      FUN = Online,
      FUN.VALUE = logical(length = 1L),
      USE.NAMES = FALSE
    )
    if (!all(online)) {
      stop(
        "Cannot find the following files at the site given: ",
        Oxford(unlist(x = ref.names)[!online], join = 'and')
      )
    }
    mapref <- url(description = ref.uris[['map']])
    annref <- tempfile()
    download.file(url = ref.uris[['ann']], destfile = annref, quiet = TRUE)
    on.exit(expr = {
      close(con = mapref)
      unlink(x = annref)
    })
  }
  # Load the map reference
  map <- readRDS(file = mapref)

  # handle new parameters in uwot models beginning in v0.1.13
  if (!"num_precomputed_nns" %in% names(Misc(map[["refUMAP"]])$model)) {
    Misc(map[["refUMAP"]], slot="model")$num_precomputed_nns <- 1
  }

  # Load the annoy index into the Neighbor object in the neighbors slot
  map[["refdr.annoy.neighbors"]] <- LoadAnnoyIndex(
    object = map[["refdr.annoy.neighbors"]],
    file = annref
  )
  # Validate that reference contains required dims
  if (ncol(x = map[["refDR"]]) < getOption(x = "Azimuth.map.ndims")) {
    stop("Provided reference doesn't contain at least ",
         getOption(x = "Azimuth.map.ndims"), " dimensions. Please either
         regenerate reference with requested dimensionality or adjust ",
         "the Azimuth.map.ndims option.")
  }
  # Fix colnames of 'feature.loadings' in reference 
  key.pattern = "^[^_]*_"
  new.colnames <- gsub(pattern = key.pattern, 
                       replacement = Key(map[["refDR"]]), 
                       x = colnames(Loadings(
                         object = map[["refDR"]],
                         projected = FALSE)))
  colnames(Loadings(object = map[["refDR"]], 
                    projected = FALSE)) <- new.colnames
  # Create plotref
  ad <- Tool(object = map, slot = "AzimuthReference")
  plotref.dr <- GetPlotRef(object = ad)
  cm <- sparseMatrix(
    i = 1, j = 1, x = 0, dims = c(1, nrow(x = plotref.dr)),
    dimnames = list("placeholder", Cells(x = plotref.dr))
  )
  op <- options(Seurat.object.assay.version = "v3")
  on.exit(expr = options(op), add = TRUE)
  plot <- CreateSeuratObject(counts = cm)
  plot[["refUMAP"]] <- plotref.dr
  DefaultAssay(plot[["refUMAP"]]) <- DefaultAssay(plot)
  plot <- AddMetaData(object = plot, metadata = Misc(object = plotref.dr, slot = "plot.metadata"))
  gc(verbose = FALSE)
  return(list(
    map = map,
    plot = plot
  ))
}

#' Load the extended reference RDS file for bridge integration
#'
#' Read in a precomputed extended reference. This function can
#' read either from URLs or a file path. The function looks for a file 
#' called ext.Rds for the extended reference \code{Seurat} object 
#'
#' @param path Path or URL to the RDS file
#' @param seconds Timeout to check for URLs in seconds
#'
#' @return A list with two entries:
#' \describe{
#'  \item{\code{map}}{
#'   The extended reference \code{\link[Seurat]{Seurat}}
#'   object 
#'  }
#'  \item{\code{plot}}{The reference \code{Seurat} object (for plotting)}
#' }
#'
#' @importFrom SeuratObject Idents<- RenameAssays Loadings<- 
#' @importFrom Seurat LoadAnnoyIndex Loadings
#' @importFrom httr build_url parse_url status_code GET timeout
#' @importFrom utils download.file
#' @importFrom Matrix sparseMatrix
#'
#' @export
#' @examples
#' \dontrun{
#' # Load from a URL
#' ref <- LoadBridgeReference("https://seurat.nygenome.org/references/pbmc")
#' # Load a file from the path to a directory 
#' ref2 <- LoadBridgeReference("path/")
#' # Load a file directly
#' ref3 <- LoadBridgeReference("ext.Rds")
#' }

LoadBridgeReference<- function(path, seconds = 10L) {
  op <- options(Seurat.object.assay.calcn = FALSE)
  on.exit(expr = options(op), add = TRUE)
  ref.names <- list(
    map = 'ext.Rds'
  )
  if (substr(x = path, start = nchar(x = path), stop = nchar(x = path)) == '/') {
    path <- substr(x = path, start = 1, stop = nchar(x = path) - 1)
  }
  uri <- httr::build_url(url = httr::parse_url(url = path))
  if (grepl(pattern = '^://', x = uri) | grepl(pattern = '^[a-zA-Z]{1}://', x = uri)) {
    if (file.exists(path) && !dir.exists(path)){
      extref <- path
    } else if (!dir.exists(paths = path)) {
        stop("Cannot find directory ", path, call. = FALSE)
    } else {
      extref <- file.path(path, ref.names$map)
    }
    exists <- file.exists(c(extref))
    if (!all(exists)) {
      stop(
        "Missing the following files from the directory provided: ",
        Oxford(unlist(x = ref.names)[!exists], join = 'and')
      )
    }
  } else {
    ref.uris <- paste(uri, ref.names, sep = '/')
    names(x = ref.uris) <- names(x = ref.names)
    online <- vapply(
      X = ref.uris,
      FUN = Online,
      FUN.VALUE = logical(length = 1L),
      USE.NAMES = FALSE
    )
    if (!all(online)) {
      stop(
        "Cannot find the following files at the site given: ",
        Oxford(unlist(x = ref.names)[!online], join = 'and')
      )
    }
    #mapref <- url(description = ref.uris[['map']])
    on.exit(expr = {
      close(con = extref)
      unlink(x = annref)
    })
  }
  # Load the map reference
  map <- readRDS(file = extref)
  # handle new parameters in uwot models beginning in v0.1.13
  if (!"num_precomputed_nns" %in% names(Misc(map[["refUMAP"]])$model)) {
    Misc(map[["refUMAP"]], slot="model")$num_precomputed_nns <- 1
  }
  
  # Validate that reference contains required dims
  if (ncol(x = map[["refDR"]]) < getOption(x = "Azimuth.map.ndims")) {
    stop("Provided reference doesn't contain at least ",
         getOption(x = "Azimuth.map.ndims"), " dimensions. Please either
         regenerate reference with requested dimensionality or adjust ",
         "the Azimuth.map.ndims option.")
  }
  key.pattern = "^[^_]*_"
  new.colnames <- gsub(pattern = key.pattern, 
                       replacement = Key(map[["refDR"]]), 
                       x = colnames(Loadings(
                         object = map[["refDR"]],
                         projected = FALSE)))
  colnames(Loadings(object = map[["refDR"]], 
                    projected = FALSE)) <- new.colnames
  # Create plotref
  ad <- Tool(object = map, slot = "AzimuthReference")
  plotref.dr <- GetPlotRef(object = ad)
  cm <- sparseMatrix(
    i = 1, j = 1, x = 0, dims = c(1, nrow(x = plotref.dr)),
    dimnames = list("placeholder", Cells(x = plotref.dr))
  )
  op <- options(Seurat.object.assay.version = "v3")
  on.exit(expr = options(op), add = TRUE)
  plot <- CreateSeuratObject(counts = cm)
  plot[["refUMAP"]] <- plotref.dr
  plot <- AddMetaData(object = plot, metadata = Misc(object = plotref.dr, slot = "plot.metadata"))
  gc(verbose = FALSE)
  return(list(
    map = map,
    plot = plot
  ))
}



#' Save \code{Azimuth} references and neighbors index to same folder
#'
#' @param object An \code{\link{Azimuth}} reference
#' @param file Path to save \code{Azimuth} reference to; defaults to
#' \code{file.path(getwd(), "azimuth_reference/"))}
#' @inheritDotParams base::saveRDS
#'
#' @return Invisibly returns \code{file}
#'
#' @export
#' @seealso \code{\link{saveRDS}()} \code{\link{readRDS}()}
#'
#'
#' @examples
#' # Make Azimuth Reference object
#' obj.azimuth <- AzimuthReference(object)
#' 
#' # Save 
#' SaveAzimuthReference(object = obj.azimuth, folder = "azimuth_reference")
#' 
#' # Run Azimuth
#' 
#' query <- RunAzimuth(query = query, 
#'                     reference = "azimuth_reference", 
#'                     ...)
#' 
#'
SaveAzimuthReference <- function(
    object = NULL,
    folder = NULL
) {
  if (is.null(Tool(object, "AzimuthReference"))){
    stop("The object is not an AzimuthReference object.", 
         "Please run AzimuthReference() and try again.")
  }
  folder <- folder %||% file.path(getwd(), "azimuth_reference/")
  base::saveRDS(object = object, file = paste0(folder, "ref.Rds"), compress=F)
  SaveAnnoyIndex(object = object[["refdr.annoy.neighbors"]], file = paste0(folder, "idx.annoy"))
  message("Saved 'ref.Rds' and 'idx.annoy' in ", folder, "folder")
  return(invisible(x = folder))
}



#' Transform an NN index
#'
#' @param object Seurat object
#' @param meta.data Metadata
#' @param neighbor.slot Name of Neighbor slot
#' @param key Column of metadata to use
#'
#' @return \code{object} with transfomed neighbor.slot
#'
#' @importFrom SeuratObject Indices
#'
#' @keywords internal
#'
NNTransform <- function(
  object,
  meta.data,
  neighbor.slot = "query_ref.nn",
  key = 'ori.index'
) {
  on.exit(expr = gc(verbose = FALSE))
  ind <- Indices(object[[neighbor.slot]])
  ori.index <- t(x = sapply(
    X = 1:nrow(x = ind),
    FUN = function(i) {
      return(meta.data[ind[i, ], key])
    }
  ))
  rownames(x = ori.index) <- rownames(x = ind)
  slot(object = object[[neighbor.slot]], name = "nn.idx") <- ori.index
  return(object)
}

# Check if file is available at given URL
#
# @param url URL to file
# @param strict Ensure http code is 200
#
# @return Boolean indicating file presence
#
Online <- function(url, strict = FALSE) {
  if (isTRUE(x = strict)) {
    code <- 200L
    comp <- identical
  } else {
    code <- 404L
    comp <- Negate(f = identical)
  }
  return(comp(
    x = httr::status_code(x = httr::GET(
      url = url,
      httr::timeout(seconds = getOption('timeout')
      ))),
    y = code
  ))
}

# Make An English List
#
# Joins items together to make an English list; uses the Oxford comma for the
# last item in the list.
#
#' @inheritParams base::paste
# @param join either \dQuote{and} or \dQuote{or}
#
# @return A character vector of the values, joined together with commas and
# \code{join}
#
#' @keywords internal
#
# @examples
# \donttest{
# Oxford("red")
# Oxford("red", "blue")
# Oxford("red", "green", "blue")
# }
#
Oxford <- function(..., join = c('and', 'or')) {
  join <- match.arg(arg = join)
  args <- as.character(x = c(...))
  args <- Filter(f = nchar, x = args)
  if (length(x = args) == 1) {
    return(args)
  } else if (length(x = args) == 2) {
    return(paste(args, collapse = paste0(' ', join, ' ')))
  }
  return(paste0(
    paste(args[1:(length(x = args) - 1)], collapse = ', '),
    paste0(', ', join, ' '),
    args[length(x = args)]
  ))
}

# Determine if there are a prohibitive # of annotations for legend
OversizedLegend <- function(annotation.list) {
  return(length(x = unique(x = as.vector(x = annotation.list))) > 50)
}

# Toggle demo button enable/disable
#
# @param action Whether to enable or disable the buttons
# @param demos data.frame containing demo dataset name and file paht
#
# @return No return value
#
ToggleDemos <- function(action = c("enable", "disable"), demos = NULL) {
  if (!is.null(x = demos)) {
    if (action == "enable") {
      for (i in 1:nrow(x = demos)) {
        enable(id = paste0("triggerdemo", i))
      }
    }
   if (action == "disable") {
     for (i in 1:nrow(x = demos)) {
       disable(id = paste0("triggerdemo", i))
     }
   }
  }
}

# Theme for the plot on welcome page
#
WelcomePlot <- function(...) {
  welcomeplot.theme <- theme(
    axis.line = element_blank(), axis.ticks = element_blank(),
    axis.text.x = element_blank(), axis.text.y = element_blank(),
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    legend.position = "none", plot.title = element_blank(),
    # if we want backgroundless... (also replace 'box' with 'fluidRow' in UI)
    panel.background = element_rect(color = '#ecf0f5', fill = '#ecf0f5'),
    plot.background = element_rect(color = '#ecf0f5', fill = '#ecf0f5')
  )
  return(welcomeplot.theme)
}



############### Overlap Functionality ###############
# QC For Overlap between multiomic bridge and atac query 
#
# @param query_assay 
# @param multiome_atac 
#
# @return No return value
#

OverlapDistPlot <- function(query_assay, multiome){
  atac_peaks <- OverlapQC(query = query_assay, subject = multiome)
  d <- density(atac_peaks$perc_overlap)
  plot(d, xlab='Percentage of Overlap', main = 'Distribution of Overlap Percentages')
}

# Calculate Overlap percentage for dataframe of overlap info per peak 
#
# @param atac_peaks dataframe with coordinates for each peak and overlap 
#
# @return Percentage of Overlap 
#
PercOverlap <- function(overlap_df){
  # if no overlap  
  len_overlap <- overlap_df$o_end - overlap_df$o_start
  len_q <- overlap_df$end - overlap_df$start
  perc <- (len_overlap/len_q)
  return(perc)
}

# Calculate Create dataframe with info for each peaks's overlap in multiome 
#
# @param o_hits Iranges object of overlapping hits 
# @param query 
# @param subject multi[["ATAC"]] for example
#
# @return Percentage of Overlap 
#
OverlapQC <- function(query, subject) {
  o_hits <- findOverlaps(query, subject)
  query_inds <- queryHits(o_hits)
  subject_inds <- subjectHits(o_hits)
  overlap_df <- as.data.table(GetAssayData(query, slot = "ranges")[query_inds,])
  subject_peaks <- as.data.table(GetAssayData(subject, assay = "query", slot = "ranges")[subject_inds,])
  overlap_df$o_start <- mapply(max, overlap_df$start, subject_peaks$start)
  overlap_df$o_end <- mapply(min, overlap_df$end, subject_peaks$end)
  overlap_df$perc_overlap <- PercOverlap(overlap_df)
  overlap_df
}

OverlapTotal <- function(query, subject){ 
  overlap_df <- OverlapQC(query, subject)
  q_width <- sum(overlap_df$width) # but there will be repeats in this 
  o_width <- sum(overlap_df$o_end - overlap_df$o_start)
  amount_covered <- (o_width/q_width) * 100
  return(amount_covered)
}

PeakJaccard <- function(query, subject) {
  overlap_df <- OverlapQC(query, subject)
  intersection = sum(overlap_df$o_end - overlap_df$o_start) # this is the total overlap 
  total_query_width = sum(width(query@ranges))
  total_subject_width = sum(width(subject@ranges))
  union = total_query_width + total_subject_width - intersection
  return ((intersection/union) * 100)
}

# Requantify atac peaks to either multiomic peaks or to genes 
#
# @param o_hits Iranges object of overlapping hits (Should use same assay as assay for requantification)
# @param ATAC chromatin assay or Seurat Object
# @param subject ATAC assay from Bridge or Transcripts dataframe 
# @param assay assay to use in requantifying peaks to genes (original peaks "peak.orig" or requantified peaks "ATAC")
# @param verbose
#
# @return Percentage of Overlap 
#
RequantifyPeaks <- function(
    #o_hits, 
    atac, 
    subject,
    assay = "peak.orig",
    verbose = TRUE){
  # Query peaks that have overlap w/ multiome peaks
  if (inherits(x = atac, what = "ChromatinAssay")){
    o_hits <- findOverlaps(atac, subject[["ATAC"]])
    atac <- GetAssayData(atac, assay = "ATAC", slot = "counts")
    atac_inds <- queryHits(o_hits)
    atac_subset <- atac[atac_inds, ]
    new_names <- rownames(subject[["ATAC"]])[subjectHits(o_hits)]
    if (verbose){
      message("Requantifying query peaks to match multiome")
    }
    # Reassign query row names
    row.names(atac_subset) <- new_names
    # Merge duplicates
    row.names <- row.names(atac_subset)
    model.matrix <- sparse.model.matrix(
      object = ~ 0 + row.names
    )  
    colnames(x = model.matrix) <- sapply(
      X = colnames(x = model.matrix),
      FUN = function(name) {
        name <- gsub(pattern = "row.names", replacement = "", x = name)
        return(paste0(rev(x = unlist(x = strsplit(x = name, split = ":"))),
                      collapse = "__"
        ))
      }
    )
    # Multiply matrices to combine counts
    atac_final <- as((Matrix::t(model.matrix) %*% atac_subset), "dgCMatrix")
  } else if (inherits(x = atac, what = "Seurat")){ 
    o_hits <- suppressWarnings(findOverlaps(atac[[assay]], subject))
    atac_inds <- queryHits(o_hits)
    DefaultAssay(atac) <- assay
    print(atac)
    atac_data <- GetAssayData(atac, assay = assay, slot = "counts")
    atac_final <- atac_data[atac_inds, ]
    new_names <- GRangesToString(subject[subjectHits(o_hits)])
    if (verbose){
      message("Requantifying query peaks to genes")
    }
    # Reassign query row names
    rownames(atac_final) <- new_names
    # Merge duplicates
    atac_final <- rowsum(atac_final, row.names(atac_final), reorder=FALSE)  
    atac_final <- Matrix::Matrix(atac_final, sparse = TRUE) 
  } else{
    stop("Incorrect object type ")
  }
  ##### code from signac 
  if (inherits(x = subject, what = "GRanges")){
    gene.key <- subject$gene_name
    names(x = gene.key) <- GRangesToString(grange = subject)
    rownames(x = atac_final) <- as.vector(x = gene.key[rownames(x = atac_final)])
    atac_final <- atac_final[rownames(x = atac_final) != "", ]
  }
  return(atac_final)
}

# Requantify atac peaks to either multiomic peaks or to genes (optimized for a large set of features)
#
# @param o_hits Iranges object of overlapping hits (Should use same assay as assay for requantification)
# @param ATAC chromatin assay or Seurat Object
# @param subject ATAC assay from Bridge or Transcripts dataframe 
# @param assay assay to use in requantifying peaks to genes (original peaks "peak.orig" or requantified peaks "ATAC")
# @param verbose
#
# @return Percentage of Overlap 
#
RequantifyPeaksLarge <- function(
    atac, 
    subject,
    assay = "peak.orig",
    verbose = TRUE){
  # Query peaks that have overlap w/ multiome peaks
  if (inherits(x = atac, what = "ChromatinAssay")){
    o_hits <- findOverlaps(atac, subject[["ATAC"]])
    atac <- GetAssayData(atac, assay = "ATAC", slot = "counts")
    atac_inds <- queryHits(o_hits)
    atac_subset <- atac[atac_inds, ]
    new_names <- rownames(subject[["ATAC"]][subjectHits(o_hits)]) 
    if (verbose){
      message("Requantifying query peaks to match multiome")
    }
  } else if (inherits(x = atac, what = "Seurat")){ 
    o_hits <- suppressWarnings(findOverlaps(atac[[assay]], subject))
    atac_inds <- queryHits(o_hits)
    DefaultAssay(atac) <- assay
    atac_data <- GetAssayData(atac, assay = assay, slot = "counts")
    atac_subset <- atac_data[atac_inds, ]
    new_names <- GRangesToString(subject[subjectHits(o_hits)])
    if (verbose){
      message("Requantifying query peaks to genes")
    }
  } else{
    stop("Incorrect object type ")
  }
  # Reassign query row names
  row.names(atac_subset) <- new_names
  # Merge duplicates
  row.names <- row.names(atac_subset)
  model.matrix <- sparse.model.matrix(
    object = ~ 0 + row.names
  )  
  colnames(x = model.matrix) <- sapply(
    X = colnames(x = model.matrix),
    FUN = function(name) {
      name <- gsub(pattern = "row.names", replacement = "", x = name)
      return(paste0(rev(x = unlist(x = strsplit(x = name, split = ":"))),
                    collapse = "__"
      ))
    }
  )
  # Multiply matrices to combine counts
  atac_final <- as((Matrix::t(model.matrix) %*% atac_subset), "dgCMatrix")
  ##### code from signac 
  if (inherits(x = subject, what = "GRanges")){
    gene.key <- subject$gene_name
    names(x = gene.key) <- GRangesToString(grange = subject)
    rownames(x = atac_final) <- as.vector(x = gene.key[rownames(x = atac_final)])
    atac_final <- atac_final[rownames(x = atac_final) != "", ]
  }
  return(atac_final)
}


#' Get transcripts modified from Signac::GeneActivity
#'
#' @param object A Seurat object
#' @param assay Name of assay to use. If NULL, use the default assay
#' @param features Genes to include. If NULL, use all protein-coding genes in
#' the annotations stored in the object
#' @param extend.upstream Number of bases to extend upstream of the TSS
#' @param extend.downstream Number of bases to extend downstream of the TTS
#' @param biotypes Gene biotypes to include. If NULL, use all biotypes in the
#' gene annotation.
#' @param max.width Maximum allowed gene width for a gene to be quantified.
#' Setting this parameter can avoid quantifying extremely long transcripts that
#' can add a relatively long amount of time. If NULL, do not filter genes based
#' on width.
#' @param process_n Number of regions to load into memory at a time, per thread.
#' Processing more regions at once can be faster but uses more memory.
#' @param gene.id Record gene IDs in output matrix rather than gene name.
#' @param verbose
#'
#' @importFrom SeuratObject DefaultAssay
#' 
#' @return Transcripts 
#'
GetTranscripts <- function( 
  object,
  assay = NULL,
  features = NULL,
  extend.upstream = 2000,
  extend.downstream = 0,
  biotypes = "protein_coding",
  max.width = 500000,
  process_n = 2000,
  gene.id = FALSE,
  verbose = TRUE
) {
  if (!is.null(x = features)) {
    if (length(x = features) == 0) {
      stop("Empty list of features provided")
    }
  }
  # collapse to longest protein coding transcript
  assay <- Signac:::SetIfNull(x = assay, y = DefaultAssay(object = object))
  if (!inherits(x = object[[assay]], what = "ChromatinAssay")) {
    stop("The requested assay is not a ChromatinAssay.")
  }
  annotation <- Annotation(object = object[[assay]])
  # replace NA names with gene IDD
  annotation$gene_name <- ifelse(
    test = is.na(x = annotation$gene_name) | (annotation$gene_name == ""),
    yes = annotation$gene_id,
    no = annotation$gene_name
  )
  if (length(x = annotation) == 0) {
    stop("No gene annotations present in object")
  }
  if (verbose) {
    message("Extracting gene coordinates")
  }
  transcripts <- Signac:::CollapseToLongestTranscript(ranges = annotation)
  if (gene.id) {
    transcripts$gene_name <- transcripts$gene_id
  }
  if (!is.null(x = biotypes)) {
    transcripts <- transcripts[transcripts$gene_biotype %in% biotypes]
    if (length(x = transcripts) == 0) {
      stop("No genes remaining after filtering for requested biotypes")
    }
  }
  
  # filter genes if provided
  if (!is.null(x = features)) {
    transcripts <- transcripts[transcripts$gene_name %in% features]
    if (length(x = transcripts) == 0) {
      stop("None of the requested genes were found in the gene annotation")
    }
  }
  if (!is.null(x = max.width)) {
    transcript.keep <- which(x = width(x = transcripts) < max.width)
    transcripts <- transcripts[transcript.keep]
    if (length(x = transcripts) == 0) {
      stop("No genes remaining after filtering for max.width")
    }
  }
  
  # extend to include promoters
  transcripts <- Extend(
    x = transcripts,
    upstream = extend.upstream,
    downstream = extend.downstream
  )
  
}
satijalab/azimuth documentation built on Nov. 19, 2023, 8:34 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
satijalab/azimuth
A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data

R/helpers.R
In satijalab/azimuth: A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data

Documented in ConvertGeneNames GetTranscripts LoadBridgeReference LoadFileInput LoadH5AD LoadH5ADobs LoadReference NNTransform SaveAzimuthReference

R Package Documentation

Browse R Packages

We want your feedback!

satijalab/azimuth A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data

R/helpers.R In satijalab/azimuth: A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data

Documented in ConvertGeneNames GetTranscripts LoadBridgeReference LoadFileInput LoadH5AD LoadH5ADobs LoadReference NNTransform SaveAzimuthReference

R Package Documentation

Browse R Packages

We want your feedback!

satijalab/azimuth
A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data

R/helpers.R
In satijalab/azimuth: A Shiny App Demonstrating a Query-Reference Mapping Algorithm for Single-Cell Data