R/ExportSeurat.R

Defines functions ExportSeuratObject ExportSeurat

Documented in ExportSeurat ExportSeuratObject

#' @title Export a Seurat object in BCS format
#' @param object a Seurat object
#' @param bcs.path path to BCS file
#' @param unique.limit ignore a metadata if it has number of unique labels larger than this number. Default is 100.
#' @param batch.name name of the metadata of batches. This is for specialized functions in BBrowser
#'                        that requires batch's information.
#' @param compression.level an integer ranging from 1 to 9. Higher level creates smaller files but takes more time to create and load. Default is 1.
#' @param author email of the creator. Default is rBCS.
#' @param overwrite if TRUE, overwrite existing file at bcs.path. Default is FALSE.
#' @param raw.rna name of the assay that has mRNA raw count. Default is `RNA`.
#' @param norm.rna name of the assay that has mRNA normalized count. Default is `RNA`.
#' @param raw.adt name of the assay that has ADT raw count. Default is `ADT`.
#' @param norm.adt name of the assay that has ADT normalized count. Default is `ADT`.
#' @param sort if TRUE, metadata with be sorted based on cell count (descending order). Default is FALSE
#' @import Matrix
#' @importFrom zip zip
#' @export
ExportSeurat <- function(
  object,
  bcs.path,
  unique.limit = 100,
  sort = FALSE,
  batch.name = "bioturing_batch",
  compression.level = 1,
  author = "rBCS",
  raw.rna = "RNA",
  norm.rna = "RNA",
  raw.adt = "ADT",
  norm.adt = "ADT",
  overwrite = FALSE
) {
  GetSparseMatrix <- function(x) {
    if (class(x)[1] == "dgCMatrix") {
      return(x)
    }
    return(as(x, "sparseMatrix"))
  }

  IsNullMatrix <- function(x) {
    return(is.null(x) || nrow(x) == 0 || ncol(x) == 0)
  }

  # assays: list of Assay class in Seurat
  # keys: a vector of keys sort by priority (descending)
  GetData <- function(assays, assay.name, keys) {
    data <- attr(assays[[assay.name]], keys[1])
    if (IsNullMatrix(data) && length(keys) > 1) {
      warning(paste("Assay", assay.name, "has no", keys[1]))
      data <- GetData(assays, assay.name, keys[-1])
    }
    if (!IsNullMatrix(data)) {
      data <- GetSparseMatrix(data)
    }
    return(data)
  }

  ValidateInput <- function() {
    if (length(object@reductions) == 0) {
      stop("Seurat object has no dimensionality reduction")
    } else if (length(object@reductions) == 1 && names(object@reductions) == "pca") {
      stop("Seurat object has no dimensionality reduction other than pca")
    }
    assays.list <- c(raw.rna, norm.rna)
    is.missing <- !(assays.list %in% names(object@assays))
    if (any(is.missing)) {
      stop("Found no assay with such names: ",
          paste(unique(assays.list[is.missing]), collapse=", "))
    }
    if (file.exists(bcs.path)) {
      if (overwrite) {
        warning(paste(basename(bcs.path), "will be replaced"))
        file.remove(bcs.path)
      } else {
        stop(paste(basename(bcs.path), "already exists. Please use overwrite=TRUE to force replacing."))
      }
    }
  }

  GetExpressionData <- function(object) {
    omics <- names(object@assays)
    counts <- GetData(object@assays, raw.rna, c("counts", "data"))
    norms <- GetData(object@assays, norm.rna, "data")
    feature.type <- rep("RNA", nrow(norms))
    feature.name <- rownames(norms)
    if (all(c(raw.adt, norm.adt) %in% omics)) {
      counts <- Matrix::rbind2(counts, GetData(object@assays, raw.adt, c("counts", "data")))
      norms <- Matrix::rbind2(norms, GetData(object@assays, norm.adt, "data"))
      feature.type <- c(feature.type, rep("ADT", nrow(norms) - length(feature.type)))
      feature.name <- rownames(norms)
      feature.name[feature.type == "ADT"] <- paste("ADT", feature.name[feature.type == "ADT"], sep="-")
    }
    rownames(norms) <- feature.name
    rownames(counts) <- feature.name
    return(list(counts=counts, norms=norms, feature_type=feature.type, feature.name=feature.name))
  }

  GetDimredData <- function(object) {
    # Correct method
    correct.method <- "none"
    if ("integrated" %in% names(object@assays)) {
      correct.method <- "cca"
    } else if ("harmony" %in% names(object@reductions)) {
      correct.method <- "harmony"
    } else if ("mnn" %in% names(object@reductions)) {
      correct.method <- "mnn"
    }

    # Dimred
    data <- lapply(object@reductions, function(dimred) {
      info <- list(
        param = list(omics=dimred@assay.used, correction=correct.method),
        coords = as.matrix(dimred@cell.embeddings),
        type = "dimred"
      )
      return(info)
    })
    names(data) <- names(object@reductions)

    # Typical lower dimred data
    for (i in which(names(data) %in% c("pca", "mnn", "harmony"))) {
      data[[i]]$type <- "low_dimred"
    }

    # CCA
    if ("integrated" %in% names(object@assays)) {
      data$cca <- list(
        coords = object@assays$integrated@data,
        type = "low_dimred"
      )
    }

    return(data)
  }

  GetSpatialData <- function(object) {
    images <- attr(object, "images")
    if (is.null(images) || length(images) == 0) {
      return(list())
    }
    spatial.data <- lapply(seq_along(images), function(i) {
      slide.name <- names(images)[i]
      image.data <- images[[i]]
      tissue_positions <- image.data@coordinates
      tissue_positions <- cbind(tissue_positions[ , 4:5])
      colnames(tissue_positions) <- c("y", "x")
      tissue_positions$y <- -tissue_positions$y
      tissue_positions <- tissue_positions[, c("x", "y")]

      image.dim <- dim(image.data@image)
      factors <- image.data@scale.factors
      n.cell <- nrow(tissue_positions)
      spatial_info <- list(
        image = image.data@image,
        image_name = slide.name,
        tissue_positions = tissue_positions,
        spatial_info = list(
          width = image.dim[2] / factors$lowres,
          height = image.dim[1] / factors$lowres,
          diameter = as.list(rep(factors$fiducial / 1.618, n.cell)), # Visium only
          diameter_micron = as.list(rep(55, n.cell)),
          version = 1
        )
      )
      return(spatial_info)
    })
    names(spatial.data) <- names(images)
    return(spatial.data)
  }

  GetMetadata <- function(object) {
    md <- object@meta.data
    if (!batch.name %in% names(md)) {
      return(md)
    }

    md$bioturing_batch <- md[[batch.name]]
    if (batch.name != "bioturing_batch") {
      md[[batch.name]] <- NULL
    }

    # Sort metadata
    if (sort) {
      for (name in colnames(md)) {
        if (is(md[[name]], "character") || is(md[[name]], "factor")) {
          md[[name]] <- SortFactor(md[[name]])
        }
      }
    }

    return(md)
  }

  ValidateInput()

  Meow("Initializing...")
  hash <- uuid::UUIDgenerate()
  old.wd <- getwd()
  tmp.wd <- dirname(bcs.path)
  dir.create(tmp.wd, recursive=TRUE, showWarnings=FALSE)
  setwd(tmp.wd) # easier for zipping
  on.exit(setwd(old.wd))
  dir.create(file.path(hash, "main"), recursive=TRUE)

  Meow("Extracting expressions...")
  expr.data <- GetExpressionData(object)

  Meow("Extracting metadata...")
  meta.data <- GetMetadata(object)

  Meow("Extracting dimred...")
  dimred.data <- GetDimredData(object)
  spatial.data <- GetSpatialData(object)

  Meow("Writing data...")
  WriteStudy(expr.data, meta.data, dimred.data, spatial.data, hash,
      author, unique.limit)

  Meow("Compressing data...")
  zip::zip(basename(bcs.path), hash, compression_level=compression.level)
  unlink(hash, recursive=TRUE, force=TRUE)
  return(TRUE)
}

#' @title An old function of `ExportSeurat`
#' @description Backward-compatibility purposes only
#' @export
ExportSeuratObject <- function(...) {
  lifecycle::deprecate_soft("0.1.0", "ExportSeuratObject()", "ExportSeurat()")
  return(ExportSeurat(...))
}
bioturing/rBCS documentation built on May 18, 2022, 5:26 p.m.