R/functions.R

Defines functions anndata2cds anndata2seurat .uns2misc .var2feature_metadata .obs2metadata loom2sce sce2loom seurat2sce loom2anndata sce2anndata seurat2anndata .regularise_df

# Functions for converting between different objects


#' Regularise dataframe
#'
#' This function checks if certain columns of a dataframe is of a single value
#' and drop them if required
#'
#' @param df Input data frame, usually cell metadata table (data.frame-like
#'   object)
#' @param drop_single_values Drop columns with only a single value (logical)
#'
#' @return Dataframe
.regularise_df <- function(df, drop_single_values = TRUE) {
  if (ncol(df) == 0) df[["name"]] <- rownames(df)
  if (drop_single_values) {
    k_singular <- sapply(df, function(x) length(unique(x)) == 1)
    if (sum(k_singular) > 0) {
      warning(
        paste("Dropping single category variables:"),
        paste(colnames(df)[k_singular], collapse = ", ")
      )
    }
    df <- df[, !k_singular, drop = F]
    if (ncol(df) == 0) df[["name"]] <- rownames(df)
  }
  return(df)
}

#' Convert Seurat object to AnnData
#'
#' This function converts a Seurat object to an Anndata object
#'
#' @param obj Input Seurat object
#' @param outFile Save output AnnData to this file if specified (str or NULL)
#' @param assay Assay to be converted, default "RNA" (str)
#' @param main_layer Slot in `assay` to be converted to AnnData.X, may be
#'   "counts", "data", "scale.data", default "data" (str)
#' @param transfer_layers If specified, convert slots to AnnData.layers[<slot>],
#'   (vector of str)
#' @param drop_single_values Drop single value columns in cell metadata table,
#'   default TRUE (logical)
#'
#' @return AnnData object
#'
#' @import reticulate
#' @import Matrix
seurat2anndata <- function(obj, outFile = NULL, assay = "RNA", main_layer = "data", transfer_layers = NULL, drop_single_values = TRUE) {
  if (!requireNamespace("Seurat")) {
    stop("This function requires the 'Seurat' package.")
  }
  main_layer <- match.arg(main_layer, c("data", "counts", "scale.data"))
  transfer_layers <- transfer_layers[
    transfer_layers %in% c("data", "counts", "scale.data")
  ]
  transfer_layers <- transfer_layers[transfer_layers != main_layer]

  if (compareVersion(as.character(obj@version), "3.0.0") < 0) {
    obj <- Seurat::UpdateSeuratObject(object = obj)
  }

  X <- Seurat::GetAssayData(object = obj, assay = assay, slot = main_layer)

  obs <- .regularise_df(obj@meta.data, drop_single_values = drop_single_values)

  var <- .regularise_df(Seurat::GetAssay(obj, assay = assay)@meta.features, drop_single_values = drop_single_values)

  obsm <- NULL
  reductions <- names(obj@reductions)
  if (length(reductions) > 0) {
    obsm <- sapply(
      reductions,
      function(name) as.matrix(Seurat::Embeddings(obj, reduction = name)),
      simplify = FALSE
    )
    names(obsm) <- paste0("X_", tolower(names(obj@reductions)))
  }

  layers <- list()
  for (layer in transfer_layers) {
    mat <- Seurat::GetAssayData(object = obj, assay = assay, slot = layer)
    if (all(dim(mat) == dim(X))) layers[[layer]] <- Matrix::t(mat)
  }

  anndata <- reticulate::import("anndata", convert = FALSE)

  adata <- anndata$AnnData(
    X = Matrix::t(X),
    obs = obs,
    var = var,
    obsm = obsm,
    layers = layers
  )

  if (!is.null(outFile)) {
    adata$write(outFile, compression = "gzip")
  }

  adata
}

#' Convert SingleCellExperiment object to AnnData
#'
#' This function converts a SingleCellExperiment object to an Anndata object
#'
#' @param obj Input SingleCellExperiment object
#' @param outFile Save output AnnData to this file if specified (str or NULL)
#' @param main_layer Assay to be converted to AnnData.X, default "counts" (str)
#' @param transfer_layers If specified, convert additional assays to
#'   AnnData.layers[<slot>], (vector of str)
#' @param drop_single_values Drop single value columns in cell metadata table,
#'   default TRUE (logical)
#'
#' @return AnnData object
#'
#' @import reticulate
#' @import Matrix
sce2anndata <- function(obj, outFile = NULL, main_layer = "counts", transfer_layers = NULL, drop_single_values = TRUE) {
  if (!requireNamespace("SummarizedExperiment")) {
    stop("This function requires the 'SummarizedExperiment' package.")
  }
  if (!requireNamespace("SingleCellExperiment")) {
    stop("This function requires the 'SingleCellExperiment' package.")
  }
  assay_names <- SummarizedExperiment::assayNames(obj)
  main_layer <- match.arg(main_layer, assay_names)
  transfer_layers <- transfer_layers[transfer_layers %in% assay_names]
  transfer_layers <- transfer_layers[transfer_layers != main_layer]

  X <- SummarizedExperiment::assay(obj, main_layer)

  obs <- .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), drop_single_values = drop_single_values)

  var <- .regularise_df(as.data.frame(SummarizedExperiment::rowData(obj)), drop_single_values = drop_single_values)

  obsm <- NULL
  reductions <- SingleCellExperiment::reducedDimNames(obj)
  if (length(reductions) > 0) {
    obsm <- sapply(
      reductions,
      function(name) {
        as.matrix(
          SingleCellExperiment::reducedDim(obj, type = name)
        )
      },
      simplify = FALSE
    )
    names(obsm) <- paste0(
      "X_", tolower(SingleCellExperiment::reducedDimNames(obj))
    )
  }

  layers <- list()
  for (layer in transfer_layers) {
    mat <- SummarizedExperiment::assay(obj, layer)
    if (all(dim(mat) == dim(X))) layers[[layer]] <- Matrix::t(mat)
  }

  anndata <- reticulate::import("anndata", convert = FALSE)

  adata <- anndata$AnnData(
    X = Matrix::t(X),
    obs = obs,
    var = var,
    obsm = obsm,
    layers = layers
  )

  if (!is.null(outFile)) {
    adata$write(outFile, compression = "gzip")
  }

  adata
}

#' Convert Loom object to AnnData
#'
#' This function converts a Loom object to an Anndata object
#'
#' @param inFile Path to the input Loom file on disk (str)
#' @param outFile Save output AnnData to this file if specified (str or NULL)
#' @param main_layer Assay to be converted to AnnData.X, can be "spliced" or
#'   "unspliced", default "spliced" (str)
#' @param obs_names Column in cell metadata table that contains cell IDs (str)
#' @param var_names Column in gene metadata table that contains gene names
#'
#' @return AnnData object
#'
#' @import reticulate
loom2anndata <- function(inFile, outFile = NULL, main_layer = c("spliced", "unspliced"),
                         obs_names = "CellID", var_names = "Gene") {
  main_layer <- match.arg(main_layer)

  anndata <- reticulate::import("anndata", convert = FALSE)

  if (compareVersion(as.character(anndata[["__version__"]]), "0.6.20") < 0) {
    message(paste(
      "Warning: anndata <0.6.20 detected.",
      "Upgrade to handle multi-dimensional embeddings."
    ))
  }

  adata <- anndata$read_loom(
    inFile,
    sparse = TRUE, cleanup = TRUE, X_name = main_layer,
    obs_names = obs_names, var_names = var_names
  )

  anndata$AnnData$obs_names_make_unique(adata)
  anndata$AnnData$var_names_make_unique(adata)

  if (!is.null(outFile)) {
    adata$write(outFile, compression = "gzip")
  }

  adata
}

#' Convert Seurat object to SingleCellExperiment
#'
#' This function converts a Seurat object to a SingleCellExperiment object
#'
#' @param obj Input Seurat object
#' @param outFile Save output SingleCellExperiment object to this file if
#'   specified (str or NULL)
#' @param assay Assay to be converted to AnnData.X, default "RNA" (str)
#'
#' @return AnnData object
seurat2sce <- function(obj, outFile = NULL, main_layer = NULL, assay = "RNA", ...) {
  if (!requireNamespace("Seurat")) {
    stop("This function requires the 'Seurat' package.")
  }
  sce <- Seurat::as.SingleCellExperiment(obj, assay = assay, ...)
  if (!is.null(outFile)) {
    saveRDS(sce, outFile)
  }

  sce
}

#' Convert SingleCellExperiment object to Loom file
#'
#' This function converts a SingleCellExperiment object to a LoomExperiment
#' object
#'
#' @param obj Input SingleCellExperiment object
#' @param outFile Save output Loom to this file if specified (str or NULL)
#' @param drop_single_values Drop single value columns in cell metadata table,
#'   default TRUE (logical)
#'
#' @return LoomExperiment object
sce2loom <- function(obj, outFile, main_layer = NULL, drop_single_values = TRUE, ...) {
  if (!requireNamespace("LoomExperiment")) {
    stop("This function requires the 'LoomExperiment' package.")
  }
  scle <- LoomExperiment::SingleCellLoomExperiment(obj)

  if (!is.null(outFile)) {
    LoomExperiment::export(
      scle, outFile,
      matrix = ifelse(!is.null(main_layer) && main_layer %in% SummarizedExperiment::assayNames(scle), main_layer, SummarizedExperiment::assayNames(scle)[1]),
      colnames_attr = "obs_names", rownames_attr = "var_names"
    )
  }

  scle
}

#' Read a Loom file into a SingleCellExperiment object
#'
#' This function reads a Loom file object into a SingleCellExperiment object
#'
#' @param inFile Path to input Loom file on disk (str)
#' @param outFile Save output SingleCellExperiment object to this file if
#'   specified (str or NULL)
#'
#' @return LoomExperiment object
loom2sce <- function(inFile, outFile = NULL, main_layer = NULL, main_layer_name = NULL, ...) {
  if (!requireNamespace("LoomExperiment")) {
    stop("This function requires the 'LoomExperiment' package.")
  }
  if (!requireNamespace("SingleCellExperiment")) {
    stop("This function requires the 'LoomExperiment' package.")
  }
  scle <- LoomExperiment::import(inFile)
  sce <- as(scle, "SingleCellExperiment")

  if (!is.null(outFile)) {
    saveRDS(sce, outFile)
  }

  sce
}

#' Prepare cell metadata
#'
#' This function prepare cell metadata from AnnData.obs
#'
#' @param obs_pd Input AnnData.obs dataframe
#' @param assay Assay name, default "RNA" (str)
#'
#' @return AnnData object
#'
#' @import reticulate
.obs2metadata <- function(obs_pd, assay = "RNA") {
  obs_df <- .regularise_df(reticulate::py_to_r(obs_pd), drop_single_values = FALSE)
  attr(obs_df, "pandas.index") <- NULL
  colnames(obs_df) <- sub("n_counts", paste0("nCounts_", assay), colnames(obs_df))
  colnames(obs_df) <- sub("n_genes", paste0("nFeaturess_", assay), colnames(obs_df))
  return(obs_df)
}

#' Prepare feature metadata
#'
#' This function prepare feature metadata from AnnData.var
#'
#' @param var_pd Input AnnData.var dataframe
#'
#' @return AnnData object
#'
#' @import reticulate
.var2feature_metadata <- function(var_pd) {
  var_df <- .regularise_df(reticulate::py_to_r(var_pd), drop_single_values = FALSE)
  attr(var_df, "pandas.index") <- NULL
  colnames(var_df) <- sub("dispersions_norm", "mvp.dispersion.scaled", colnames(var_df))
  colnames(var_df) <- sub("dispersions", "mvp.dispersion", colnames(var_df))
  colnames(var_df) <- sub("means", "mvp.mean", colnames(var_df))
  colnames(var_df) <- sub("highly_variable", "highly.variable", colnames(var_df))
  return(var_df)
}

.uns2misc <- function(ad, target_uns_keys = list()) {
  uns_keys <- intersect(target_uns_keys, reticulate::py_to_r(ad$uns_keys()))
  misc <- sapply(uns_keys, function(x) reticulate::py_to_r(ad$uns[x]), simplify = FALSE, USE.NAMES = TRUE)
  return(misc)
}

#' Convert AnnData object to Seurat object
#'
#' This function converts an AnnData object to a Seurat object
#'
#' @param inFile Path to an input AnnData object on disk (str)
#' @param outFile Save output Seurat to this file if specified (str or NULL)
#' @param assay Name of assay in Seurat object to store expression values,
#'   default "RNA" (str)
#' @param main_layer Name of slot in `assay` to store AnnData.X, can be
#'   "counts", "data", "scale.data", default "counts" (str)
#' @param use_seurat Use Seurat::ReadH5AD() to do the conversion, default FALSE (logical)
#' @param lzf Whether AnnData is compressed by `lzf`, default FALSE (logical)
#'
#' @return Seurat object
#'
#' @import reticulate
#' @import Matrix
anndata2seurat <- function(inFile, outFile = NULL, main_layer = "counts", assay = "RNA", use_seurat = FALSE, lzf = FALSE, target_uns_keys = list()) {
  if (!requireNamespace("Seurat")) {
    stop("This function requires the 'Seurat' package.")
  }
  main_layer <- match.arg(main_layer, c("counts", "data", "scale.data"))
  inFile <- path.expand(inFile)

  anndata <- reticulate::import("anndata", convert = FALSE)
  sp <- reticulate::import("scipy.sparse", convert = FALSE)

  if (use_seurat) {
    if (lzf) {
      tmpFile <- paste0(tools::file_path_sans_ext(inFile), ".decompressed.h5ad")
      ad <- anndata$read_h5ad(inFile)
      ad$write(tmpFile)
      tryCatch(
        {
          srt <- Seurat::ReadH5AD(tmpFile)
        },
        finally = {
          file.remove(tmpFile)
        }
      )
    } else {
      srt <- Seurat::ReadH5AD(inFile)
    }
  } else {
    ad <- anndata$read_h5ad(inFile)

    obs_df <- .obs2metadata(ad$obs)
    var_df <- .var2feature_metadata(ad$var)

    if (reticulate::py_to_r(sp$issparse(ad$X))) {
      X <- Matrix::t(reticulate::py_to_r(sp$csc_matrix(ad$X)))
    } else {
      X <- t(reticulate::py_to_r(ad$X))
    }
    colnames(X) <- rownames(obs_df)
    rownames(X) <- rownames(var_df)

    if (!is.null(reticulate::py_to_r(ad$raw))) {
      raw_var_df <- .var2feature_metadata(ad$raw$var)
      raw_X <- Matrix::t(reticulate::py_to_r(sp$csc_matrix(ad$raw$X)))
      colnames(raw_X) <- rownames(obs_df)
      rownames(raw_X) <- rownames(raw_var_df)
    } else {
      raw_var_df <- NULL
      raw_X <- NULL
    }

    if (main_layer == "scale.data" && !is.null(raw_X)) {
      assays <- list(Seurat::CreateAssayObject(data = raw_X))
      assays[[1]] <- Seurat::SetAssayData(assays[[1]], slot = "scale.data", new.data = X)
      message("X -> scale.data; raw.X -> data")
    } else if (main_layer == "data" && !is.null(raw_X)) {
      if (nrow(X) != nrow(raw_X)) {
        message("Raw layer was found with different number of genes than main layer, resizing X and raw.X to match dimensions")
        raw_X <- raw_X[rownames(raw_X) %in% rownames(X), , drop = F]
        X <- X[rownames(raw_X), , drop = F]
      }
      assays <- list(Seurat::CreateAssayObject(counts = raw_X))
      assays[[1]] <- Seurat::SetAssayData(assays[[1]], slot = "data", new.data = X)
      message("X -> data; raw.X -> counts")
    } else if (main_layer == "counts") {
      assays <- list(Seurat::CreateAssayObject(counts = X))
      message("X -> counts")
    } else {
      assays <- list(Seurat::CreateAssayObject(data = X))
      message("X -> data")
    }
    names(assays) <- assay
    Seurat::Key(assays[[assay]]) <- paste0(tolower(assay), "_")

    if (main_layer == "scale.data" && !is.null(raw_X)) {
      assays[[assay]]@meta.features <- raw_var_df
    } else {
      assays[[assay]]@meta.features <- var_df
    }

    project_name <- sub("\\.h5ad$", "", basename(inFile))
    srt <- new("Seurat", assays = assays, project.name = project_name, version = packageVersion("Seurat"))
    Seurat::DefaultAssay(srt) <- assay
    Seurat::Idents(srt) <- project_name

    srt@meta.data <- obs_df
    embed_names <- unlist(reticulate::py_to_r(ad$obsm_keys()))
    if (length(embed_names) > 0) {
      embeddings <- sapply(embed_names, function(x) as.matrix(reticulate::py_to_r(ad$obsm[x])), simplify = FALSE, USE.NAMES = TRUE)
      names(embeddings) <- embed_names
      for (name in embed_names) {
        rownames(embeddings[[name]]) <- colnames(assays[[assay]])
      }

      dim.reducs <- vector(mode = "list", length = length(embeddings))
      for (i in seq(length(embeddings))) {
        name <- embed_names[i]
        embed <- embeddings[[name]]
        key <- switch(name,
          sub("_(.*)", "\\L\\1", sub("^X_", "", toupper(name)), perl = T),
          "X_pca" = "PC",
          "X_tsne" = "tSNE",
          "X_umap" = "UMAP"
        )
        colnames(embed) <- paste0(key, "_", seq(ncol(embed)))
        dim.reducs[[i]] <- Seurat::CreateDimReducObject(
          embeddings = embed,
          loadings = new("matrix"),
          assay = assay,
          stdev = numeric(0L),
          key = paste0(key, "_")
        )
      }
      names(dim.reducs) <- sub("X_", "", embed_names)

      for (name in names(dim.reducs)) {
        srt[[name]] <- dim.reducs[[name]]
      }
    }
  }

  srt@misc <- .uns2misc(ad, target_uns_keys = target_uns_keys)

  if (!is.null(outFile)) saveRDS(object = srt, file = outFile)

  srt
}

#' Convert AnnData object to monocle3 CDS object
#'
#' This function converts an AnnData object to a monocle3 CDS object
#'
#' @param inFile Path to an input AnnData object on disk (str)
#' @param outFile Save output CDS to this file if specified (str or NULL)
#' @param main_layer Name of the slot in AnnData to be converted, can be "X",
#'   "raw", default "X" (str)
#' @param pcaName Key of PCA in AnnData.obsm to be transferred (str or NULL)
#' @param umapName Key of UMAP in AnnData.obsm to be transferred (str or NULL)
#'
#' @return CDS object
#'
#' @import reticulate
#' @import Matrix
anndata2cds <- function(inFile, outFile = NULL, main_layer = "X", pcaName = "X_pca", umapName = "X_umap") {
  builtins <- reticulate::import_builtins(convert = FALSE)
  anndata <- reticulate::import("anndata", convert = FALSE)
  sp <- reticulate::import("scipy.sparse", convert = FALSE)

  ad <- anndata$read_h5ad(inFile)
  obs_df <- .obs2metadata(ad$obs)

  if (is.null(main_layer)) main_layer <- "X"

  if ((main_layer == "raw") && (!is.null(reticulate::py_to_r(ad$raw)))) {
    var_df <- .var2feature_metadata(ad$raw$var)
    var_df$gene_short_name <- rownames(var_df)
  } else {
    var_df <- .var2feature_metadata(ad$var)
    var_df$gene_short_name <- rownames(var_df)
  }

  if (main_layer == "raw") {
    count_x <- tryCatch(
      {
        ss <- reticulate::import("scanpy_scripts", convert = FALSE)
        ss$lib$lognorm_to_counts(ad$raw$X)
      },
      error = function(e) {
        return(tryCatch(
          {
            ad$raw$X
          },
          error = function(ee) {
            return(ad$X)
          },
          warning = function(ww) {}
        ))
      },
      warning = function(w) {}
    )
  } else {
    count_x <- ad$X
  }
  X <- Matrix::t(reticulate::py_to_r(sp$csc_matrix(count_x)))
  colnames(X) <- rownames(obs_df)
  rownames(X) <- rownames(var_df)

  suppressPackageStartupMessages(library(SingleCellExperiment))
  cds1 <- monocle3::new_cell_data_set(expression_data = X, cell_metadata = obs_df, gene_metadata = var_df)

  embed_names <- reticulate::py_to_r(builtins$list(ad$obsm$keys()))
  if ((!is.null(pcaName) && pcaName %in% embed_names) || (!is.null(umapName) && umapName %in% embed_names)) {
    embeds <- SimpleList()
    if (!is.null(pcaName) && pcaName %in% embed_names) {
      pcs <- reticulate::py_to_r(ad$obsm[pcaName])
      embeds$PCA <- pcs
    }
    if (!is.null(umapName) && umapName %in% embed_names) {
      umap <- reticulate::py_to_r(ad$obsm[umapName])
      embeds$UMAP <- umap
    }
    SingleCellExperiment::reducedDims(cds1) <- embeds
  }

  if (!is.null(outFile)) saveRDS(object = cds1, file = outFile)

  cds1
}
cellgeni/sceasy documentation built on Feb. 29, 2024, 7:53 a.m.