R/io.R

Defines functions export.sparse.format import.ace.from.legacy preprocessDF convert.ace.rownames import.ace.from.CDS import.ace.from.loom import.ace.from.Seurat import.ace.from.table import.ace.from.counts import.ace.from.10X.h5 import.ace.from.10X import.ace.from.10X.generic

Documented in convert.ace.rownames import.ace.from.10X import.ace.from.10X.generic import.ace.from.10X.h5 import.ace.from.CDS import.ace.from.counts import.ace.from.loom import.ace.from.Seurat import.ace.from.table

#' Imports data from a 10X experiment folder and constructs an `SingleCellExeriment` object
#'
#' @param input_path Folder containing input files.
#' @param mtx_file Count file in Matrix Market format (default='matrix.mtx').
#' @param feature_metadata Table of the same size as number of rows in the count matrix (default='features.tsv').
#' @param sample_metadata Table of the same size as number of columns in the count matrix (default='barcodes.tsv').
#' @param sep Column-separator used in the row/column annotations files (default='\\t').
#' @param prefilter Whether to prefilter rows/columns of input counts matrix. Must specify filtering parameters to pass to filter.ace().
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.10X.generic(input_path, prefilter=TRUE, min_feats_per_cell = 500)
#' @export
import.ace.from.10X.generic <- function(
  input_path,
  mtx_file = "matrix.mtx.gz",
  feature_metadata = "features.tsv.gz",
  sample_metadata = "barcodes.tsv.gz",
  sep = "\t",
  use.names = TRUE,
  prefilter = FALSE,
  ...
) {

    count.file = paste(input_path, mtx_file, sep = "/")
    if (!file.exists(count.file)) {
        err = sprintf("File %s not found. Consider changing `mtx_file` or `input_path` options.",
            count.file)
        stop(err)
    }

    feature.file = paste(input_path, feature_metadata, sep = "/")
    if (!file.exists(feature.file)) {
        err = sprintf("File %s not found. Consider changing `mtx_file` or `input_path` options.",
            feature.file)
        stop(err)
    }

    barcode.file = paste(input_path, sample_metadata, sep = "/")
    if (!file.exists(barcode.file)) {
        err = sprintf("File %s not found. Consider changing `mtx_file` or `input_path` options.",
            barcode.file)
        stop(err)
    }

    message(sprintf("Reading counts ...\n"))
    counts.mat = Matrix::readMM(count.file)

    feature_table = read.table(feature.file, header = F, sep = sep, as.is = TRUE)
    if (nrow(feature_table) == (nrow(counts.mat) + 1)) {
        rowAnnot = S4Vectors::DataFrame(feature_table[-1, ])
        colnames(rowAnnot) = feature_table[1, ]
    } else {
        rowAnnot = S4Vectors::DataFrame(feature_table)
    }
    if (ncol(rowAnnot) == 1) {
        colnames(rowAnnot) = "Gene"
    } else if (ncol(rowAnnot) == 2) {
        colnames(rowAnnot) = c("ENSEMBL", "Gene")
    } else if (ncol(rowAnnot) == 3) {
        colnames(rowAnnot) = c("ENSEMBL", "Gene", "Feature")
    }

    sample_metadata = utils::read.table(barcode.file, header = F, sep = sep, as.is = TRUE)
    if (ncol(sample_metadata) == (ncol(counts.mat) + 1)) {
        colAnnot = S4Vectors::DataFrame(sample_metadata[-1, ])
        colnames(colAnnot) = sample_metadata[1, ]
    } else {
        colAnnot = S4Vectors::DataFrame(sample_metadata)
        if(NCOL(colAnnot) == 1)
          colnames(colAnnot) = "Barcode"
    }

    # Feature-barcoding
    if (ncol(rowAnnot) > 2) {
        IDX = split(1:nrow(rowAnnot), rowAnnot[, 3])
        expression.counts.mat = counts.mat[IDX$`Gene Expression`, ]
        gene.table = rowAnnot[IDX$`Gene Expression`, 1:2]
        IDX = IDX[!grepl("Gene Expression", names(IDX))]
    } else {
        expression.counts.mat = counts.mat
        gene.table = rowAnnot
        IDX = list()
    }

    if (use.names == TRUE && (ncol(rowAnnot) >= 2)) {
        rownames(expression.counts.mat) = gene.table[, 2]
    } else {
        rownames(expression.counts.mat) = gene.table[, 1]
    }

    colnames(expression.counts.mat) = colAnnot[, 1]

    ace <- ACTIONetExperiment(
      assays = list(counts = expression.counts.mat),
      colData = colAnnot,
      rowData = rowAnnot
    )

    # Load additional barcoded features
    for (feature.name in names(IDX)) {
        feature.counts.mat = counts.mat[IDX[[feature.name]], ]
        row.annotations = rowAnnot[IDX[[feature.name]], ]
        rownames(feature.counts.mat) = rowAnnot[IDX[[feature.name]], 1]
        colnames(feature.counts.mat) = colAnnot[, 1]
        colMaps(ace)[[feature.name]] = Matrix::t(feature.counts.mat)
    }

    if (prefilter) {
        ace = filter.ace(ace, assay_name = "counts", return_fil_ace = TRUE, ...)
    }

    return(ace)
}


#' A simple wrapper to import data from a 10X experiment folder (v2 or v3) and constructs an `SingleCellExeriment` object
#'
#' @param input_path Folder containing input files
#' @param version 2 or 3
#' @param prefilter Whether to prefilter genes/cells based on the counts.mat
#' @param min.cell.frac.per.gene Minimum fraction of cells capturing a gene for it to be retained, if prefilter=TRUE (default=0.005)
#' @param min.genes.per.cell Minimum number of required captured genes per cell, if prefilter=TRUE (default=500)
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.10X(input_path, prefilter=TRUE, min_feats_per_cell = 500)
#' @export
import.ace.from.10X <- function(
  input_path,
  version = 3,
  prefilter = FALSE,
  ...
) {

    if (file.exists(paste(input_path, "genes.tsv", sep = "/"))) {
        version = 2
    }

    if ((2 <= version) & (version < 3)) {
        mtx_file = "matrix.mtx.gz"
        feature_metadata = "genes.tsv.gz"
        sample_metadata = "barcodes.tsv.gz"
    } else if ((3 <= version) & (version < 4)) {
        mtx_file = "matrix.mtx.gz"
        feature_metadata = "features.tsv.gz"
        sample_metadata = "barcodes.tsv.gz"
    } else {
        message("Unknown version")
    }

    ace = import.ace.from.10X.generic(
      input_path = input_path,
      mtx_file = mtx_file,
      feature_metadata = feature_metadata,
      sample_metadata = sample_metadata,
      prefilter = prefilter,
      ...
    )

    return(ace)
}


#' A simple wrapper to import data from a 10X experiment folder (v2 or v3) and constructs an `SingleCellExeriment` object
#'
#' @param fname Input HDF5 file
#' @param version 2 or 3
#' @param genome genome to import. Default is to import the first (if many)
#' @param prefilter Whether to prefilter genes/cells based on the counts.mat
#' @param min.cell.frac.per.gene Minimum fraction of cells capturing a gene for it to be retained, if prefilter=TRUE (default=0.005)
#' @param min.genes.per.cell Minimum number of required captured genes per cell, if prefilter=TRUE (default=500)
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.10X.h5(fname = fname, prefilter=TRUE)
#' @export
import.ace.from.10X.h5 <- function(
  fname,
  version = 3,
  genome = NULL,
  use.names = TRUE,
  prefilter = FALSE,
  ...
) {

    if (!requireNamespace("hdf5r", quietly = TRUE)) {
        stop("Please install hdf5r to read HDF5 files")
    }
    if (!file.exists(fname)) {
        stop("File not found")
    }

    h5file <- hdf5r::H5File$new(filename = fname, mode = "r")

    if (is.null(genome))
        genome <- names(x = h5file)[[1]]

    if (h5file$attr_exists("PYTABLES_FORMAT_VERSION")) {
        version = 2
    }

    if ((version <= 3) & (version < 4)) {
        if (use.names) {
            feature_slot <- "features/name"
        } else {
            feature_slot <- "features/id"
        }
    } else if ((version <= 2) & (version < 3)) {
        if (use.names) {
            feature_slot <- "gene_names"
        } else {
            feature_slot <- "genes"
        }
    }
    counts.mat <- h5file[[paste0(genome, "/data")]]
    indices <- h5file[[paste0(genome, "/indices")]]
    indptr <- h5file[[paste0(genome, "/indptr")]]
    shp <- h5file[[paste0(genome, "/shape")]]
    features <- h5file[[paste0(genome, "/", feature_slot)]][]
    barcodes <- h5file[[paste0(genome, "/barcodes")]]

    sparse.mat <- Matrix::sparseMatrix(
      i = indices[] + 1,
      p = indptr[],
      x = as.numeric(x = counts.mat[]),
      dims = shp[],
      giveCsparse = FALSE
    )

    if (length(unique(features)) < length(features)) {
        features <- make.names(names = features, unique = T)
    }
    rownames(x = sparse.mat) <- features
    colnames(x = sparse.mat) <- barcodes[]
    sparse.mat <- as(object = sparse.mat, Class = "dMatrix")

    if (h5file$exists(name = paste0(genome, "/features"))) {
        types <- h5file[[paste0(genome, "/features/feature_type")]][]
        types.unique <- unique(x = types)
        if (length(x = types.unique) > 1) {
            message("Genome ", genome, " has multiple modalities, returning a list of matrices for this genome")
            mats <- sapply(X = types.unique, FUN = function(x) {
                return(sparse.mat[which(x = types == x), ])
            }, simplify = FALSE, USE.NAMES = TRUE)
        } else {
            mats = list(sparse.mat)
        }
    } else {
        mats = list(sparse.mat)
    }

    h5file$close_all()

    ace <- ACTIONetExperiment(assays = list(counts = mats[[1]]))

    # Load additional barcoded features
    for (feature.name in names(mats)[-1]) {
        colMaps(ace)[[feature.name]] = Matrix::t(mats[[feature.name]])
    }

    if (prefilter) {
        ace = filter.ace(ace, assay_name = "counts", return_fil_ace = TRUE, ...)
    }

    return(ace)
}

#' Constructs an `SingleCellExeriment` object from count matrix, gene names, and sample_metadata
#'
#' @param counts_mat Matrix of counts.mat
#' @param feature_metadata Vector or data frame  of annotations for features (rows of the count matrix)
#' @param sample_metadata Vector or data frame of annotations for samples (columns of the count matrix)
#' @param prefilter Whether to prefilter genes/cells based on the counts.mat
#' @param min.cell.frac.per.gene Minimum fraction of cells capturing a gene for it to be retained, if prefilter=TRUE (default=0.005)
#' @param min.genes.per.cell Minimum number of required captured genes per cell, if prefilter=TRUE (default=500)
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.count.matrix(counts.mat.mat, gene_names, prefilter=TRUE)
#' @export
import.ace.from.counts <- function(
  counts_mat,
  feature_metadata = NULL,
  sample_metadata = NULL,
  prefilter = FALSE,
  ...
) {

    if (is.null(feature_metadata)) {
        feature_metadata = .default_rowData(NROW(counts_mat))
    }

    if (is.null(sample_metadata)) {
        sample_metadata = .default_colData(NCOL(counts_mat))
    }

    ace <- ACTIONetExperiment(
      assays = list(counts = counts_mat),
      rowData = feature_metadata,
      colData = sample_metadata
    )

    if (prefilter) {
        ace = filter.ace(ace, assay_name = "counts", return_fil_ace = TRUE, ...)
    }

    return(ace)
}

#' Constructs an `SingleCellExeriment` object from a full count matrix file
#'
#' @param fname Full path to the count matrix file
#' @param sep Column-separator used in count matrix file (default='\\t')
#' @param prefilter Whether to prefilter genes/cells based on the counts.mat
#' @param min.cell.frac.per.gene Minimum fraction of cells capturing a gene for it to be retained, if prefilter=TRUE (default=0.005)
#' @param min.genes.per.cell Minimum number of required captured genes per cell, if prefilter=TRUE (default=500)
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.table(file_name, prefilter=TRUE)
#' @export
import.ace.from.table <- function(
  fname,
  sep = "\t",
  prefilter = FALSE,
  ...) {

    counts.mat = utils::read.table(fname, header = TRUE, sep = sep, as.is = TRUE)

    if (!is.numeric(counts.mat[1, 1])) {
        row.names = counts.mat[, 1]
        counts.mat = counts.mat[, -1]
        rownames(counts.mat) = row.names
    }

    counts.mat = as(as.matrix(counts.mat), "sparseMatrix")

    ace <- ACTIONetExperiment(assays = list(counts = counts.mat))

    if (prefilter) {
        ace = filter.ace(ace, assay_name = "counts", return_fil_ace = TRUE, ...)
    }

    return(ace)
}


#' Constructs an `SingleCellExeriment` object from a Seurat object
#' Please refer to: https://satijalab.org/seurat/v3.0/conversion_vignette.html
#'
#' @param Seurat.obj Seurat object
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.Seurat(file_name)
#' @export
import.ace.from.Seurat <- function(Seurat.obj) {

    if (!requireNamespace("Seurat", quietly = TRUE)) {
        stop("Please install Seurat to read Seurat objects")
    }

    Seurat.obj = Seurat::UpdateSeuratObject(Seurat.obj)
    sce = Seurat::as.SingleCellExperiment(Seurat.obj)
    ace <- as(sce, "ACTIONetExperiment")

    return(ace)
}


#' Constructs an `SingleCellExeriment` object from AnnData
#' This function depends on sceasy (https://github.com/cellgeni/sceasy) and
#' LoomExperiment(BioC) for file conversion.
#'
#' @param fname Path to the AnnData file
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.loom(file_name)
#' @export
import.ace.from.loom <- function(fname, ...) {

    .check_and_load_package("sceasy")

    SE = SummarizedExperiment::makeSummarizedExperimentFromLoom(fname, ...)
    ace = as(SE, "ACTIONetExperiment")

    return(ace)
}


#' Constructs an `SingleCellExeriment` object from CDS format in Monocle
#'
#' @param monocle_cds CDS object
#'
#' @return `SingleCellExeriment` object
#'
#' @examples
#' ace = import.ace.from.CDS(monocle_cds)
#' @export
import.ace.from.CDS <- function(monocle_cds) {

    .check_and_load_package("monocle3")

    counts.mat = exprs(monocle_cds)
    gene_annotations = fData(monocle_cds)
    sample_metadata = pData(monocle_cds)

    ace <- ACTIONetExperiment(
      assays = list(counts = counts.mat),
      colData = sample_metadata,
      rowData = gene_annotations
    )

    # This one is NOT stable. SORRY!  sce = monocle::exportCDS(monocle_cds = CDS,
    # export_to = 'Scater', export_all = T)

    return(ace)
}


#' Translates rownames() of the `ace` object for mouse and human datasets.
#' A typical use-case is when input ids are in ENSEMBL format, but user is interested to work with gene symbols.
#'
#' @param ace Input `ace` object
#' @param from Source annotation (default='ENSEMBL')
#' @param to Target annotation (default='SYMBOL')
#' @param species Either 'mouse' or 'human' (default='human')
#'
#' @return `SingleCellExeriment` object with renamed rows
#'
#' @examples
#' ace = import.ace.from.CDS(monocle_cds)
#' @export
convert.ace.rownames <- function(
  ace,
  from = "ENSEMBL",
  to = "SYMBOL",
  species = "human"
) {

    if (species == "human") {
        library(org.Hs.eg.db)
        suppressWarnings(ids <- AnnotationDbi::mapIds(
          org.Hs.eg.db,
          keys = row.names(ace),
          keytype = from,
          column = to,
          multiVals = "first")
        )
        ids[is.na(ids)] = ""

        rownames(ace) = ids
    } else if (species == "mouse") {
        library(org.Mm.eg.db)
        suppressWarnings(ids <- AnnotationDbi::mapIds(
          org.Mm.eg.db,
          keys = row.names(ace),
          keytype = from,
          column = to,
          multiVals = "first")
        )
        ids[is.na(ids)] = ""

        rownames(ace) = ids
    }

    return(ace)
}


preprocessDF <- function(
  df,
  drop_single_values = TRUE
) {

    if (ncol(df) > 0) {
        nn = colnames(df)
        for (n in nn) {
            x = df[, n]
            if (length(unique(x)) < 50) {
                x = factor(x, sort(unique(x)))
                df[, n] = x
            }
        }
    }

    if (ncol(df) == 0)
        df[["name"]] <- rownames(df)

    if (drop_single_values) {
        k_singular <- sapply(df, function(x) length(unique(x)) == 1)
        if (sum(k_singular) > 0)
            warning(paste("Dropping single category variables:"), paste(colnames(df)[k_singular],
                collapse = ", "))
        df <- df[, !k_singular, drop = F]
        if (ncol(df) == 0)
            df[["name"]] <- rownames(df)
    }

    return(df)
}

import.ace.from.legacy <- function(
  ACTIONet.out,
  sce,
  full_import = TRUE,
  return_all = FALSE
) {

    ace = as(sce, "ACTIONetExperiment")

    if ("S_r" %in% names(colMaps(ace))) {
        colMaps(ace)[["ACTION"]] = colMaps(ace)[["S_r"]]
    }

    ACTION.out = ACTIONet.out$ACTION.out
    pruning.out = ACTIONet.out$reconstruct.out
    G = ACTIONet.out$build.out$ACTIONet

    colNets(ace)$ACTIONet = G
    vis.out = ACTIONet.out$vis.out

    colMaps(ace)$ACTIONet2D = vis.out$coordinates
    colMaps(ace)$ACTIONet3D = vis.out$coordinates_3D
    colMaps(ace)$denovo_color = vis.out$colors

    if (full_import == TRUE) {
        colMaps(ace)[["H_stacked"]] = as(Matrix::t(ACTIONet.out$reconstruct.out$H_stacked),
            "sparseMatrix")
        colMaps(ace)[["C_stacked"]] = as(ACTIONet.out$reconstruct.out$C_stacked,
            "sparseMatrix")
    }

    unification.out = ACTIONet.out$unification.out
    colMaps(ace)[["H_unified"]] = as(Matrix::t(ACTIONet.out$unification.out$H.core),
        "sparseMatrix")
    colMaps(ace)[["C_unified"]] = as(ACTIONet.out$unification.out$C.core, "sparseMatrix")

    ace$assigned_archetype = ACTIONet.out$unification.out$assignments.core
    ace$node_centrality = compute_archetype_core_centrality(colNets(ace)$ACTIONet,
        sample_assignments = ace$assigned_archetype)

    specificity.out = ACTIONet.out$unification.out$DE.core
    if (is.list(specificity.out)) {
        rowMaps(ace)[["unified_feature_profile"]] = specificity.out[["archetypes"]]
        rowMapTypes(ace)[["unified_feature_profile"]] = "internal"

        rowMaps(ace)[["unified_feature_specificity"]] = specificity.out[["upper_significance"]]
        rowMapTypes(ace)[["unified_feature_specificity"]] = "reduction"
    } else {
        rowMaps(ace)[["unified_feature_profile"]] = assays(specificity.out)[["profile"]]
        rowMapTypes(ace)[["unified_feature_profile"]] = "internal"
        rowMaps(ace)[["unified_feature_specificity"]] = assays(specificity.out)[["significance"]]
        rowMapTypes(ace)[["unified_feature_specificity"]] = "reduction"
    }

    # Prepare output
    if (return_all == TRUE) {
        trace = list(
          ACTION.out = ACTION.out,
          pruning.out = pruning.out,
          vis.out = vis.out,
          unification.out = unification.out
        )

        trace$log = list(
          genes = rownames(ace),
          cells = colnames(ace),
          time = Sys.time()
        )

        out = list(ace = ace, trace = trace)

        return(out)
    } else {
        return(ace)
    }
}

export.sparse.format <- function(ace, path) {

    counts.mat = BiocGenerics::counts(ace)
    cell_metadata = SummarizedExperiment::colData(ace)
    features = rownames(ace)

    utils::write.table(
      x = features,
      file = file.path(path, "features.txt"),
      row.names = FALSE,
      col.names = FALSE,
      quote = FALSE
    )

    utils::write.table(
      x = cell_metadata,
      file = file.path(path, "metadata.tsv"),
      row.names = FALSE,
      col.names = TRUE,
      quote = FALSE,
      sep = "\t"
    )

    Matrix::writeMM(counts.mat, file = file.path(path, "counts.mtx"))
}
shmohammadi86/ACTIONetExperiment documentation built on Sept. 13, 2023, 8:55 p.m.