R/de_functions.R
In conos: Clustering on Network of Samples

Documented in getBetweenCellTypeCorrectedDE getBetweenCellTypeDE getPerCellTypeDE namedNames rawMatricesWithCommonGenes saveDEasCSV saveDEasJSON

#' @importFrom stats qnorm
NULL

#' Get a vector of the names of an object named by the names themselves.
#' This is useful with lapply when passing names of objects as it ensures that the output list
#' is also named
#'
#' @param g an objects on which we can call names()
#' @keywords internal
namedNames <- function(g) {
  n <- names(g)
  names(n) <- n
  n
}


#' @keywords internal
validatePerCellTypeParams <- function(con.obj, groups, sample.groups, ref.level, cluster.sep.chr) {
  if (!requireNamespace("DESeq2", quietly = TRUE)) {
    stop("You have to install DESeq2 package to use differential expression")
  }

  if (!('Conos' %in% class(con.obj))) stop('con.obj must be a conos object')
  if (is.null(groups)) stop('groups must be specified');
  if (is.null(sample.groups)) stop('sample.groups must be specified')
  if (!('list' %in% class(sample.groups))) stop('sample.groups must be a list')
  if (length(sample.groups) != 2) stop('sample.groups must be of length 2')
  if (!all(unlist(lapply(sample.groups, function(x) 'character' %in% class(x)))))
    stop('sample.groups must be a list of character vectors')
  if (!all(sapply(sample.groups, length) > 0))
    stop('sample.groups entries must be on length greater or equal to 1')
  if (!all(unlist(lapply(sample.groups, function(x) {all(x %in% names(con.obj$samples))}))))
    stop('sample.groups entries must be names of samples in the conos object')
  if (is.null(ref.level)) stop('reference level is not defined')
  ## todo: check samplegrousp are named
  if(is.null(names(sample.groups))) stop('sample.groups must be named')
  if(!inherits(groups,'factor')) stop('groups must be a factor')
  if(any(grepl(cluster.sep.chr, names(con.obj$samples),fixed=TRUE)))
    stop('cluster.sep.chr must not be part of any sample name')
  if(any(grepl(cluster.sep.chr,levels(groups),fixed=TRUE)))
    stop('cluster.sep.chr must not be part of any cluster name')
}

#' @keywords internal
validateBetweenCellTypeParams <- function(con.obj, groups, sample.groups, refgroup, altgroup, cluster.sep.chr) {
  if (!requireNamespace("DESeq2", quietly = TRUE)) {
    stop("You have to install DESeq2 package to use differential expression")
  }

  if (!inherits(con.obj,'Conos')) stop('con.obj must be a conos object')
  if (is.null(groups) ) stop('groups must be specified');
  if (is.null(sample.groups) ) stop('sample.groups must be specified')
  if (!inherits(sample.groups,'list')) stop('sample.groups must be a list');
  #if ( length(sample.groups) != 2 ) stop('sample.groups must be of length 2');
  if (!all(unlist(lapply(sample.groups, function(x) class(x) == 'character'))) )
    stop('sample.groups must be a list of character vectors');
  if (!all(unlist(lapply(sample.groups, function(x) length(x) > 0))) )
    stop('sample.groups entries must be on length greater or equal to 1')
  if (!all(unlist(lapply(sample.groups, function(x) {all(x %in% names(con.obj$samples))}))) )
    stop('sample.groups entries must be names of samples in the conos object')
  if (is.null(refgroup)) stop('reference group is not defined')
  if (is.null(altgroup)) stop('altgroup is not defined')
  ## todo: check samplegrousp are named
  if(is.null(names(sample.groups))) stop('sample.groups must be named')
  if(!inherits(groups,'factor')) stop('groups must be a factor')
  if(any(grepl(cluster.sep.chr, names(con.obj$samples),fixed=TRUE)))
    stop('cluster.sep.chr must not be part of any sample name')
  if(any(grepl(cluster.sep.chr,levels(groups),fixed=TRUE)))
    stop('cluster.sep.chr must not be part of any cluster name')
}

#' Get raw matrices with common genes
#'
#' @param con.obj Conos object
#' @param sample.groups list of samples to select from Conos object, con.obj$samples (default=NULL)
#' @return raw matrices subset with common genes
#' @export
rawMatricesWithCommonGenes <- function(con.obj, sample.groups=NULL) {
  samples <- con.obj$samples
  if (!is.null(sample.groups)) {
    samples <- samples[unlist(sample.groups)]
  }

  ## Generate an aggregated matrix
  raw.mats <- lapply(samples, getRawCountMatrix, transposed=TRUE)
  common.genes <- Reduce(intersect,lapply(raw.mats, colnames))
  return(lapply(raw.mats, function(x) {x[,common.genes]}))
}

#' @keywords internal
adjustMatrixRownames <- function(name, cm, cluster.sep.chr) {rownames(cm) <- paste0(name, cluster.sep.chr, rownames(cm)); return(cm)}

#' @keywords internal
rbindDEMatrices <- function(mats, cluster.sep.chr) {
  mats <- lapply(names(mats), function(n) {
    rownames(mats[[n]]) <- paste0(n, cluster.sep.chr, rownames(mats[[n]]));
    return(mats[[n]])
  })

  return(t(do.call(rbind, mats)))
}

#' @keywords internal
strpart <- function (x, split, n, fixed = FALSE) {
  sapply(strsplit(as.character(x), split, fixed = fixed), "[", n)
}

#' @keywords internal
is.error <- function (x) {
  inherits(x, c("try-error", "error"))
}


#' Do differential expression for each cell type in a conos object between the specified subsets of apps
#'
#' @param con.obj conos object
#' @param groups factor specifying cell types (default=NULL)
#' @param sample.groups a list of two character vector specifying the app groups to compare (default=NULL)
#' @param cooks.cutoff boolean cooksCutoff for DESeq2 (default=FALSE)
#' @param ref.level the reference level of the sample.groups against which the comparison should be made (default=NULL). If NULL, will pick the first one.
#' @param min.cell.count integer Minimal number of cells per cluster for a sample to be taken into account in a comparison (default=10)
#' @param remove.na boolean If TRUE, remove NAs from DESeq calculations, which often arise as comparisons not possible (default=TRUE)
#' @param max.cell.count maximal number of cells per cluster per sample to include in a comparison (useful for comparing the number of DE genes between cell types) (default=Inf)
#' @param test which DESeq2 test to use (options: "LRT" or "Wald") (default="LRT")
#' @param independent.filtering boolean independentFiltering for DESeq2 (default=FALSE)
#' @param n.cores numeric Number of cores (default=1)
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names (default='<!!>')
#' @param return.details boolean Whether to return verbose details (default=TRUE)
#' @return A list of differential expression results for every cell type
#' @export getPerCellTypeDE
getPerCellTypeDE <- function(con.obj, groups=NULL, sample.groups=NULL, cooks.cutoff=FALSE, ref.level=NULL, min.cell.count=10,
  remove.na=TRUE, max.cell.count=Inf, test="LRT", independent.filtering=FALSE, n.cores=1, cluster.sep.chr = '<!!>', return.details=TRUE) {
  validatePerCellTypeParams(con.obj, groups, sample.groups, ref.level, cluster.sep.chr)

  ## Generate a summary dataset collapsing the cells of the same type in each sample
  ## and merging everything in one matrix
  aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
    lapply(collapseCellsByType, groups=groups, min.cell.count=min.cell.count, max.cell.count=max.cell.count) %>%
    rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
  gc()

  ## For every cell type get differential expression results
  de.res <- papply(sn(levels(groups)), function(l) {
    tryCatch({
      ## Get count matrix
      cm <- aggr2[,strpart(colnames(aggr2),cluster.sep.chr,2,fixed=TRUE) == l]
      ## Generate metadata
      meta <- data.frame(
        sample.id= colnames(cm),
        group= as.factor(unlist(lapply(colnames(cm), function(y) {
          y <- strpart(y,cluster.sep.chr,1,fixed=TRUE)
          names(sample.groups)[unlist(lapply(sample.groups,function(x) any(x %in% y)))]
        })))
      )
      if (!ref.level %in% levels(meta$group))
        stop('The reference level is absent in this comparison')
      meta$group <- relevel(meta$group, ref=ref.level)
      if (length(unique(as.character(meta$group))) < 2) {
        stop('The cluster is not present in both conditions')
      }
      ## check counts
      checkCountsWholeNumbers(cm)
      dds1 <- DESeq2::DESeqDataSetFromMatrix(cm, meta, design=~group)
      if(test=="LRT") {
        dds1 <- DESeq2::DESeq(dds1,test="LRT", reduced = ~ 1)
      } else { # defaults to Wald
        dds1 <- DESeq2::DESeq(dds1)
      }
      res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
      res1 <- as.data.frame(res1)
      res1 <- res1[order(res1$padj, decreasing = FALSE),]
      ## remove NA values, which exist in log2FoldChange, lfcSE, stat, pvalue, padj
      if (remove.na){
        res1 <- res1[!is.na(res1$padj), ]
      }
      if(return.details) {
        list(res=res1, cm=cm, sample.groups=sample.groups)
      } else {
        res1
      }
    }, error=function(err) {warning("Error for level ", l, ": ", err$message); return(NA)})
  }, n.cores=n.cores)
  de.res
}


#' Save differential expression as table in *csv format
#'
#' @param de.results output of differential expression results, corrected or uncorrected
#' @param saveprefix character prefix for output file
#' @param gene.metadata gene metadta to include (default=NULL)
#' @export
saveDEasCSV <- function(de.results, saveprefix, gene.metadata=NULL) {
    ## find errors
    n.error <- sum(unlist(lapply(de.results,is.error)))
    if(n.error > 0) {
        message("Warning: ", n.error, " of ", length(de.results), ' results have returned an error; ignoring...\n')
    }

    de.results <- de.results[!unlist(lapply(de.results,is.error))]
    ##
    x <- lapply(namedNames(de.results), function(ncc) {
        res.celltype <- de.results[[ncc]]
        res.table <- as.data.frame(res.celltype$res)
        ## append gene names
        res.table$gene <- rownames(res.table)
        ## append singificance
        res.table$significant <- res.table$padj < 0.05
        res.table$log2FoldChange[is.na(res.table$log2FoldChange)] <- 0
        ## Append Z scores and rowid
        res.table$Z <- qnorm(1 - (res.table$pval/2))
        res.table$Z[is.na(res.table$Z)] <- 0
        res.table$Za <- qnorm(1 - (res.table$padj/2))
        res.table$Za[is.na(res.table$Za)] <- 0
        res.table$Z <- res.table$Z  * sign(res.table$log2FoldChange)
        res.table$Za <- res.table$Za  * sign(res.table$log2FoldChange)
        if(!is.null(gene.metadata)) {
            ## match order to metadata table
            mo <- match(as.character(gene.metadata$geneid),as.character(res.table$gene))
            ## drop gene id column
            keep.cols <- colnames(gene.metadata)[colnames(gene.metadata) != 'geneid']
            names(keep.cols) <- keep.cols
            res.table <- cbind(res.table, gene.metadata[mo,keep.cols,drop=FALSE])
        }
        file <- paste0(saveprefix,make.names(ncc),'.csv')
        write.table(x=res.table,file=file)
        res.table
    })
    invisible(x)
}

#' Save differential expression results as JSON
#'
#' @param de.results differential expression results (default=NULL)
#' @param saveprefix prefix for the differential expression output (default=NULL)
#' @param gene.metadata data.frame with gene metadata (default=NULL)
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names (default='<!!>')
#' @return JSON with DE results
#' @export
saveDEasJSON <- function(de.results = NULL, saveprefix = NULL, gene.metadata = NULL, cluster.sep.chr='<!!>') {

    if (!requireNamespace("jsonlite", quietly = TRUE)) {
      stop("Package \"jsonlite\" is needed for this function to work. Please install it.", call. = FALSE)
    }
    ## Check input
    if(is.null(de.results)){
      stop('The argument "de.results" has not been specified')
    }
    if(is.null(saveprefix)){
      stop('The argument "saveprefix" has not been specified')
    }
    ## Find de instances that didn't work (usually because cell type is absent from one or more sample types)
    n.error <- sum(unlist(lapply(de.results, is.error)))
    if(n.error > 0) {
        message("Warning: ", n.error,' of ', length(de.results),' results have returned an error; ignoring...\n')
    }

    ## get the de results that worked
    de.results <- de.results[!unlist(lapply(de.results, is.error))]
    ## Generate structure and save JSON
    lapply(namedNames(de.results), function(ncc) {
        res.celltype <- de.results[[ncc]]
        ## Get results table as df
        res.table <- as.data.frame(res.celltype$res)
        ## append gene names
        res.table$gene <- rownames(res.table)
        ## append singificance
        res.table$significant <- res.table$padj < 0.05
        res.table$log2FoldChange[is.na(res.table$log2FoldChange)] <- 0
        ## Append Z scores and rowid
        res.table$Z <- qnorm(1 - (res.table$pval/2))
        res.table$Z[is.na(res.table$Z)] <- 0
        res.table$Za <- qnorm(1 - (res.table$padj/2))
        res.table$Za[is.na(res.table$Za)] <- 0
        res.table$Z <- res.table$Z  * sign(res.table$log2FoldChange)
        res.table$Za <- res.table$Za  * sign(res.table$log2FoldChange)
        res.table$rowid <- 1:nrow(res.table)
        if (!is.null(gene.metadata)) {
            ## match order to metadata table
            mo <- match(as.character(gene.metadata$geneid),as.character(res.table$gene))
            ## drop gene id column
            keep.cols <- colnames(gene.metadata)[colnames(gene.metadata) != 'geneid']
            names(keep.cols) <- keep.cols
        }
        res.table <- cbind(res.table, gene.metadata[mo,keep.cols,drop=FALSE])
        ## get names of all the genes
        all.genes <- rownames(res.table)
        ## Get the count matrix
        cm <-res.celltype$cm
        ## remove the cell type suffix
        colnames(cm) <- strpart(colnames(cm),cluster.sep.chr,1,fixed=TRUE)
        ## ilev entry (submatrices of cps)
        ilev <- lapply(res.celltype$sample.groups, function(sg) {
            ## In certain cases columns may be missing,skip
            sg <- sg[sg %in% colnames(cm)]
            ## keep only cols of interest
            cm.tmp <- cm[,sg]
            ## convert to matrix
            cm.tmp <- as.matrix(cm.tmp)
            rownames(cm.tmp) <- rownames(cm)
            ## calculate cpm
            cpm <- sweep(cm.tmp, 2, apply(cm.tmp,2, sum), FUN='/')
            cpm <- log10(cpm * 1e6 + 1)
            ##
            snames1 <- colnames(cpm)
            ## Put genes in order
            cpm <- cpm[all.genes,]
            colnames(cpm) <- NULL;
            rownames(cpm) <- NULL;
            ## return
            list(snames=snames1, val=as.matrix(cpm))
        })
        ## snames entry (samplenames)
        snames <- names(res.celltype$sample.groups)
        ## convert to json
        tojson <- list(
            res = res.table,
            genes = all.genes,
            ilev = ilev,
            snames = snames
        )
        y <- jsonlite::toJSON(tojson)
        ## File to save to
        file <- paste0(saveprefix,make.names(ncc),'.json')
        ## create the json file
        write(y,file)
        NULL
    })
    invisible(NULL)
}

#' Compare two cell types across the entire panel
#'
#' @param con.obj conos object
#' @param groups factor describing cell grouping (default=NULL)
#' @param sample.groups a named list of two character vectors specifying the app groups to compare (default=NULL)
#' @param cooks.cutoff boolean cooksCutoff parameter for DESeq2 (default=FALSE)
#' @param refgroup cell type to compare to be used as reference (default=NULL)
#' @param altgroup cell type to compare to be used as ALT against refgroup (default=NULL)
#' @param min.cell.count numeric Minimum number of cells per celltype/sample combination to keep (default=10)
#' @param independent.filtering boolean Whether to use independentFiltering parameter for DESeq2 (default=FALSE)
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names (default='<!!>')
#' @param return.details boolean Return detailed results (default=TRUE)
#' @param only.paired boolean Only keep samples that that both cell types above the min.cell.count threshold (default=TRUE)
#' @param remove.na boolean If TRUE, remove NAs from DESeq calculations (default=TRUE)
#' @return Returns either a DESeq2::results() object, or if return.details=TRUE,
#'    returns a list of the DESeq2::results(), the samples from the panel to use in this comparison, refgroups, altgroup, and samplegroups
#' @export
getBetweenCellTypeDE <- function(con.obj, groups=NULL, sample.groups=NULL, cooks.cutoff = FALSE, refgroup = NULL, altgroup = NULL, min.cell.count = 10,
                                 independent.filtering = FALSE, cluster.sep.chr = '<!!>',return.details=TRUE, only.paired=TRUE, remove.na=TRUE) {
  # TODO: do we really need sample.groups here? They are used in the corrected version for some unknown reason.
  validateBetweenCellTypeParams(con.obj, groups, sample.groups, refgroup, altgroup, cluster.sep.chr)
  ## Get the samples from the panel to use in this comparison
  aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
    lapply(collapseCellsByType, groups=groups, min.cell.count=min.cell.count) %>%
    rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
  gc()

  aggr2.meta <- generateDEMatrixMetadata(aggr2, refgroup, altgroup, cluster.sep.chr)

  ## Get the samples that have both cell types only
  if (only.paired) {
    complete.obs <- names(which(apply(reshape2::acast(aggr2.meta, library ~ celltype),1,function(x){sum(is.na(x))}) == 0, useNames = TRUE))
    aggr2.meta <- aggr2.meta[aggr2.meta$library %in% complete.obs,]
  }
  ## Select the desired samples only
  aggr2.meta$celltype <- relevel(aggr2.meta$celltype, ref = refgroup)
  aggr2 <- aggr2[,rownames(aggr2.meta)]
  ## check counts
  checkCountsWholeNumbers(aggr2)
  ## Generate DESeq2 comparison
  dds1 <- DESeq2::DESeqDataSetFromMatrix(aggr2, aggr2.meta, design = ~ library + celltype)
  dds1 <- DESeq2::DESeq(dds1)
  res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
  res1 <- res1[order(res1$padj,decreasing = FALSE),]
  if (remove.na){
    res1 <- res1[!is.na(res1$padj), ]
  }
  ## Return
  if(return.details) {
    list(res=res1, cm=aggr2, meta = aggr2.meta, refgroup = refgroup, altgroup = altgroup, sample.groups=sample.groups)
  } else {
    res1
  }
}

#' @keywords internal
generateDEMatrixMetadata <- function(mtx, refgroup, altgroup, cluster.sep.chr) {
  meta <- data.frame(
    row.names = colnames(mtx),
    sample=colnames(mtx),
    library=strpart(colnames(mtx), cluster.sep.chr, 1, fixed=TRUE),
    celltype = strpart(colnames(mtx), cluster.sep.chr, 2, fixed=TRUE)
  )

  return(subset(meta, .data$celltype %in% c(refgroup, altgroup)))
}

#' Compare two cell types across the entire panel
#'
#' @param con.obj conos object
#' @param sample.groups a named list of two character vectors specifying the app groups to compare
#' @param groups factor describing cell grouping
#' @param cooks.cutoff cooksCutoff parameter for DESeq2
#' @param refgroup cell type to compare to be used as reference
#' @param altgroup cell type to compare to
#' @param min.cell.count minimum number of cells per celltype/sample combination to keep
#' @param independent.filtering independentFiltering parameter for DESeq2
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names
#' @param return.details logical, return detailed results
#' @param only.paired only keep samples that that both cell types above the min.cell.count threshold
#' @param correction fold change corrections per genes
#' @param ref.level reference level on the basis of which the correction was calculated
#' @return Returns either a DESeq2::results() object, or if return.details=TRUE,
#'    returns a list of the DESeq2::results(), the samples from the panel to use in this comparison, refgroups, altgroup, and samplegroups
#' @export
getBetweenCellTypeCorrectedDE <- function(con.obj, sample.groups =  NULL, groups=NULL, cooks.cutoff = FALSE, refgroup = NULL, altgroup = NULL, min.cell.count = 10,
                                          independent.filtering = FALSE, cluster.sep.chr = '<!!>',return.details=TRUE, only.paired=TRUE, correction = NULL, ref.level=NULL) {

  if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) {
    stop("Package \"SummarizedExperiment\" is needed for this function to work. Please install it from Bioconductor.", call. = FALSE)
  }
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package \"plyr\" is needed for this function to work. Please install it.", call. = FALSE)
  }
  validateBetweenCellTypeParams(con.obj, groups, sample.groups, refgroup, altgroup, cluster.sep.chr)
  ## Get the samples from the panel to use in this comparison
  aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
    lapply(collapseCellsByType, groups=groups, min.cell.count=min.cell.count) %>%
    rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
  gc()

  aggr2.meta <- generateDEMatrixMetadata(aggr2, refgroup, altgroup, cluster.sep.chr=cluster.sep.chr)
  ## Get the samples that have both cell types only
  if (only.paired) {
    complete.obs <- reshape2::acast(aggr2.meta, library ~ celltype, value.var = 'celltype', fun.aggregate = length) %>%
      apply(1, function(x) sum(is.na(x)) == 0) %>% which(useNames = TRUE) %>% names()
    aggr2.meta <- aggr2.meta[aggr2.meta$library %in% complete.obs,]
  }
  ## Select the desired samples only
  aggr2.meta$celltype <- relevel(aggr2.meta$celltype, ref = refgroup)
  aggr2 <- aggr2[,rownames(aggr2.meta)]
  tmp1 <- reshape2::melt(sample.groups)
  colnames(tmp1) <- c('sample','group')
  aggr2.meta$group <-  factor(tmp1$group[match(as.character(aggr2.meta$library), tmp1$sample)])
  aggr2.meta$group <- relevel(aggr2.meta$group, ref = ref.level)
  rm(tmp1)
  ## check counts
  checkCountsWholeNumbers(aggr2)
  ## Generate DESeq2 comparison
  dds1 <- DESeq2::DESeqDataSetFromMatrix(aggr2, aggr2.meta, design = ~ celltype)
  ## Apply the correction based on sample type
  dds1 <- DESeq2::estimateSizeFactors(dds1)
  sf <- DESeq2::sizeFactors(dds1)
  if(!(all(rownames(aggr2) %in% names(correction)) & all(names(correction) %in% rownames(aggr2))))
    stop('incompatible matrices')
  nf.tmp <- matrix(rep(sf, nrow(aggr2)),nrow=nrow(aggr2),byrow=TRUE)
  rownames(nf.tmp) <- rownames(aggr2);
  colnames(nf.tmp) <- colnames(aggr2)
  gene.scale.factors <- 2^(correction[rownames(nf.tmp)])
  baselevel <- levels(SummarizedExperiment::colData(dds1)$group)[1]
  x <- do.call(cbind, lapply(SummarizedExperiment::colData(dds1)$group, function(x) {
    if (x == baselevel) {
      rep(1, length(gene.scale.factors))
    } else {
      gene.scale.factors
    }
  }))
  rownames(x) <- rownames(nf.tmp);
  colnames(x) <- colnames(nf.tmp)
  nf.tmp <- nf.tmp * x
  x2 <- plyr::aaply(nf.tmp, 1, function(x) {x / exp(mean(log(x)))})
  DESeq2::normalizationFactors(dds1) <- x2
  ##
  dds1 <- DESeq2::DESeq(dds1)
  res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
  res1 <- res1[order(res1$padj,decreasing = FALSE),]
  ## Return
  if(return.details) {
    list(res=res1, cm=aggr2, meta = aggr2.meta, refgroup = refgroup, altgroup = altgroup, sample.groups=sample.groups)
  } else {
    res1
  }
}

## Marker genes

## Takes data.frames with info about DE genes for single cell type and many samples and
## returns data.frame with aggregated info for this cell type
#' @keywords internal
aggregateDEMarkersAcrossDatasets <- function(marker.dfs, z.threshold, upregulated.only) {
  if (length(marker.dfs) == 0){
    return(data.frame())
  }

  z.scores.per.dataset <- lapply(marker.dfs, function(df) setNames(df$Z, rownames(df)))
  m.vals.per.dataset <- lapply(marker.dfs, function(df) setNames(df$M, rownames(df)))
  gene.union <- lapply(z.scores.per.dataset, names) %>% Reduce(union, .)
  z.scores <- sapply(z.scores.per.dataset, `[`, gene.union) %>% rowMeans(na.rm=TRUE)
  m.vals <- sapply(m.vals.per.dataset, `[`, gene.union) %>% rowMeans(na.rm=TRUE)
  ro <- order(z.scores,decreasing=TRUE)
  pvals <- dnorm(z.scores)
  res <- data.frame(Gene=names(z.scores), M=m.vals, Z=z.scores, PValue=pvals, PAdj=p.adjust(pvals))[ro,]

  z.filter <- if (upregulated.only) res$Z else abs(res$Z)
  return(res[z.filter > z.threshold,])
}

#' @keywords internal
getDifferentialGenesP2 <- function(p2.samples, groups, z.threshold=3.0, upregulated.only=FALSE, verbose=TRUE, n.cores=1) {

  groups %<>% as.character() %>% setNames(names(groups))

  if (verbose) message("Estimating marker genes per sample\n")
  markers.per.sample <- sccore::plapply(p2.samples, function(p2) {
    if (length(intersect(rownames(p2$counts), names(groups))) < 3) {
      list()
    } else {
      if (packageVersion("pagoda2") >= "0.1.1") {
        p2$getDifferentialGenes(groups=groups, z.threshold=0, append.specificity.metrics=FALSE, append.auc=FALSE)
      } else {
        p2$getDifferentialGenes(groups=groups, z.threshold=0)
      }
    }
  }, progress=verbose, n.cores=n.cores)

  if (verbose) message("Aggregating marker genes\n")

  ## sccore::plapply, at least v1.0.1
  ## sometimes appends warnings to output, e.g. there's a list element 'value' and 'warning'
  ## 
  ## $warning
  ## <simpleWarning in p2$getDifferentialGenes(groups = groups, z.threshold = 0, append.specificity.metrics = FALSE,     append.auc = FALSE): cluster vector doesn't specify groups for all of the cells, dropping missing cells from comparison>
  ##
  ## 
  ## If there's a warning, only take the value
  if (!is.null(markers.per.sample$warning)) {
    markers.per.sample <- markers.per.sample$value
  }

  markers.per.type <- unique(groups) %>% setNames(., .) %>%
    lapply(function(id) lapply(markers.per.sample, `[[`, id) %>% .[!sapply(., is.null)]) 
  markers.per.type = sccore::plapply(markers.per.type, aggregateDEMarkersAcrossDatasets, z.threshold=z.threshold, upregulated.only=upregulated.only, progress=verbose, n.cores=n.cores)
  return(markers.per.type)
}

#' Check that the count data contain only integer counts
#'
#' @param input.matrix the count data from aggregated samples input to DESeq
#' @return if non-integer counts are found, an error is returned
#' @keywords internal
checkCountsWholeNumbers <- function(input.matrix){
  ## check all non-zero values whole numbers
  if (!(all(input.matrix == floor(input.matrix)))){
    stop("There are counts in matrix ", input.matrix, "which are not integers. This leads to DESeq errors. Please check your count matrices.")
  }
}
Any scripts or data that you put into this service are public.
conos documentation built on May 29, 2024, 9:56 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
conos
Clustering on Network of Samples

R/de_functions.R
In conos: Clustering on Network of Samples

Defines functions aggregateDEMarkersAcrossDatasets getBetweenCellTypeCorrectedDE generateDEMatrixMetadata getBetweenCellTypeDE saveDEasJSON saveDEasCSV getPerCellTypeDE rbindDEMatrices adjustMatrixRownames rawMatricesWithCommonGenes validateBetweenCellTypeParams validatePerCellTypeParams namedNames

Documented in getBetweenCellTypeCorrectedDE getBetweenCellTypeDE getPerCellTypeDE namedNames rawMatricesWithCommonGenes saveDEasCSV saveDEasJSON

Try the conos package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

conos Clustering on Network of Samples

R/de_functions.R In conos: Clustering on Network of Samples

Defines functions aggregateDEMarkersAcrossDatasets getBetweenCellTypeCorrectedDE generateDEMatrixMetadata getBetweenCellTypeDE saveDEasJSON saveDEasCSV getPerCellTypeDE rbindDEMatrices adjustMatrixRownames rawMatricesWithCommonGenes validateBetweenCellTypeParams validatePerCellTypeParams namedNames

Documented in getBetweenCellTypeCorrectedDE getBetweenCellTypeDE getPerCellTypeDE namedNames rawMatricesWithCommonGenes saveDEasCSV saveDEasJSON

Try the conos package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

conos
Clustering on Network of Samples

R/de_functions.R
In conos: Clustering on Network of Samples