R/Features.R

Defines functions filterSEgenes findPairMarkers FindDEgenes term2gene GiottoToSpark Features

Documented in Features filterSEgenes FindDEgenes findPairMarkers GiottoToSpark term2gene

#' Feature genes 
#' 
#' @param object An giotto object
#' @importFrom Giotto fDataDT
#' @export
Features <- function(object) {
  geneMetadata <- fDataDT(object)
  feature.gene <- subset(geneMetadata, hvg == "yes", select = gene_ID)
  return(feature.gene$gene_ID)
}

#' Transform Giotto object to the Spark object
#'
#' @param object An Giotto object
#' @param ... Other arguments sell also \code{\link[SPARK]{CreateSPARKObject}}
#' 
#' @return An SPARK object
#' @importFrom  SPARK CreateSPARKObject
#'
#' @export

GiottoToSpark <- function(object, ...) {
  counts <- slot(object = object, name = "raw_exprs")
  position <- slot(object = object, name = "spatial_locs")[, 1:2]
  position <- as.data.frame(position)
  rownames(position) <- colnames(counts)
  spark <- CreateSPARKObject(counts = counts,
                             location = position,
                             ...)
  return(spark)
}

#' @title TERM2GENE
#' @param category MSigDB collection abbreviation, such as H or C1.
#' @param Geneformat Format of gene name in count matrix, one of "ensembl_gene", "gene_symbol".
#' @param Species Species name, such as Homo sapiens or Mus musculus.
#' @importFrom   msigdbr msigdbr
#' @export term2gene
term2gene <- function(Geneformat=Geneformat,
                      Species=Species,
                      category=category){
  gene_sets = msigdbr(species = Species,category = category)
  if(Geneformat=="gene_symbol"){
    msigdbr_t2g = gene_sets %>% dplyr::distinct(gs_name, gene_symbol) %>% as.data.frame()}
  if(Geneformat=="ensembl_gene"){
    msigdbr_t2g = gene_sets %>% dplyr::distinct(gs_name, entrez_gene) %>% as.data.frame()}
  return(msigdbr_t2g)
}

#' @title Find markers (differentially expressed genes) among groups.
#' @param counts Either a matrix-like object with un-normalized data with cells as columns and features as rows or an Assay-derived object.
#' @param annotation A data frame of cell types annotation with cell names as rownames and "annot" as the colname. Row-names in the annotation need to match the column names of the counts matrix.
#' @param pval minimal p-value to filter markers
#' @param logFC logFC to filter markers
#' @param minGenes minimum genes to keep in each group per cluster, overrides pval and logFC
#' @export
FindDEgenes <- function( counts,
                         annotation,
                         pval = 0.01,
                         logFC = 0.5, 
                         minGenes = 10,
                         ... ){
  if (! "annot" %in% colnames(annotation)) {
    stop("Input annotation doesn't contain the column of annot.")
  }
  cell_interaction <- intersect(colnames(counts), rownames(annotation))
  annotation <- annotation[cell_interaction, "annot",drop=FALSE]
  celltypes <- unique(annotation$annot)
  result_list = list()
  for ( i in celltypes) {
    print(i)
    othertypes = celltypes[-which(celltypes==i)]
    clusterN = which(celltypes==i)
    markers = findPairMarkers(counts = counts, annotation = annotation, group_1 = i, group_2 = othertypes)
    markers <- as.data.frame(markers)
    markers <- markers %>% mutate(ranking = base::rank(-summary.logFC)) %>% 
      mutate(cluster = i, gene= rownames(markers))
    filtermarkers <- subset(markers, p.value <= pval & summary.logFC >= logFC | ranking <= minGenes)
    result_list[[clusterN]] = filtermarkers
  }
  return(do.call("rbind", result_list))
}

#' @title Find markers (differentially expressed genes) between two groups.
#' @param counts Either a matrix-like object with un-normalized data with cells as columns and features as rows or an Assay-derived object.
#' @param annotation A data frame of cell types annotation with cell names as rownames and "annot" as the colname. Row-names in the annotation need to match the column names of the counts matrix.
#' @param group_1 Character vector of cell types in group_1 for pairwise comparison
#' @param group_2 Character vector of cell types in group_2 for pairwise comparison
findPairMarkers<- function(counts,annotation,group_1,group_2){
  groups = as.data.frame(annotation) %>% mutate(group= if_else((annot %in% group_1), 1,2))
  marker_results = scran::findMarkers(x = counts, groups = groups$group)
  maker_results = marker_results[[1]]
  return(maker_results)
}

#' @title Filter SE (Spatial differently expressed) genes.
#' @param diffgene A list out of SE-genes calculation. \code{\link[STREAM]{DiffGenes}} for more details.
#' @param topgenes Integer; set how many top SE-genes to take, default as 100.
#' @export
#' 
filterSEgenes <- function(diffgene,
                          topgenes=100){
  sparkres <- diffgene[["sparkGenes"]]
  spark_spatialgenes <- rownames(sparkres[order(sparkres$adjusted_pvalue , decreasing = FALSE),])
  topspatialDEgenes <- diffgene[["spatialDE"]]$genes#1
  topbingenes <- diffgene[["binspectGenes"]]$genes #3
  topsilgenes <- diffgene[["silhouetteGenes"]]$genes#2
  topsparkgenes <- spark_spatialgenes 

  X <- list(spatialDE = topspatialDEgenes, 
          silhouetteRank = topsilgenes, 
          Binspect = topbingenes, 
          Spark = topsparkgenes)

  ALL <- Reduce(union, X)
  sum_matrix <- matrix(data=0, nrow=length(ALL), ncol=4)
  rownames(sum_matrix) <- ALL
  index <- lapply(c(seq(1,4)), function(x) {
    sum_matrix[as.vector(unlist(X[x])), x] <- 1
    return(sum_matrix[,x])
  })
  sum_matrix <- do.call(cbind, index)
  colnames(sum_matrix)<- c("spatialDE","silRank", "binspect","spark")
  sums <- rowSums(sum_matrix)
  spatialgenes <- names(sort(sums,decreasing = TRUE)[1:topgenes])
  return(spatialgenes)
}
YeehanXiao/STREAM documentation built on Aug. 13, 2022, 6:43 p.m.