R/GlowwormScale.R

Defines functions GlowwormScale

Documented in GlowwormScale

#' @title Extract data from Seurat file and scale
#' @param SeuFile Seurat file
#' @param MetaData A dataframe with barcodes as row names, and either one column of assignments,
#' or two columns of assignments (First column for granular assignments (i.e. Neuro_1), second column for broad assignments (i.e. Neurons))
#' @param GeneList A vector of genes you wish to analyze
#' @param PercentCutoff To remove background signal, clusters which express a candidate gene in less than a
#' percentage of cells will be reduced to zero. Default is 0.1 (10%)
#' @return This function takes a Seurat file, extracts data and merges with metadata.
#' This data is scaled and has background correction \cr \cr Exports a list containing:
#' \itemize{
#'     \item MetaData - A copy of the metadata
#'     \item GlowwormScaleOutput - A scaled data frame of n genes by n populations in metadata
#'     \item SumStatsGlobal - A data frame containing Total enrichment score (abundance), SD enrichment score (specificity) and
#'     the rank score. \cr \cr
#'         Output list can be used to generate a Scatter plot via GlowwormOutputScatter, a stacked bar plot via GlowwormOutputStacked,
#'         or can be used as input for GlowwormSubpopulation
#'         }
#' @importFrom matrixStats rowSds
#'
#' @export




GlowwormScale = function(SeuFile, MetaData, GeneList, PercentCutoff = 0.10){
  OutputList = list()
  OutputList[["MetaData"]] = MetaData
  SeuFile_Reduced = subset(SeuFile, cells = row.names(MetaData))
  DataMatrix = as.data.frame(SeuFile_Reduced@assays$RNA@data)
  Subset_Data = subset(DataMatrix, row.names(DataMatrix) %in% GeneList)
  GetGenes = row.names(Subset_Data)

  DataMatrix_Zscore = (Subset_Data-rowMeans(Subset_Data))/(rowSds(as.matrix(Subset_Data)))[row(Subset_Data)]

  DataMatrix_Zscore_t = as.data.frame(t(DataMatrix_Zscore))
  MetaDataComb = MetaData

  if(length(MetaDataComb) == 2){
    Names = colnames(MetaData)
    MetaDataComb$Combined = paste(MetaDataComb[[Names[1]]], MetaDataComb[[Names[2]]], sep="^")
    MetaDataComb[1:2] = NULL
  }else if(length(MetaDataComb) == 1){
    Names = colnames(MetaData)
    MetaDataComb$Combined = paste(MetaDataComb[[Names[1]]], MetaDataComb[[Names[2]]], sep="^")
    MetaDataComb[1] = NULL
  }else{
    stop("MetaData should have barcodes as row names and either one column of assignments, or two columns containing broad assignments and granular assignments")
  }

  DataMatrix_Zscore_wAssigns = merge(DataMatrix_Zscore_t, MetaDataComb, by = 0)
  row.names(DataMatrix_Zscore_wAssigns) = DataMatrix_Zscore_wAssigns$Row.names
  DataMatrix_Zscore_wAssigns$Row.names = NULL
  DataMatrix_Zscore_wAssigns2  = DataMatrix_Zscore_wAssigns %>% dplyr::select(all_of(GetGenes), Combined)
  DataMatrix_Zscore_wAssigns2[DataMatrix_Zscore_wAssigns2 <= 0] <- 0

  ScreenedData = as.data.frame(matrix(ncol = length(GetGenes)+1, nrow = 0))
  colnames(ScreenedData) = c(GetGenes, "Combined")

  for(p in unique(DataMatrix_Zscore_wAssigns2$Combined)){
    Subs = subset(DataMatrix_Zscore_wAssigns2, DataMatrix_Zscore_wAssigns2$Combined == p)
    Subs$Combined = NULL
    PercentPos = apply(Subs, 2, function(x){ sum(as.numeric(x) > 0)/dim(Subs)[1]} )
    PercentPos = as.data.frame(PercentPos)
    PercentPos_High = subset(PercentPos, PercentPos >= PercentCutoff)
    PercentPos_Low = subset(PercentPos, PercentPos < PercentCutoff)
    PullHighExpGenes = Subs %>% dplyr::select(row.names(PercentPos_High))
    PullLowExpGenes = Subs %>% dplyr::select(row.names(PercentPos_Low))
    PullLowExpGenes[PullLowExpGenes > 0] = 0
    Updated_DF = cbind(PullHighExpGenes, PullLowExpGenes)
    Updated_DF$Combined = p

    ScreenedData = rbind(ScreenedData, Updated_DF)
  }

  DataMatrix_Zscore_Reduced = ScreenedData %>% group_by(Combined) %>% summarise_all(mean, na.rm = TRUE)
  DataMatrix_Zscore_Reduced = as.data.frame(DataMatrix_Zscore_Reduced)
  row.names(DataMatrix_Zscore_Reduced) = DataMatrix_Zscore_Reduced$Combined
  DataMatrix_Zscore_Reduced$Combined = NULL
  DataMatrix_Zscore_Reduced[DataMatrix_Zscore_Reduced < 0] <- 0
  DataMatrix_Zscore_Reduced
  DataMatrix_Zscore_Reduced$Pop = gsub(".*\\^", "", row.names(DataMatrix_Zscore_Reduced))
  DataMatrix_Zscore_Reduced$Broad = gsub("\\^.*", "", row.names(DataMatrix_Zscore_Reduced))
  OutputList[["GlowwormScaleOutput"]] = DataMatrix_Zscore_Reduced

  DataMatrix_Zscore_Reduced$Pop = NULL
  DataMatrix_Zscore_Reduced$Broad = NULL
  GlowwormScaleOutput_t = as.data.frame(t(DataMatrix_Zscore_Reduced))
  SumStats = as.data.frame(matrix(ncol = 0, nrow = dim(GlowwormScaleOutput_t)[1]))
  row.names(SumStats) = row.names(GlowwormScaleOutput_t)
  SumStats$Total = sqrt(rowSums(GlowwormScaleOutput_t))
  SumStats$SD = apply(GlowwormScaleOutput_t[,-ncol(GlowwormScaleOutput_t)], 1, sd)
  SumStats = data.frame(SumStats[order(-SumStats$Total),])
  SumStats$RankScore = SumStats$Total * SumStats$SD
  SumStats = SumStats[order(-SumStats$RankScore),]
  OutputList[["SumStats"]] = SumStats
  return(OutputList)
}
Hannahglover/Glowworm documentation built on Jan. 16, 2024, 11:47 p.m.