SignallingSingleCell: Network Analysis for single cell RNA sequencing Data (sc-RNASeq)

Documented in norm_sc

#' Normalization
#'
#' This function will perform normalization of your data
#'
#' @param input the input ex_sc
#' @param gene_frac the fraction of cells expressing a given gene to be included in normalization
#' @param gene_var the percentile of least variable genes to keep (ie 0.75 removes the 25 percent most variable genes)
#' @param sf_keep size factors can be greatly skewed in some cells. This will filter (based on z score thresholding of the size factors)
#' cells whose size factor is an outlier.
#' @param genelist the subset of genes to use for calculating size factors. Defaults to null. Provide to overrule internal gene selection.
#' @param norm_by the pdata variable on which to perform internal normalization before normalizing across this variable.
#' @param pool_size A vector of sizes for each pool for the normalization method
#' @param positive enforces positive size factors
#' @export
#' @details
#' If the method is ICA, independent component analysis will be performed, and then tSNE will do the final dimension reduction. If PCA is selected, PCA will be performed before on the expression matrix transpose before tSNE. This PCA will use the cells positions on the principal components. If iPCA is selected, PCA will be be performed but without transposing the data. This will create "meta cells" instead of meta genes created in the typical PCA. Then tSNE will be performed on each cells contribution (loading) to the meta cell. We find that iPCA is much more robust and leads to cleaner clusters than traditional PCA.
#' @examples
#' ex_sc_example <- dim_reduce(input = ex_sc_example, genelist = gene_subset, pre_reduce = "iPCA", nComp = 15, tSNE_perp = 30, iterations = 500, print_progress=TRUE)
#'
norm_sc <- function(input, genelist = NULL, gene_selection = "gini", num_gini = 200, gene_frac = 0.25, gene_var = 0.80, norm_by = "Cluster", pool_sizes = c(20,30,40,50), positive = TRUE){

  clusters <- pData(input)[,norm_by]

  if(is.null(genelist)){
    if(gene_selection == "PCA"){
      frac_mat <-  input
      exprs(frac_mat)[which(exprs(frac_mat) > 0)] <- 1

      gene_set <- c()

      for (i in 1:length(unique(clusters))) {
        int <- unique(clusters)[i]
        ind <- grep(int, clusters)
        frac <- apply(exprs(frac_mat)[,ind], 1, mean)
        set <- names(which(frac > gene_frac))
        gene_set <- c(gene_set, set)
      }

      tab <- table(gene_set)
      genelist <- names(tab)[which(tab == length(unique(clusters)))]
      input2 <- input[genelist,]
      gene_subset_var <- subset_genes(input2, method = "PCA", threshold = 0, minCells = 0, nComp = 10, cutoff = gene_var) # filter genes on variability
      stable_genes <- genelist[!genelist %in% gene_subset_var] # remove variable genes
      genelist <- stable_genes
      csums <- apply(exprs(input)[genelist,], 2,sum)
      zero_csums <- which(csums == 0)
      if(length(zero_csums) > 0){
        err_result <- vector(mode = "list", length = 2)
        err_result[[1]] <- csums
        err_result[[2]] <- stable_genes
        warning("With provided parameters, some cells have zero expression. Try reducing gene_frac argument. See returned result")
        return(err_result)
      }
    }

    if(gene_selection == "gini"){
      gene_set <- c()

      for (i in 1:length(unique(clusters))) {
        int <- unique(clusters)[i]
        ind <- grep(int, clusters)

        gini_mat <- t(log2(exprs(input[,ind])+2)-1)
        gini_genes <- edgeR::gini(gini_mat)
        set <- names(sort(gini_genes)[1:num_gini])
        gene_set <- c(gene_set, set)
      }

      tab <- table(gene_set)
      genelist <- names(tab)[which(tab == length(unique(clusters)))]

      csums <- apply(exprs(input)[genelist,], 2,sum)
      zero_csums <- which(csums == 0)
      if(length(zero_csums) > 0){
        err_result <- vector(mode = "list", length = 2)
        err_result[[1]] <- csums
        err_result[[2]] <- stable_genes
        warning("With provided parameters, some cells have zero expression. Try increasing num_gini argument. See returned result")
        return(err_result)
      }
    }

  }

  SCE <- SingleCellExperiment::SingleCellExperiment(list(counts = exprs(input)))
  SCE <- scran::computeSumFactors(SCE, cluster=clusters, subset.row = genelist, sizes=pool_sizes, positive = positive, min.mean = 0)
  norm_counts <- scater::normalizeCounts(SCE)
  norm_counts[!is.finite(norm_counts)] <- 0
  norm_counts <- norm_counts[apply(norm_counts, 1, function(x) !all(x==0)),]
  norm_counts <- norm_counts[,colSums(norm_counts)>0]
  input_norm <- construct_ex_sc(norm_counts)

  ind <- match(colnames(input_norm), colnames(SCE))
  pData(input_norm) <- pData(input)[ind,]

  size_factor <- sizeFactors(SCE)
  size_factor <- size_factor[ind]
  pData(input_norm)$size_factor <- size_factor
  ind <- match(genelist, rownames(input_norm))

  fData(input_norm)$norm_gene <- 0

  fData(input_norm)$norm_gene[ind] <- "Yes"
  fData(input_norm)$norm_gene[-ind] <- "No"

  return(input_norm)
}

kgellatl/SignallingSingleCell documentation built on Dec. 29, 2021, 4:12 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

kgellatl/SignallingSingleCell
Network Analysis for single cell RNA sequencing Data (sc-RNASeq)

R/norm_sc.R
In kgellatl/SignallingSingleCell: Network Analysis for single cell RNA sequencing Data (sc-RNASeq)

Defines functions norm_sc

Documented in norm_sc

R Package Documentation

Browse R Packages

We want your feedback!

kgellatl/SignallingSingleCell Network Analysis for single cell RNA sequencing Data (sc-RNASeq)

R/norm_sc.R In kgellatl/SignallingSingleCell: Network Analysis for single cell RNA sequencing Data (sc-RNASeq)

Defines functions norm_sc

Documented in norm_sc

R Package Documentation

Browse R Packages

We want your feedback!

kgellatl/SignallingSingleCell
Network Analysis for single cell RNA sequencing Data (sc-RNASeq)

R/norm_sc.R
In kgellatl/SignallingSingleCell: Network Analysis for single cell RNA sequencing Data (sc-RNASeq)