R/select_tf_gene.R

Defines functions SelectGenes SelectTFs

Documented in SelectGenes SelectTFs

#' Select TFs for GRN inference
#'
#' @param object A Seurat object
#' @param tf.assay The assay name for TF activity. Default: "chromvar"
#' @param rna.assay The assay name for gene expression. Default: "RNA"
#' @param atac.assay The assay name for Peaks. Default: "ATAC"
#' @param trajectory.name The trajectory name used for computing correlation
#' between TF binding activity and TF expression
#' @param groupEvery The number of sequential percentiles to group together when generating a trajectory.
#' This is similar to smoothing via a non-overlapping sliding window across pseudo-time.
#' @param p.cutoff A cutoff of p-values. Default: 0.01
#' @param cor.cutoff A cutoff of correlation. Default: 0.3
#' @param return.heatmap Whether or not return the heatmap for visualization
#'
#' @return A list containing a dataframe of TF activity and expression correlation
#'and a heatmap
#' @export
#'
SelectTFs <- function(object,
                      tf.assay = "chromvar",
                      rna.assay = "RNA",
                      atac.assay= "ATAC",
                      trajectory.name = "Trajectory",
                      groupEvery = 1,
                      p.cutoff = 0.01,
                      cor.cutoff = 0.3,
                      return.heatmap = TRUE) {

    trajMM <- GetTrajectory(
    object,
    assay = tf.assay,
    trajectory.name=trajectory.name,
    groupEvery=groupEvery,
    slot = "data",
    smoothWindow = 7,
    log2Norm = FALSE
  )

  rownames(trajMM) <- object@assays[[atac.assay]]@motifs@motif.names

  trajRNA <- GetTrajectory(
    object,
    assay = rna.assay,
    trajectory.name=trajectory.name,
    groupEvery=groupEvery,
    slot = "data",
    smoothWindow = 7,
    log2Norm = TRUE
  )

  df.cor <- GetCorrelation(trajMM, trajRNA)

  # we only select TFs that show significant correlation
  df.cor <- df.cor[df.cor$adj_p < p.cutoff & df.cor$correlation > cor.cutoff, ]

  matMM <- suppressMessages(TrajectoryHeatmap(
    trajMM,
    varCutOff = 0,
    pal = paletteContinuous(set = "solarExtra"),
    limits = c(-2, 2),
    name = "TF activity",
    returnMatrix = TRUE
  ))

  df_tf_time_point <- data.frame(tfs = rownames(matMM),
                                 time_point = seq(1, 100, length.out = nrow(matMM)))
  rownames(df_tf_time_point) <- df_tf_time_point$tfs

  df_tf_time_point <- df_tf_time_point[df.cor$tfs,]
  df.cor$time_point <- df_tf_time_point$time_point
  df.cor <- df.cor[order(df.cor$time_point),]

  trajMM <- trajMM[df.cor$tfs,]
  trajRNA <- trajRNA[df.cor$tfs,]


  if (return.heatmap) {
    ht <- suppressMessages(
      CorrelationHeatmap(
        trajectory1 = trajMM,
        trajectory2 = trajRNA,
        name1 = "TF activity",
        name2 = "Gene expression"
      )
    )

    res <- list("tfs" = df.cor, "heatmap" = ht)

  } else{
    res <- list("tfs" = df.cor)

  }

  return(res)

}

#' Select genes
#'
#' @param object A Seurat object
#' @param atac.assay The assay name for chromatin accessibility. Default: "ATAC"
#' @param rna.assay The assay name for gene expression. Default: "RNA"
#' @param var.cutoff.gene The cutoff of variation to select genes. Default: 0.9
#' @param trajectory.name The trajectory name used for computing correlation
#' between TF binding activity and TF expression
#' @param groupEvery The number of sequential percentiles to group together when generating a trajectory.
#' This is similar to smoothing via a non-overlapping sliding window across pseudo-time.
#' @param distance.cutoff The minimum distance between cis-regulatory elements and genes
#' @param cor.cutoff The cutoff of peak-to-gene correlation. Default: 0
#' @param fdr.cutoff The cutoff of peak-to-gene p-value Default: 1e-04
#' @param return.heatmap Whether or not return the heatmap for visualization
#' @param labelTop1 Number of labels for row names
#' @param labelTop2 Number of labels for row names
#'
#' @return A list containing a dataframe of peak-to-gene links and a heatmap
#' @export
#'
SelectGenes <- function(object,
                        atac.assay = "ATAC",
                        rna.assay = "RNA",
                        var.cutoff.gene = 0.9,
                        trajectory.name = "Trajectory",
                        distance.cutoff = 2000,
                        groupEvery = 1,
                        cor.cutoff = 0,
                        fdr.cutoff = 1e-04,
                        return.heatmap = TRUE,
                        labelTop1 = 10,
                        labelTop2 = 10,
                        genome = "hg38"
                        ) {
  trajRNA <- GetTrajectory(
    object,
    assay = rna.assay,
    trajectory.name = trajectory.name,
    groupEvery = groupEvery,
    slot = "data",
    smoothWindow = 7,
    log2Norm = TRUE
  )

  trajATAC <- GetTrajectory(
    object,
    assay = atac.assay,
    groupEvery = groupEvery,
    trajectory.name = trajectory.name,
    slot = "data",
    smoothWindow = 7,
    log2Norm = TRUE
  )

  # note here we only use the top 10% most variable genes
  groupMatRNA <- suppressMessages(
    TrajectoryHeatmap(
      trajRNA,
      varCutOff = var.cutoff.gene,
      pal = paletteContinuous(set = "horizonExtra"),
      limits = c(-2, 2),
      returnMatrix = TRUE
    )
  )

  groupMatATAC <- suppressMessages(
    TrajectoryHeatmap(
      trajATAC,
      varCutOff = 0,
      maxFeatures = nrow(trajATAC),
      pal = paletteContinuous(set = "solarExtra"),
      limits = c(-2, 2),
      name = "Chromatin accessibility",
      returnMatrix = TRUE
    )
  )

  message("Linking cis-regulatory elements to genes...")
  df.p2g <- PeakToGene(peak.mat = groupMatATAC,
                       gene.mat = groupMatRNA,
                       genome = genome)

  df.p2g <- df.p2g %>%
    subset(distance > distance.cutoff) %>%
    subset(Correlation > cor.cutoff & FDR < fdr.cutoff)

  trajATAC <- trajATAC[df.p2g$peak,]
  trajRNA <- trajRNA[df.p2g$gene,]

  if (return.heatmap) {
    ht <- suppressMessages(
      CorrelationHeatmap(
        trajectory1 = trajATAC,
        trajectory2 = trajRNA,
        name1 = "Chromatin accessibility",
        name2 = "Gene expression",
        labelTop1 = labelTop1,
        labelTop2 = labelTop2,
        labelRows1 = FALSE,
        labelRows2 = FALSE
      )
    )

    res <- list("p2g" = df.p2g, "heatmap" = ht)

  } else{
    res <- list("p2g" = df.p2g)
  }


  return(res)

}
CostaLab/scMEGA documentation built on Sept. 25, 2024, 6:11 a.m.