#' Perform a permutation test to check enrichment of a genomic
#' feature with DIRs detected by multiHiCcompare
#'
#' @param hicexp A Hicexp object which has been compared.
#' @param feature A GRanges object containing locations for
#' a genomic feature you would like to test for enrichment
#' in the differentially interacting regions (DIRs).
#' @param p.adj_cutoff The adjusted p-value cutoff for
#' declaring a region significant. See ?topDirs for
#' more information. Defaults to 10^-10
#' @param logfc_cutoff The log fold change cutoff for
#' a region to be declared significant. See ?topDirs
#' for more information. Defaults to 1.
#' @param num.perm The number of permutations to run.
#' Defaults to 1000.
#' @param pval_aggregate Method to aggregate region-specific p-values.
#' If a region differentially interacts with several other regions,
#' the p-values are aggregated using a 'max' method (Default, select maximum
#' p-value, most conservative), or the Fisher ('fisher'), Lancaster ('lancaster'),
#' or Sidak ('sidak') methods (see 'aggregate' package).
#' regions, it is assigned a single p-value aggregated from several. See ?topDirs
#' @return The permutation p-value
#' @export
#' @importFrom GenomicRanges makeGRangesFromDataFrame findOverlaps GRanges
#' @importFrom GenomeInfoDb seqlevelsStyle
#' @importFrom stats start end
#' @import GenomeInfoDbData
#' @examples
#' \dontrun{
#' data("hicexp_diff")
#' data("hg19_cyto")
#' perm_test(hicexp_diff, hg19_cyto)
#' }
perm_test <- function(hicexp, feature, p.adj_cutoff = 10^-10, logfc_cutoff = 1, num.perm = 1000, pval_aggregate = "max") {
# check that feature is a GRanges object
if (!is(feature, "GRanges")) {
stop("feature must be a GRanges object")
}
# convert seqnames of feature to include chr
GenomeInfoDb::seqlevelsStyle(feature) <- "UCSC"
# check that data has been compared
if (nrow(results(hicexp)) < 1) {
stop("Differences must be detected before making a manhattan plot.")
}
# make a GRanges object for the DIRs
sig.regions <- topDirs(hicexp, logfc_cutoff = logfc_cutoff, p.adj_cutoff = p.adj_cutoff, return_df = 'bed')
sig.regions <- makeGRangesFromDataFrame(sig.regions,
seqnames.field = 'chr',
start.field = 'start',
end.field = 'end',
keep.extra.columns = TRUE)
# find overlaps between feature and sig.regions
olaps <- findOverlaps(sig.regions, feature)
message("There are ", length(olaps), " overlaps between your genomic feature and the significant regions")
# If 0 overlaps return p-value of 1
if (length(olaps) < 1) {
return(1)
}
# set up permutation test
sample.size <- length(sig.regions) # Number of DIRs
n.olap <- length(olaps) # How many of them overlap with DE genes
num.olap <- vector(mode = "numeric", length = num.perm) # A vector to store the number of overlaps during permutations
# make background regions GRanges object
regions <- topDirs(hicexp, logfc_cutoff = 0, logcpm_cutoff = -10,
D_cutoff = 0, p.adj_cutoff = 1,
return_df = 'bed', pval_aggregate = "max")
# Order regions
regions <- regions[order(chr, start, end), ]
# Remove unnecessary columns
regions <- regions[, c('chr', 'start', 'end')]
# perform permutation test
for(i in 1:num.perm) {
# Select random regions from the genome of the same size as DIRs
sampled <- sample(1:nrow(regions), sample.size)
sampled.regions <- regions[sampled, ]
sampled.gr <- GRanges(sampled.regions$chr,
IRanges(start = sampled.regions$start,
end = sampled.regions$end))
# Find how many times they overlap with DE genes
tmp.olap <- findOverlaps(sampled.gr, feature)
# Keep the results
num.olap[i] <- length(tmp.olap)
}
# Calculate permutation p-value
p.value <- (sum(num.olap > n.olap) + 1) / (num.perm + 1)
return(p.value)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.