Nothing
#' @title \code{testCoAccessibilityChromVar}
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#' This function is deprecated due as we no longer recommend using a chromVAR
#' background set.
#' \code{testCoAccessibilityChromVar} takes an input set of tile
#' pairs and tests whether they are significantly different compared to a
#' background set found via ChromVAR
#'
#' @param SampleTileObj The SummarizedExperiment object output from
#' getSampleTileMatrix containing your sample-tile matrices
#' @param tile1 vector of indices or tile names (chrX:100-2000) for tile pairs
#' to test (first tile in each pair)
#' @param tile2 vector of indices or tile names (chrX:100-2000) for tile pairs
#' to test (second tile in each pair)
#' @param backNumber number of ChromVAR-matched background pairs. Default is
#' 1000.
#' @param returnBackGround Boolean, if TRUE return the background correlations
#' as well as foreground. Default is FALSE.
#' @param highMem Boolean to control memory usage. Default is FALSE. Only set
#' highMem to TRUE if you have plenty of memory and want to run this function
#' faster.s
#' @param numCores Optional, the number of cores to use with multiprocessing.
#' Default is 1.
#' @param verbose Set TRUE to display additional messages. Default is FALSE.
#' @param ZI boolean flag that enables zero-inflated (ZI) Spearman correlations
#' to be used. Default is TRUE. If FALSE, skip zero-inflation and calculate
#' the normal Spearman.
#'
#' @return foreGround A data.frame with Tile1, Tile2, Correlation, and p-value
#' for that correlation compared to the background
#'
#' @export
#' @keywords internal
testCoAccessibilityChromVar <- function(SampleTileObj,
tile1,
tile2,
numCores = 1,
ZI = TRUE,
backNumber = 1000,
returnBackGround = FALSE,
highMem = FALSE,
verbose = TRUE) {
lifecycle::deprecate_warn("1.0.1",
"testCoAccessibilityChromVar()",
"MOCHA::testCoAccessibility()")
. <- NULL
if (!requireNamespace("chromVAR", quietly = TRUE)) {
stop(
"Package 'chromVAR' is required for combineSampleTileMatrix. ",
"Please install 'chromVAR' to proceed."
)
}
if (length(tile1) != length(tile2)) {
stop("tile1 and tile2 must be the same length.")
}
fullObj <- combineSampleTileMatrix(SampleTileObj)
backPeaks <- chromVAR::getBackgroundPeaks(fullObj)
if (is.character(tile1) && is.character(tile2)) {
nTile1 <- match(tile1, rownames(fullObj))
nTile2 <- match(tile2, rownames(fullObj))
} else if (is.numeric(tile1) && is.numeric(tile2)) {
nTile1 <- tile1
nTile2 <- tile2
tile1 <- rownames(fullObj)[nTile1]
tile2 <- rownames(fullObj)[nTile2]
} else {
stop("tile1 and tile 2 must both be either numbers (indices) or strings")
}
if (backNumber > 2450) {
backNumber <- 2450
if (verbose) {
warning("backNumber too high, setting to maximum of 2450.")
}
} else if (backNumber <= 10) {
stop("backNumber too low (<=10). We recommend at least 100.")
}
cl <- parallel::makeCluster(numCores)
parallel::clusterExport(cl, varlist = c("nTile1", "nTile2", "backPeaks"), envir = environment())
if (verbose) {
message("Finding background peak pairs")
}
backgroundCombos <- pbapply::pblapply(seq_along(nTile1), function(x) {
tmpMat <- expand.grid(backPeaks[nTile1[x], ], backPeaks[nTile2[x], ])
tmpMat <- tmpMat[tmpMat[, 1] != tmpMat[, 2], ]
tmpMat[sample.int(dim(tmpMat)[1], backNumber), ]
}, cl = cl)
gc()
parallel::stopCluster(cl)
cl <- parallel::makeCluster(numCores)
accMat <- SummarizedExperiment::assays(fullObj)[[1]]
## Test original pairs of locations
combPairs <- data.frame(tile1, tile2)
subAccMat <- accMat[unique(c(nTile1, nTile2)), ]
if (verbose) {
message("Identifying foreground")
}
parallel::clusterExport(cl, varlist = c("subAccMat", "combPairs"), envir = environment())
foreGround <- runCoAccessibility(subAccMat, combPairs, ZI, verbose, cl)
if (any(is.na(foreGround$Correlation))) {
if (verbose) {
warning("All foreground correlations are undefined")
}
}
parallel::stopCluster(cl)
gc()
## Now we need to test the background set
if (verbose) {
message("Identifying background correlations.")
}
if (highMem) {
allBackCombos <- do.call("rbind", backgroundCombos)
uniqueBackCombos <- unique(allBackCombos)
combPairs <- data.frame(
Tile1 = rownames(accMat)[uniqueBackCombos[, 1]],
Tile2 = rownames(accMat)[uniqueBackCombos[, 2]]
)
subAccMat <- accMat[unique(c(uniqueBackCombos[, 1], uniqueBackCombos[, 2])), ]
cl <- parallel::makeCluster(numCores)
if (verbose) {
message(
paste("Generating Background correlations for all",
length(tile1), "pairs",
sep = " "
)
)
}
parallel::clusterExport(cl, varlist = c("subAccMat", "combPairs"), envir = environment())
uniquebackGround <- runCoAccessibility(subAccMat, combPairs, ZI, verbose, cl)
allBackCombosDF <- data.frame(
Tile1 = rownames(accMat)[allBackCombos[, 1]],
Tile2 = rownames(accMat)[allBackCombos[, 2]]
)
backGround <- dplyr::left_join(allBackCombosDF, uniquebackGround, by = c("Tile1" = "Tile1", "Tile2" = "Tile2")) %>%
dplyr::group_by(dplyr::row_number() %/% (backNumber + 1)) %>%
dplyr::group_map(~.x)
parallel::stopCluster(cl)
} else {
### low memory implementation
backGround <- list()
for (i in seq_along(backgroundCombos)) {
combPairs <- data.frame(
Tile1 = rownames(accMat)[backgroundCombos[[i]][, 1]],
Tile2 = rownames(accMat)[backgroundCombos[[i]][, 2]]
)
subAccMat <- accMat[unique(c(backgroundCombos[[i]][, 1], backgroundCombos[[i]][, 2])), ]
cl <- parallel::makeCluster(numCores)
if (verbose) {
message(paste("Generating Background correlations for", i, "of", length(tile1), "pairs", sep = " "))
}
parallel::clusterExport(cl, varlist = c("subAccMat", "combPairs"), envir = environment())
tmp_background <- runCoAccessibility(subAccMat, combPairs, ZI, verbose, cl)
parallel::stopCluster(cl)
gc()
backGround <- append(backGround, list(tmp_background))
}
}
cl <- parallel::makeCluster(numCores)
parallel::clusterExport(cl, varlist = c("foreGround", "backGround"), envir = environment())
pValues <- pbapply::pblapply(seq_along(backGround), function(x) {
cor1 <- foreGround$Correlation[x]
if (is.na(cor1)) {
NA
} else if (cor1 >= 0) {
sum(cor1 > backGround[[x]]$Correlation) / length(backGround[[x]]$Correlation)
} else if (cor1 < 0) {
sum(cor1 < backGround[[x]]$Correlation) / length(backGround[[x]]$Correlation)
}
}, cl = cl) %>% unlist()
parallel::stopCluster(cl)
gc()
foreGround$pValues <- pValues
if (returnBackGround) {
return(list("Foreground" = foreGround, "Background" = backGround))
} else {
return(foreGround)
}
}
#' @title \code{testCoAccessibilityRandom}
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#' Renamed to `testCoAccessibility` to remove unnecessary specificity around
#' the background set, and for a shorter function name.
#'
#' \code{testCoAccessibilityRandom} takes an input set of tile
#' pairs and tests whether they are significantly different compared to
#' random, non-overlapping background set.
#'
#' @param SampleTileObj The SummarizedExperiment object output from
#' getSampleTileMatrix containing your sample-tile matrices
#' @param tile1 vector of indices or tile names (chrX:100-2000) for tile pairs
#' to test (first tile in each pair)
#' @param tile2 vector of indices or tile names (chrX:100-2000) for tile pairs
#' to test (second tile in each pair)
#' @param backNumber number of background pairs. Default is 1000.
#' @param calcPValue Boolean, if TRUE calculate p-values. Default is TRUE.
#' @param returnBackGround Boolean, if TRUE return the background correlations
#' as well as foreground. Default is FALSE.
#' @param numCores Optional, the number of cores to use with multiprocessing.
#' Default is 1.
#' @param verbose Set TRUE to display additional messages. Default is FALSE.
#' @param ZI boolean flag that enables zero-inflated (ZI) Spearman correlations
#' to be used. Default is TRUE. If FALSE, skip zero-inflation and calculate
#' the normal Spearman.
#'
#' @return foreGround A data.frame with Tile1, Tile2, Correlation, and p-value
#' for that correlation compared to the background
#'
#' @keywords internal
#' @export
#'
testCoAccessibilityRandom <- function(SampleTileObj,
tile1,
tile2,
numCores = 1,
ZI = TRUE,
backNumber = 1000,
calcPValue = TRUE,
returnBackGround = FALSE,
verbose = TRUE) {
. <- NULL
lifecycle::deprecate_warn("1.0.1",
"testCoAccessibilityRandom()",
"MOCHA::testCoAccessibility()")
testCoAccessibility(SampleTileObj,
tile1,
tile2,
numCores,
ZI,
backNumber,
calcPValue,
returnBackGround,
verbose)
}
#' @title \code{testCoAccessibility}
#'
#' @description
#' \code{testCoAccessibility} takes an input set of tile
#' pairs and tests whether they are significantly different compared to
#' random, non-overlapping background set.
#'
#' @param SampleTileObj The SummarizedExperiment object output from
#' getSampleTileMatrix containing your sample-tile matrices
#' @param tile1 vector of indices or tile names (chrX:100-2000) for tile pairs
#' to test (first tile in each pair)
#' @param tile2 vector of indices or tile names (chrX:100-2000) for tile pairs
#' to test (second tile in each pair)
#' @param backNumber number of background pairs. Default is 1000.
#' @param calcPValue Boolean, if TRUE calculate p-values. Default is TRUE.
#' @param returnBackGround Boolean, if TRUE return the background correlations
#' as well as foreground. Default is FALSE.
#' @param numCores Optional, the number of cores to use with multiprocessing.
#' Default is 1.
#' @param verbose Set TRUE to display additional messages. Default is FALSE.
#' @param ZI boolean flag that enables zero-inflated (ZI) Spearman correlations
#' to be used. Default is TRUE. If FALSE, skip zero-inflation and calculate
#' the normal Spearman.
#'
#' @return foreGround A data.frame with Tile1, Tile2, Correlation, and p-value
#' for that correlation compared to the background
#'
#' @export
#'
testCoAccessibility <- function(SampleTileObj,
tile1,
tile2,
numCores = 1,
ZI = TRUE,
backNumber = 1000,
calcPValue = TRUE,
returnBackGround = FALSE,
verbose = TRUE) {
. <- NULL
if (length(tile1) != length(tile2)) {
stop("tile1 and tile2 must be the same length.")
}
fullObj <- combineSampleTileMatrix(SampleTileObj)
if (is.character(tile1) && is.character(tile2)) {
nTile1 <- match(tile1, rownames(fullObj))
nTile2 <- match(tile2, rownames(fullObj))
} else if (is.numeric(tile1) && is.numeric(tile2)) {
nTile1 <- tile1
nTile2 <- tile2
tile1 <- rownames(fullObj)[nTile1]
tile2 <- rownames(fullObj)[nTile2]
} else {
stop("tile1 and tile 2 must both be either numbers (indices) or strings")
}
# Only run this if the backNumber is an actual number. If an actually background set is prepare, skip it.
if (is.null(dim(backNumber))) {
if (backNumber >= length(rownames(fullObj)) - length(unique(c(tile1, tile2)))) {
backNumber <- length(rownames(fullObj)) - length(unique(c(tile1, tile2)))
if (verbose) {
warning("backNumber too high. Reset to all background combinations.")
}
} else if (backNumber <= 10) {
stop("backNumber too low (<=10). We recommend 1000.")
}
}
accMat <- SummarizedExperiment::assays(fullObj)[[1]]
## Test original pairs of locations
combPairs <- data.frame(tile1, tile2)
if (verbose) {
message("Identifying foreground")
}
cl <- parallel::makeCluster(numCores)
foreGround <- runCoAccessibility(accMat, combPairs, ZI, verbose, cl)
parallel::stopCluster(cl)
gc()
if (any(is.na(foreGround$Correlation))) {
if (verbose) {
warning("All foreground correlations are undefined")
}
}
if (is.null(dim(backNumber))) {
if (verbose) {
message("Finding background peak pairs")
}
backGroundTiles <- rownames(accMat)[!rownames(accMat) %in% c(tile1, tile2)]
backgroundCombos <- data.frame(
Tile1 = sample(backGroundTiles, backNumber),
Tile2 = sample(backGroundTiles, backNumber)
)
backgroundCombos <- backgroundCombos[backgroundCombos[, 1] != backgroundCombos[, 2], ]
} else if (dim(backNumber)[2] > 1) {
if (verbose) {
message("Using user-defined background pairs")
}
backNumber <- as.data.frame(backNumber)
if (!all(c("Tile1", "Tile2") %in% colnames(backNumber))) {
stop("User-defined background pairs requires a column for Tile1 and Tile2")
} else if (!all(grepl(":", c(backNumber[, "Tile1"], backNumber[, "Tile2"])) & grepl("-", backNumber[, "Tile1"], backNumber[, "Tile2"]))) {
stop("User-defined background pairs must be in the form ChrX:100-2000")
} else if (!all(c(backNumber[, "Tile1"], backNumber[, "Tile2"]) %in% rownames(fullObj))) {
stop("User-defined background pairs includes regions not found within the sample tile accessibility matrix.")
}
backgroundCombos <- as.data.frame(backNumber)[, c("Tile1", "Tile2")]
if (sum(backgroundCombos[, "Tile1"] != backgroundCombos[, "Tile2"]) < 10) {
stop("User-defined background pairs are fewer than 10. Please provide a larger background.")
} else {
backgroundCombos <- backgroundCombos[backgroundCombos[, "Tile1"] != backgroundCombos[, "Tile2"], ]
}
} else {
stop("Incorrect backNumber provided. Please provider either a number, or a data.frame with columns entitled Tile1 and Tile2, describing pairs to test. The tile names should be in the format ChrX:100-2000.")
}
rm(combPairs)
## Now we need to test the background set
if (verbose) {
message("Identifying background correlations.")
}
cl <- parallel::makeCluster(numCores)
backGround <- runCoAccessibility(
accMat = accMat,
pairs = backgroundCombos, ZI = ZI, verbose = verbose,
numCores = cl
)
parallel::stopCluster(cl)
gc()
rm(accMat)
rm(backgroundCombos)
rm(fullObj)
if (calcPValue) {
if (verbose) {
message("Generating p-values.")
}
greatList <- unlist(pbapply::pblapply(foreGround$Correlation[which(foreGround$Correlation > 0)],
function(x) {
return(sum(x > backGround$Correlation))
},
cl = 1
)) / length(backGround$Correlation)
lesserList <- unlist(pbapply::pblapply(foreGround$Correlation[which(foreGround$Correlation < 0)],
function(x) {
return(sum(x < backGround$Correlation))
},
cl = 1
)) / length(backGround$Correlation)
foreGround$pValues <- rep(NA, length(foreGround$Correlation))
foreGround$pValues[which(foreGround$Correlation > 0)] <- 1 - greatList
foreGround$pValues[which(foreGround$Correlation < 0)] <- 1 - lesserList
}
if (returnBackGround) {
return(list("Foreground" = foreGround, "Background" = backGround))
} else {
return(foreGround)
}
}
#' @title \code{runCoAccessibility}
#'
#' @description \code{runCoAccessibility}
#'
#' @param SampleTileObj The SummarizedExperiment object output from
#' getSampleTileMatrix containing your sample-tile matrices
#' @param accMat accessibility matrix to use for correlations
#' @param pairs data.frame for pairs of tiles to test. Must be tileNames
#' (chrX:100-200).
#' @param numCores Optional, the number of cores to use with multiprocessing.
#' Default is 1.
#' @param verbose Set TRUE to display additional messages. Default is FALSE.
#' @param ZI boolean flag that enables zero-inflated (ZI) Spearman correlations
#' to be used. Default is TRUE. If FALSE, skip zero-inflation and calculate
#' the normal Spearman.
#'
#' @return zi_spear_mat_tmp a data.table of tile pairs with associated
#' correlations.
#'
#' @examples
#' runCoAccessibility(
#' assays(SampleTileObj)[[1]],
#' pairs = data.frame(
#' Tile1 = c("chrX:1000-1499", "chr21:1000-1499"),
#' Tile2 = c("chrX:1500-1999", "chr21:1500-1999")
#' )
#' )
#' @noRd
#'
runCoAccessibility <- function(accMat, pairs, ZI = TRUE, verbose = TRUE, numCores = 1) {
# Generate matrix for just Tile1, and just Tile2, then combined by column.
combinedMat <- cbind(accMat[pairs[, 1], ], accMat[pairs[, 2], ])
matList <- as.list(as.data.frame(t(combinedMat)))
# Remember how many samples you have, so you can split the list later
if (verbose) {
message("Data.frame wrangled for co-accessibility")
}
rm(combinedMat)
rm(accMat)
## split combinedMat into blocks that represent the number of cores we are using.
## Use split command for that.
## Then test each pair, but only within each block.
if (ZI) {
correlation_tmp <- unlist(pbapply::pblapply(
X = matList,
FUN = ZISpearman,
cl = numCores
))
} else {
correlation_tmp <- unlist(pbapply::pblapply(
X = matList,
FUN = Spearman,
cl = numCores
))
}
# Create zero-inflated correlation matrix from correlation values
zi_spear_mat_tmp <- data.table::data.table(
Correlation = correlation_tmp,
Tile1 = pairs[, 1],
Tile2 = pairs[, 2]
)
return(zi_spear_mat_tmp)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.