Nothing
#' @title Perform Post Hoc Inference
#'
#' @description This function performs post-hoc inference on all provided clusters
#' using \code{\link{performTest}} results.
#'
#' @param resdiff An object of class \code{resdiff} obtained from the
#' function \code{\link{performTest}}.
#' @param clusters A vector corresponding to a clustering of resdiff rows.
#' @param alpha A number between 0 and 1 at which the computed post hoc bounds
#' will be valid.
#'
#' @return An object of class \code{resposthoc} containing a matrix with true
#' positive proportions for each interaction and a dataframe with the following
#' entries:
#' \item{region1}{The first bin of the interaction.}
#' \item{region2}{The second bin of the interaction.}
#' \item{clust}{The cluster the interaction belongs to.}
#' \item{TPRate}{The minimal post hoc true positive proportion of the cluster
#' the interaction belongs to.}
#' \item{p.value}{The p-value of the diffHic test.}
#' \item{p.adj}{The adjusted p-value of the diffHic test.}
#' \item{logFC}{The log2-fold-change of the interaction.}
#' \item{meanlogFC}{The mean of the log2-fold-change for the cluster the
#' interaction belongs to.}
#' \item{varlogFC}{The variance of the log2-fold-change for the cluster the
#' interaction belongs to.}
#' \item{propPoslogFC}{The proportion of interactions with positive
#' log2-fold-change in the cluster the interaction belongs to.}
#'
#' @author Élise Jorge \email{elise.jorge@inrae.fr}\cr
#' Sylvain Foissac \email{sylvain.foissac@inrae.fr}\cr
#' Pierre Neuvial \email{pierre.neuvial@math.univ-toulouse.fr}\cr
#' Nathalie Vialaneix \email{nathalie.vialaneix@inrae.fr}
#'
#' @export
#'
#' @importFrom dplyr %>%
#' @importFrom dplyr mutate
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr select
#' @importFrom dplyr ungroup
#' @importFrom limma squeezeVar
#' @importFrom reshape2 colsplit
#' @importFrom stats cophenetic
#' @importFrom stats pt
#' @importFrom rlang .data
#' @importFrom viridis scale_fill_viridis
#' @importFrom utils write.csv
#' @importFrom stats var
#' @importFrom utils head
#' @importFrom rlang .data
#'
#' @examples
#' data("pighic")
#' resdiff <- performTest(pighic$data, pighic$conditions)
#' \donttest{res2D <- AggloClust2D(pighic$data)
#' if (!is.null(res2D)) { # in case Python or modules are not available
#' clusters <- res2D$clustering
#' alpha <- 0.05
#' resposthoc <- postHoc(resdiff, clusters, alpha)
#' resposthoc
#' summary(resposthoc)
#' plot(resposthoc)
#' }}
postHoc <- function(resdiff, clusters, alpha) {
uniqueBins <- unique(c(resdiff$region1, resdiff$region2))
p <- max(uniqueBins) - min(uniqueBins) + 1
matPostHoc <- matrix(0, p, p)
m <- p * (p + 1) / 2 # Total number of interactions to consider in theory.
diffPvalInd <- c(resdiff$p.value, 1)
matPval <- matrix(length(diffPvalInd), p, p) # Untested interactions
# have a p-value of one. Put at last position in p-value vector.
# matPval[i,j] is the position of (i,j) interaction in diffHic vector.
matPval[cbind(
resdiff$region1 - min(uniqueBins) + 1,
resdiff$region2 - min(uniqueBins) + 1
)] <- seq(1:length(resdiff$region1))
matPval[cbind(
resdiff$region2 - min(uniqueBins) + 1,
resdiff$region1 - min(uniqueBins) + 1
)] <- seq(1:length(resdiff$region1))
vecRatioVPClust <- c()
## Put clustering in list format
pixels <- seq(1, length(clusters))
clusters <- sapply(seq(1:length(unique(clusters))), function(ab) {
elem <- pixels[clusters == ab]
return(elem)
})
for (i in 1:length(clusters)) {
clusterElem <- clusters[[i]]
clustBins1 <- resdiff$region1[clusterElem] - min(uniqueBins) + 1
clustBins2 <- resdiff$region2[clusterElem] - min(uniqueBins) + 1
indPval <- matPval[cbind(clustBins1, clustBins2)]
thrSimes <- alpha * (1:m) / m # Choose Simes method
# Compute upper bound on number of false positives
FP <- curveMaxFP(diffPvalInd[indPval], thrSimes)
tot <- length(clusterElem)
# Ratio of True Positives
ratio <- 1 - (FP[length(FP)] / tot)
matPostHoc[cbind(clustBins1, clustBins2)] <- ratio
matPostHoc[cbind(clustBins2, clustBins1)] <- ratio
vecRatioVPClust[i] <- ratio
}
## Create output table with cluster information
lenClust <- lengths(clusters)
clust <- unlist(lapply(c(1:length(clusters)), function(i) {
return(rep(i, lenClust[i]))
}))
postHocRes <- unlist(lapply(c(1:length(clusters)), function(i) {
return(rep(vecRatioVPClust[i], lenClust[i]))
}))
dfRes <- data.frame("int" = unlist(clusters))
dfRes$clust <- clust
dfRes$TPRate <- postHocRes
resdiff <- do.call(cbind.data.frame, resdiff)
resdiff$X <- seq(1:length(resdiff$region1))
dfRes <- merge(dfRes, resdiff, by.x = "int", by.y = "X")
dfRes <- dfRes[, c(4, 5, 2, 3, 6, 7, 8)]
## LogFC
## Compute logFC mean, variance and sign.
## Add mean column.
dfRes <- dfRes %>%
group_by(clust) %>%
mutate(meanlogFC = mean(.data$logFC))
## Add variance column.
dfRes <- dfRes %>%
group_by(clust) %>%
mutate(varlogFC = var(.data$logFC))
## Add proportion of positive logFC column.
dfRes <- dfRes %>%
group_by(clust) %>%
mutate(propPoslogFC = ((sum(.data$logFC >= 0)) / (length(.data$logFC))))
dfRes <- dfRes[with(dfRes, order(region1, region2)), ]
outList <- list(matPostHoc, dfRes)
names(outList) <- c("mat", "dfres")
# Assign class to the list
class(outList) <- "resposthoc"
return(outList)
}
##### Methods for resposthoc object ####
#' @exportS3Method
#' @param x a \code{resposthoc} object to print
#' @param ... not used
#' @rdname postHoc
print.resposthoc <- function(x, ...) {
# print title
cat("Posthoc results.")
cat("\n\n")
## Print the dataframe
print(x$dfres, max = 10)
}
#' @exportS3Method
#' @param object a \code{resposthoc} object to summarize
#' @param ... not used
#' @rdname postHoc
summary.resposthoc <- function(object, ...) {
# print title
cat("Summary of post hoc results.")
cat("\n\n")
## Only look at TPRate, p.vlaue, p.adj, logFC and propPoslogFC.
## Print the dataframe
dfSum <- object$dfres %>% ungroup()
dfSum <- dfSum %>%
select(
.data$TPRate, .data$p.value, .data$p.adj, .data$logFC,
.data$propPoslogFC
)
summary(dfSum)
}
#' @exportS3Method
#' @param x a \code{resposthoc} object to plot
#' @param ... not used
#' @rdname postHoc
plot.resposthoc <- function(x, ...) {
matposthoc <- x$mat
maxRatio <- max(matposthoc)
allBreaks <- seq(0, maxRatio, by = round(maxRatio / 6, digits = 2))
labels <- round((exp(allBreaks) - 0.5) * 100)
# Plot post-hoc ratios
suppressMessages({
phRatios <- plotSim(matposthoc, log = F) +
scale_fill_viridis(
name = "TDP",
breaks = allBreaks,
labels = allBreaks,
direction = 1
)
})
return(phRatios)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.