#' @title Infer cohort-prevalent hypermethylation
#' @description Determine if promoter hypermethylation is over-represented in the cohort.
#' @param data A data frame object generated by \emph{pvalueBetaReg} containing following columns:
#' Id, Hugo, Covariates included in \emph{matCV}, PDR_Tumor, Depth_Tumor, CpGs_Tumor, DHcR_Tumor,
#' DHcR_Tumor_Beta, Beta_Response, Beta_Precision and Pvalue.
#' An example can be loaded by using \emph{invisible(pvalByGenePt)}.
#' @param K An integer value defining the number of selected samples in random sampling.
#' @param times An integer value defining the iterative times in random sampling.
#' @param r Use the rth smallest p-value in Wilkinson combination.
#' @return A data frame summarizing cohort-prevalent hypermethylation inference.
#' \itemize{
#' \item Hugo: Hugo symbol
#' \item Rank: Rank of the promoter by p-value
#' \item Sample_Size: Number of samples evaluated
#' \item Pvalue: Lower quartile of Wilkinson combined p-values from random sampling
#' \item Padjust: Benjamini-Hochberg adjusted p-value
#' }
#' @examples
#' pvalueCombine(data = invisible(pvalByGenePt))
#'
pvalueCombine <- function(data, K=10, times=100, r=2) {
# pval combination function
pvalCombine <- function(x) {
# choose replicate with maximum number of covered CpGs from the same patient
pval <- x[, c('Id', 'Pvalue', 'CpGs_Tumor')]
tmp <- split(pval, f=pval$Id, drop=T)
pval <- sapply(tmp, function(x) x[which.max(x$CpGs_Tumor), 'Pvalue'])
size <- length(pval)
p.75 <- NA
# randomly select K (10) samples from the pool for 100 times
if (size >= K) {
pvalSub <- function(K, pval) {
pval.curr <- sample(pval, K)
tmp <- metap::wilkinsonp(pval.curr, r=r, alpha=0.05)
return(tmp$p)
}
set.seed(1)
pval.curr <- sapply(rep(K, times), pvalSub, pval=pval)
p.75 <- quantile(pval.curr, probs=0.25)
}
return(c(size, p.75))
}
pval.by.gene <- split(data, f=data$Hugo, drop=T)
pval <- as.data.frame(t(as.data.frame(lapply(pval.by.gene, pvalCombine))))
colnames(pval) <- c('sampleSize', 'pvalue')
pval$Hugo <- rownames(pval)
pval <- pval[pval$sampleSize >= K,]
pval <- pval[order(pval$pvalue),]
pval$Rank <- 1:nrow(pval)
pval <- pval[,c('Hugo', 'Rank', 'sampleSize', 'pvalue')]
colnames(pval) <- c('Hugo', 'Rank', 'Sample_Size', 'Pvalue')
pval$Padjust <- p.adjust(pval$Pvalue, method='BH')
rownames(pval) <- pval$Rank
return(pval)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.