#' Calculate and Save DMRcate Method Results for Specified Design Points
#'
#' @description Given a set of design points (treatment effect size to be added
#' and number of repetitions), simulate methylation data with DMRs and then
#' apply the \code{dmrcate} method to them. Write the results to a file.
#'
#' @param beta_mat A beta value matrix for methylation samples from a
#' 450k methylation array with CpG IDs as the row names and sample IDs as
#' the column names. An example is given in the \code{betaVals_mat} data set.
#'
#' @param CPGs_df An annotation table that indicates locations of CpGs.
#' This data frame has CpG IDs as the rows with matching chromosome and
#' location info in the columns. Specifically, the columns are: \code{ILMNID}
#' - the CpG ID; \code{chr} - the chromosome label; and \code{MAPINFO} -
#' the chromosome location. An example is given in the \code{cpgLocation_df}
#' data set.
#'
#' @param Aclusters_df A data frame of beta values and CpG information for
#' clusters of CpGs over a 450k methylation array. The rows correspond to the
#' CPGs. The columns have information on the cluster number, chromosome,
#' cluster start and end locations, and the beta values for each subject
#' grouped by some clinical indicator (e.g. case v. control). An example is
#' given in the \code{startEndCPG_df} data set. This data set can be
#' generated by the file \code{/inst/1_Aclust_data_import.R}
#'
#' @param parallel Should computing be completed over multiple computing cores?
#' Defaults to \code{TRUE}.
#'
#' @param numCores If \code{parallel}, how many cores should be used? Defaults
#' to two less than the number of available cores (as calculated by the
#' \code{\link[parallel]{detectCores}} function).
#'
#' @param deltas_num A vector of treatment sizes: non-negative real numbers to
#' add to the beta values within randomly-selected clusters for a single
#' class of subjects. This artifically creates differentially-methylated
#' regions (DMRs).
#'
#' @param seeds_int A vector of seed values passed to the
#' \code{\link[base]{Random}} function to enable reproducible results
#'
#' @param lambdas_num A vector of Gaussian kernel bandwidths for smoothed-
#' function estimation in the called \code{\link[DMRcate]{dmrcate}} function
#'
#' @param Cs_int A vector of scaling factors for bandwidth in the internal call
#' to the \code{\link[DMRcate]{dmrcate}} function
#'
#' @param resultsDir Where should the results be saved? Defaults to
#' \code{DMRcate_compare/}.
#'
#' @param verbose Should the function print progress messages? Defaults to
#' \code{TRUE} only if \code{parallel = FALSE}. See the internal
#' \code{\link{RunDMRcate}} function for more details about parallel
#' computing with DMRcate.
#'
#' @return Saves output files in the specified results directory.
#'
#' @details This function creates matrices of all combinations of design points
#' and all combinations of parameters. For each combination, this function
#' executes the internal \code{\link{RunDMRcate}} function and saves the
#' results as a compressed \code{.RDS} file.
#'
#' @importFrom doParallel registerDoParallel
#' @importFrom foreach foreach
#' @importFrom foreach %dopar%
#' @importFrom parallel detectCores
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#'
#'
#' @export
#'
#' @examples
#' \dontrun{
#' data("betaVals_mat")
#' data("cpgLocation_df")
#' data("startEndCPG_df")
#'
#' WriteDMRcateResults(
#' beta_mat = betaVals_mat,
#' CPGs_df = cpgLocation_df,
#' Aclusters_df = startEndCPG_df
#' )
#' }
WriteDMRcateResults <- function(beta_mat,
CPGs_df,
Aclusters_df,
parallel = TRUE,
numCores = detectCores() - 2,
deltas_num = c(0, 0.025, 0.05, 0.10,
0.15, 0.20, 0.30, 0.40),
seeds_int = c(100, 210, 330, 450, 680),
lambdas_num = c(200, 250, 500, 750, 1000),
Cs_int = 1:5,
resultsDir = "DMRcate_compare/",
verbose = !parallel){
dir.create(paste0("./", resultsDir), showWarnings = FALSE)
### Data Simulation Outer Loop ###
designPts_mat <- expand.grid(deltas_num, seeds_int)
paramsGrid_mat <- expand.grid(lambdas_num, Cs_int)
for(i in 1:nrow(designPts_mat)){
### Generate Data Set ###
delta <- designPts_mat[i, 1]
seed <- designPts_mat[i, 2]
treatment_ls <- SimulateData(beta_mat = beta_mat,
Aclusters_df = Aclusters_df,
delta_num = delta,
seed_int = seed)
betas_df <- treatment_ls$simBetaVals_df
### Parallel Setup ###
if(!parallel){
numCores <- 1
}
clust <- makeCluster(numCores)
registerDoParallel(clust)
### Inner Parameter Grid Search ###
foreach(j = 1:nrow(paramsGrid_mat),
.packages = c("DMRcompare", "data.table")) %dopar% {
### Calculate Method Output ###
lambda <- paramsGrid_mat[j, 1]
C_int <- paramsGrid_mat[j, 2]
suppressMessages(
res_ls <- RunDMRcate(betaVals_mat = betas_df,
cpgLocation_df = CPGs_df,
lambda_int = lambda,
C_int = C_int)
)
### Define NULL Data ###
if(is.null(res_ls[[1]])){
res_ls[[1]] <- data.frame(
dmr.chr = NA,
dmr.start = NA_integer_,
dmr.end = NA_integer_,
seqnames = NA,
start = NA_integer_,
end = NA_integer_,
width = NA_integer_,
strand = NA,
no.cpgs = NA_integer_,
minfdr = NA_real_,
Stouffer = NA_real_,
maxbetafc = NA_real_,
meanbetafc = NA_real_,
overlapping.promoters = NA_character_,
dmr.pval = NA_real_,
dmr.n.cpgs = NA_integer_
)
}
### Save Results ###
file_char <- paste0(
resultsDir, "DMRcateResults_delta", delta, "_seed", seed,
"_lambda", lambda, "_C", C_int, ".RDS"
)
if(verbose){
message("Saving results to file ", file_char, "\n")
}
saveRDS(res_ls, file = file_char)
} # END for(j)
stopCluster(clust)
} # END for(i)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.