R/readEH.R

Defines functions readEH

Documented in readEH

#' A wrapper function for efficiently reading in user-specified random forest
#' models generated by \code{preciseTAD::TADrandomForest}, built on cell-line
#' specific CTCF, RAD21, SMC3, and ZNF143 ChIP-seq peak regions.
#'
#' The function maps the requested chromosome / cell line / ground-truth
#' combination to a file name of the form
#' \code{<chr>_<cl>_<resolution>_<gt>.rds} (resolution is "5kb" for
#' "Arrowhead" and "10kb" for "Peakachu"), looks up the matching ExperimentHub
#' ID ("EH3815" onwards, in the order the files are enumerated below), and
#' extracts the first element of the retrieved record.
#'
#' @param chr Which chromosome was used as the holdout during the training
#' process. That is, all other chromosomes were combined when building the
#' random forest. One of "CHR1" to "CHR8" or "CHR10" to "CHR22" ("CHR9" is not
#' among the enumerated files).
#' @param cl The cell line that was used (either "GM12878" or "K562").
#' @param gt The ground-truth TAD or chromatin loop boundaries used to construct
#'  the binary response vector (either "Arrowhead" or "Peakachu").
#' @param source The source of the files stored on ExperimentHub using
#' query(hub, "package_name"). It is indexed with \code{[[} by the
#' ExperimentHub ID.
#'
#' @return A trained model object from \code{caret}; this is the first element
#' of the record stored under the matching ExperimentHub ID. An error is raised
#' if \code{chr}, \code{cl}, or \code{gt} is not a single non-missing value or
#' if the combination does not correspond to one of the enumerated files.
#' @export
#'
#' @import ExperimentHub
#'
#' @examples
#' # Suppose we want to read in the model that was built using CHR1-CHR21,
#' # on GM12878, using Arrowhead defined TAD boundaries at 5kb resolution.
#'
#' #Initialize ExperimentHub
#' library(ExperimentHub)
#' hub <- ExperimentHub()
#' query(hub, "preciseTADhub")
#' myfiles <- query(hub, "preciseTADhub")
#'
#' CHR22_GM12878_5kb_Arrowhead <- readEH(chr = "CHR22",
#'                                       cl = "GM12878",
#'                                       gt = "Arrowhead",
#'                                       source = myfiles)
readEH <- function(chr, cl, gt, source){

    # Validate the selectors up front so a bad value yields a clear message
    # instead of a zero-length subscript error further down.
    selectors <- list(chr = chr, cl = cl, gt = gt)
    for (nm in names(selectors)) {
        val <- selectors[[nm]]
        if (!is.atomic(val) || length(val) != 1L || is.na(val)) {
            stop("'", nm, "' must be a single, non-missing value",
                 call. = FALSE)
        }
    }
    chr <- as.character(chr)
    cl <- as.character(cl)
    gt <- as.character(gt)

    # Enumerate every stored file. expand.grid() varies its first argument
    # fastest, so listing suffix, then cell line, then chromosome reproduces
    # the order chromosome (outer) > cell line > ground truth (inner) in which
    # the consecutive ExperimentHub IDs are assigned.
    chromosomes <- paste0("CHR", c(1:8, 10:22))
    cell_lines <- c("GM12878", "K562")
    suffixes <- c("5kb_Arrowhead.rds", "10kb_Peakachu.rds")
    combos <- expand.grid(suffix = suffixes,
                          cl = cell_lines,
                          chr = chromosomes,
                          stringsAsFactors = FALSE)
    file_names <- paste(combos$chr, combos$cl, combos$suffix, sep = "_")

    # IDs are consecutive, starting at EH3815 for the first enumerated file
    ehids <- paste0("EH", 3815L + seq_along(file_names) - 1L)

    # Arrowhead models are stored at 5kb resolution, all others at 10kb
    resolution <- if (gt == "Arrowhead") "5kb" else "10kb"
    target <- paste0(chr, "_", cl, "_", resolution, "_", gt, ".rds")

    idx <- match(target, file_names)
    if (is.na(idx)) {
        stop("No ExperimentHub record for file '", target, "': 'chr' must be ",
             "one of CHR1-CHR8 or CHR10-CHR22, 'cl' one of \"GM12878\" or ",
             "\"K562\", and 'gt' one of \"Arrowhead\" or \"Peakachu\"",
             call. = FALSE)
    }
    EHID <- ehids[idx]

    # Retrieve the record and keep only the model object (its first element)
    ptm <- source[[EHID]]
    ptm[[1]]
}
stilianoudakis/preciseTADhub documentation built on Dec. 31, 2020, 7:34 a.m.