R/simulate_data.R

Defines functions makeExampleTinyEPICDataSet makeExampleSeSAMeDataSet

Documented in makeExampleSeSAMeDataSet makeExampleTinyEPICDataSet

#' Make a simulated SeSAMe data set
#'
#' Constructs a simulated \code{SigSet} dataset. For the given platform,
#' methylated (M) and unmethylated (U) allele signals are drawn at random for
#' every probe in the platform's address ordering table: design I probes of
#' the Grn and Red color channels, and design II probes. In-band signals are
#' simulated using a N(4000, 200) normal distribution. Out-of-band signals
#' are simulated using a N(400, 200) normal distribution. Control signals are
#' simulated using a N(400, 300) normal distribution. The second number is the
#' standard deviation, and negative draws are clamped to zero. The assembled
#' object is passed through \code{detectionPoobEcdf} before it is returned.
#'
#' Signals are drawn with \code{rnorm}, so call \code{set.seed} beforehand
#' when reproducible output is needed.
#'
#' @param platform optional, HM450, EPIC or HM27 (HM450 when omitted)
#' @return Object of class \code{SigSet}
#' @examples
#' sset <- makeExampleSeSAMeDataSet()
#'
#' @export
makeExampleSeSAMeDataSet <- function(platform = c('HM450','EPIC','HM27')) {

    platform <- match.arg(platform)

    ## Fetch the address manifest once; the probe ordering and the control
    ## probe table are both read from this single object.
    address <- sesameDataGet(paste0(platform, '.address'))
    dm.ordering <- address$ordering
    dm.controls <- address$controls

    sset <- SigSet(platform)

    ## Simulate a two-column (M, U) signal matrix with one row per probe.
    ## Values are N(mu, 200) draws clamped below at zero.
    simulateSignal <- function(probes, mu) {
        mt <- matrix(pmax(rnorm(length(probes)*2, mu, 200),0), ncol=2)
        rownames(mt) <- probes
        colnames(mt) <- c('M','U')
        mt
    }

    ## The call order below determines how the random number stream is
    ## consumed (IG, oobR, IR, oobG, II, controls); keep it stable so that
    ## seeded results do not change.

    ## Design I, green channel: in-band in IG, out-of-band in oobR.
    probes <- rownames(
        dm.ordering[dm.ordering$DESIGN=='I' &
                        dm.ordering$COLOR_CHANNEL=='Grn',])
    IG(sset) <- simulateSignal(probes, 4000)
    oobR(sset) <- simulateSignal(probes, 400)

    ## Design I, red channel: in-band in IR, out-of-band in oobG.
    probes <- rownames(dm.ordering[(
        dm.ordering$DESIGN=='I' & dm.ordering$COLOR_CHANNEL=='Red'),])
    IR(sset) <- simulateSignal(probes, 4000)
    oobG(sset) <- simulateSignal(probes, 400)

    ## Design II probes only carry in-band signal.
    probes <- rownames(dm.ordering[dm.ordering$DESIGN=='II',])
    II(sset) <- simulateSignal(probes, 4000)

    ## Control probes: simulated green (G) and red (R) intensities joined
    ## with the channel and type annotation from the manifest. Row names are
    ## set before cbind() so they carry over to the combined data frame.
    ctl <- as.data.frame(matrix(pmax(rnorm(
        2*nrow(dm.controls), 400, 300),0), ncol=2))
    rownames(ctl) <- make.names(dm.controls$Name,unique=TRUE)
    ctl <- cbind(ctl, dm.controls[, c("Color_Channel","Type")])
    colnames(ctl) <- c('G','R','col','type')
    ctl(sset) <- ctl

    sset <- detectionPoobEcdf(sset)
    sset
}


#' Make a tiny toy simulated EPIC data set
#'
#' Construct a tiny EPIC \code{SigSet} holding six hard-coded probes in each
#' of the three probe categories (IG, IR and II; 18 probes in total), plus
#' one simulated row for every control listed in the EPIC address manifest.
#' In-band signals are simulated using a N(4000, 200) normal distribution.
#' Out-of-band signals are simulated using a N(400, 200) normal distribution.
#' Control signals are simulated using a N(400, 300) normal distribution.
#' The second number is the standard deviation. Negative draws are clamped
#' to zero and all signals are truncated to integers. The assembled object is
#' passed through \code{detectionPoobEcdf} before it is returned.
#'
#' Signals are drawn with \code{rnorm}, so call \code{set.seed} beforehand
#' when reproducible output is needed.
#'
#' @return Object of class \code{SigSet}
#' @examples
#' sset <- makeExampleTinyEPICDataSet()
#'
#' @export
makeExampleTinyEPICDataSet <- function() {

    sset <- SigSet('EPIC')

    ## Simulate a two-column (M, U) integer signal matrix with one row per
    ## probe. Values are N(mu, 200) draws clamped below at zero and then
    ## truncated by as.integer().
    simulateSignal <- function(probes, mu) {
        mt <- matrix(as.integer(
            pmax(rnorm(length(probes)*2, mu, 200),0)), ncol=2)
        rownames(mt) <- probes
        colnames(mt) <- c('M','U')
        mt
    }

    ## The call order below determines how the random number stream is
    ## consumed (IG, oobR, IR, oobG, II, controls); keep it stable so that
    ## seeded results do not change.

    ## Probes stored in IG (in-band) and oobR (out-of-band).
    probes <- c(
        "cg18478105", "cg01763666", "cg25813447",
        "cg07779434", "cg13417420", "cg24133276")
    IG(sset) <- simulateSignal(probes, 4000)
    oobR(sset) <- simulateSignal(probes, 400)

    ## Probes stored in IR (in-band) and oobG (out-of-band).
    probes <- c(
        "cg16619049", "cg01782097", "cg12712429",
        "cg24373735", "cg18865112", "cg22226438")
    IR(sset) <- simulateSignal(probes, 4000)
    oobG(sset) <- simulateSignal(probes, 400)

    ## Probes stored in II; in-band signal only.
    probes <- c(
        "cg07881041", "cg23229610", "cg03513874",
        "cg05451842", "cg14797042", "cg09838562")
    II(sset) <- simulateSignal(probes, 4000)

    ## Control probes: simulated green (G) and red (R) integer intensities
    ## joined with the channel and type annotation from the EPIC manifest.
    ## Row names are set before cbind() so they carry over to the result.
    dm.controls <- sesameDataGet('EPIC.address')$controls
    ctl <- as.data.frame(matrix(as.integer(
        pmax(rnorm(2*nrow(dm.controls), 400, 300),0)), ncol=2))
    rownames(ctl) <- make.names(dm.controls$Name,unique=TRUE)
    ctl <- cbind(ctl, dm.controls[, c("Color_Channel","Type")])
    colnames(ctl) <- c('G','R','col','type')
    ctl(sset) <- ctl

    sset <- detectionPoobEcdf(sset)
    sset
}

Try the sesame package in your browser

Any scripts or data that you put into this service are public.

sesame documentation built on Nov. 15, 2020, 2:08 a.m.