R/methylInheritanceSimInternalMethods.R

Defines functions estBetaAlpha estBetaBeta getSyntheticChr getDiffCase getSim calculateNbDiffCase getDiffMeth simInheritance simEachGeneration saveData testIfAlreadyDone validateRunSimParameters validateRunSimIntegerParameters fixSeed validateRunSimDoubleParameters validateRunSimLogicalParameters validateRunSimOtherParameters createSampleID runOnEachSynCHR

Documented in calculateNbDiffCase createSampleID estBetaAlpha estBetaBeta fixSeed getDiffCase getDiffMeth getSim getSyntheticChr runOnEachSynCHR saveData simEachGeneration simInheritance testIfAlreadyDone validateRunSimDoubleParameters validateRunSimIntegerParameters validateRunSimLogicalParameters validateRunSimOtherParameters validateRunSimParameters

#' @title Estimate the alpha parameter of a Beta distribution
#'
#' @description Estimate the alpha parameter from the mean and the variance
#' of a Beta distribution.
#'
#' @param meanCtrl a \code{double}, the mean of the controls (CTRL) at a 
#' specific CpG site.
#' 
#' @param varCtrl a \code{double}, the variance of the controls 
#' (CTRL) at a specific CpG site.
#'
#' @param minVal a \code{double}, the minimum value accepted for the mean
#' value. If \code{meanCtrl} is smaller than 
#' \code{minVal}, then \code{minVal} is used in the calculation of the alpha
#' parameter. 
#' Default: \code{1e-06}.
#'
#' @return a \code{double}, the alpha parameter of a Beta distribution.
#'
#' @examples
#'
#' ## Estimate alpha parameters with mean = 0.5 and variance = 0.1
#' methInheritSim:::estBetaAlpha(meanCtrl = 0.5, varCtrl = 0.1)
#'
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
estBetaAlpha <- function(meanCtrl, varCtrl, minVal = 1e-06){
    
    mu <- max(meanCtrl, minVal)
    
    sigma2 <- max(varCtrl, ifelse(mu < 0.01, min(minVal, mu/1000), minVal))
    
    # mu must be smaller than 1 
    mu <- min(mu, 1 - min(0.001, sigma2 * 10^(-log10(minVal)/2)))
    
    return(max(0, -mu * (sigma2 + mu^2 - mu) / sigma2))
}


#' @title Estimate the beta parameter of a beta distribution
#'
#' @description Estimate the beta parameter from the mean and the variance
#' of a beta distribution.
#'
#' @param meanCtrl  a \code{double}, the mean of the controls (CTRL) at a 
#' specific CpG site.
#' 
#' @param varCtrl  a \code{double}, the variance of the controls (CTRL) at a 
#' specific CpG site.
#'
#' @param minVal a \code{double}, the minimum value accepted for the mean
#' value. If \code{meanCtrl} is smaller than 
#' \code{minVal}, then \code{minVal} is used in the calculation of the beta
#' paramter. 
#' Default: \code{1e-06}.
#'
#' @return a \code{double}, the beta parameter of a Beta distribution.
#'
#' @examples
#'
#' ## Estimate beta parameters with mean = 0.5, variance = 0.1
#' methInheritSim:::estBetaBeta(meanCtrl=0.5, varCtrl=0.1)
#'
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
estBetaBeta <- function(meanCtrl, varCtrl, minVal = 1e-06) {
    
    mu <- max(meanCtrl, minVal)
    
    # variance is at least minVal or mu / 1000
    sigma2 <- max(varCtrl, ifelse(mu < 0.01, min(minVal, mu/1000), minVal))
    
    # mu must be smaller than 1. 
    mu <- min(mu, 1 - min(0.001, sigma2 * 10^(-log10(minVal)/2)))
    
    return(max(0, (sigma2 + mu^2 - mu) * (mu -1) / sigma2))
}


#' @title Create a synthetic chromosome with the CTRL genome
#'
#' @description Create a synthetic chromosome with the sampling of a specified 
#' number of blocks and a specified number of consecutive CpG.
#'
#' @param methInfo is object of class \code{methylBase}, the CpG information
#' from controls (CTRL) that will be used to create the sythetic chromosome. 
#' The object can also contain information from cases but only the controls 
#' will be used.
#'
#' @param nbBlock a positive \code{integer}, the number of blocks used 
#' for sampling.
#'
#' @param nbCpG a \code{integer}, the number of consecutive CpG positions used
#' for sampling from \code{methInfo}.
#'
#' @return a \code{GRanges} object, the synthetic chromosome.
#'
#' @examples
#'
#' ## Load methyl information
#' data(samplesForChrSynthetic)
#' 
#' ## Ensure results are reproducible
#' set.seed(32)
#' 
#' ## Create synthetic chromosome
#' methInheritSim:::getSyntheticChr(methInfo = samplesForChrSynthetic, 
#' nbBlock = 10, nbCpG = 20)
#'
#' @author Pascal Belleau
#' @importFrom stats runif var
#' @importFrom methylKit getData
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @keywords internal
getSyntheticChr <- function(methInfo, nbBlock, nbCpG) {
    
    seqChr <- unique(methInfo$chr) # list of Chr
    
    # Sample nbBlock chromosomes, the same can be sample more than once
    chr <- seqChr[sample(seq_len(length(seqChr)), nbBlock, replace=TRUE)]
    
    # The position of the CTRL
    posCTRL <- which(methInfo@treatment == 0)
    
    # Init the data.frame
    total <- nbBlock * nbCpG
    res <- data.frame(chr = rep("S", total), start=rep(0, total), 
                        end = rep(0, total), meanCTRL = rep(0, total), 
                        varCTRL = rep(0, total), alphaCTRL = rep(0, total), 
                        betaCTRL = rep(0, total), chrOri = rep(0, total), 
                        startOri = rep(0, total))
    
    # First position
    l <- 1000
    
    for(i in seq_len(nbBlock)){ # For each block of CpG
        
        # Select a position in the block
        v <- round(runif(1, 0, 1) *
                    (length(methInfo$start[methInfo$chr == chr[i]]) - nbCpG))
        
        methBlock <- getData(methInfo[methInfo$chr == 
                                            chr[i]][(v + 1):(v + nbCpG)])
        matProp <- sapply(posCTRL, function(x, methCur) {
                unname(unlist(methCur[3*(x - 1) + 2]/methCur[3*(x - 1) + 1]))},
                methCur = methBlock[5:length(methBlock)])
        
        res$chrOri[((i - 1) * nbCpG + 1):(i * nbCpG)] <- rep(chr[i], nbCpG)
        res$startOri[((i - 1) * nbCpG + 1):(i * nbCpG)] <- 
                unname(unlist(methBlock[2]))
        res$start[((i - 1) * nbCpG + 1):(i * nbCpG)] <- 
                unname(unlist(methBlock[2])) - 
                unname(unlist(methBlock[2]))[1] + l
        res$end[((i - 1) * nbCpG + 1):(i * nbCpG)] <- 
                res$start[((i - 1) * nbCpG + 1):(i * nbCpG)]
        res$meanCTRL[((i - 1) * nbCpG + 1):(i * nbCpG)] <- rowMeans(matProp)
        res$varCTRL[((i - 1) * nbCpG + 1):(i * nbCpG)] <- 
            apply(matProp, 1, var)
        l <- res$start[i * nbCpG] + 10000
    }
    
    res$alphaCTRL <- apply(res[, c(4,5)], 1, FUN = function(x) {
                            estBetaAlpha(x[1], x[2])})
    res$betaCTRL  <- apply(res[, c(4,5)], 1, FUN = function(x) {
                            estBetaBeta(x[1], x[2])})
    
    ## Create returned value
    res <- GRanges(seqnames = res$chr, 
                    ranges = IRanges(start = res$start, end = res$end),
                    strand = rep("+", total), chrOri = res$chrOri,
                    startOri = res$startOri, meanCTRL = res$meanCTRL,
                    varCTRL = res$varCTRL)
    
    return(res)
}


#' @title Get the C/T proportion at a selected site,
#' differentially methylated or not, for all cases
#'
#' @description Simulate the proportion of C/T for each case at a selected 
#' site, differentially methylated or not.
#' 
#' @param ctrlMean a \code{double}, the mean of the CTRL at the site.
#'
#' @param ctrlVar a \code{double}, the variance of the CTRL at the site.
#' 
#' @param selectedAsDM a \code{integer}, \code{1} if the site is selected as 
#' differentially methylated, otherwise \code{0}.
#'
#' @param nbCase a \code{integer}, the number of cases.
#' 
#' @param sDiff a non-negative \code{double} 
#' included in [0,1], the proportion of C/T for a case differentially 
#' methylated that follows 
#' a beta distribution where the mean is shifted of \code{vDiff} 
#' from the CTRL distribution.
#'
#' @param nbDiffCase an \code{integer}, the number of cases differentially 
#' methylated.
#'
#' @return a \code{vector} containing 3 + \code{nbCase} entries of type 
#' \code{double}:
#' \itemize{
#' \item The mean proportion of C/T of the differentially methylated cases
#' \item The number of cases simulated using shifted distribution
#' \item The number of cases simulated using the control distribution
#' \item The proportion of C/T for each case
#' }
#'
#' @examples
#' 
#' ## Fix seed to obtain replicable results
#' set.seed(2010)
#' 
#' ## Get the proportion of C/T for each case at a specific site.
#' methInheritSim:::getDiffCase(ctrlMean = 0.9814562, ctrlVar = 
#' 0.0003607153, selectedAsDM = 0, nbCase=6, sDiff = 0.8, 
#' nbDiffCase = round(6 * 0.9))
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom stats rbeta
#' @keywords internal
getDiffCase <- function(ctrlMean, ctrlVar, selectedAsDM, nbCase, sDiff, 
                            nbDiffCase) {
    
    meanDiff <- 0
    
    if(selectedAsDM == 0) {
        ## Site selected as not differentially methylated
        val <- rbeta(nbCase, estBetaAlpha(ctrlMean, ctrlVar), 
                        estBetaBeta(ctrlMean, ctrlVar))
        meanDiff <- ctrlMean
        partitionDiff <- c(0, nbCase)
    } else {
        ## Site selected as differentially methylated
        meanDiff <- ifelse(ctrlMean < 0.5, min(1, ctrlMean + sDiff),
                            max(0, ctrlMean - sDiff))
        
        partitionDiff <- c(nbDiffCase, nbCase - nbDiffCase)
        
        val <- c(rbeta(partitionDiff[1], estBetaAlpha(meanDiff, ctrlVar), 
                    estBetaBeta(meanDiff, ctrlVar)),
                    rbeta(partitionDiff[2], estBetaAlpha(ctrlMean, ctrlVar), 
                    estBetaBeta(ctrlMean, ctrlVar)))
    }
    
    return(c(meanDiff, partitionDiff, val))
}


#' @title Simulate the proportion of C/T at each site of synthetic CHR for 
#' each control and case
#'
#' @description For each control and case, generate the proportion of C/T at 
#' each of the synthetic CHR.
#'
#' @param nbCtrl a positive \code{integer}, the number of controls.
#'
#' @param nbCase a positive \code{integer}, the number of cases.
#'
#' @param generation a positive \code{integer}, the number of generations.
#'
#' @param stateInfo a \code{GRanges} object, the synthetic chromosome 
#' generated by \code{getSyntheticChr} function. 
#'
#' @param stateDiff a \code{vector} of \code{integer} (\code{0} 
#' and \code{1}) with length corresponding the length of \code{stateInfo}.
#' The \code{vector} indicates, using a \code{1}, the positions where the CpG 
#' sites are differentially methylated.
#' 
#' @param stateInherite a \code{vector} of \code{integer} (\code{0} and 
#' \code{1}) with length corresponding the length of \code{stateInfo}. The 
#' \code{vector} indicates, using a \code{1}, the positions where the CpG 
#' values are inherited.
#'
#' @param diffValue a non-negative \code{double} between between [0,1], the 
#' proportion of C/T for a case differentially methylated following a 
#' beta distribution 
#' where the mean is shifted of \code{diffValue} from the CTRL distribution.
#'
#' @param propDiff a \code{double} superior to \code{0} and inferior or equal 
#' to \code{1}, the mean value for the proportion of samples that will have,
#' for a specific position, differentially methylated values. It can be 
#' interpreted as the penetrance.
#'
#' @param propDiffsd a non-negative \code{double}, the standard deviation 
#' associated to the \code{propDiff}.
#'
#' @param propInheritance a non-negative \code{double} between [0,1], the 
#' proportion of case that inherite differentially methylated sites.
#'
#' @param propHetero a non-negative \code{double} between [0,1], the 
#' reduction of \code{vDiff} for the second and following generations.
#'
#' @return a \code{GRangesList}, the object contains information about the 
#' simulation. The file have four metadata related to the real dataset:
#' \itemize{
#' \item meanDiff, the means of the shifted distribution.
#' \item meanCTRL, the means of the control distribution.
#' \item partitionCase, the number of cases simulated using the shifted 
#' distribution.
#' \item partitionCtrl, the number of cases simulated using the control 
#' distribution and a metadata for each cases and controls 
#' the proportion of C/T.
#' }
#' 
#' @examples
#'
#' ## Fix seed to have reproducible results
#' set.seed(312)
#' 
#' ## Load dataset
#' data("samplesForChrSynthetic")
#' 
#' ## Generate a stateInfo object using samples
#' stateInformation <- methInheritSim:::getSyntheticChr(methInfo = 
#'     samplesForChrSynthetic, nbBlock = 1, nbCpG = 3)
#' 
#' ## Generate a stateDiff and stateInherite objects with length corresponding 
#' ## to nbBlock * nbCpG from stateInformation
#' stateDiff  <- c(1, 0, 1)
#' stateInherite <- c(1, 0, 0)
#' 
#' ## Create a simulation using stateInformation, stateDiff and stateInherite
#' methInheritSim:::getSim(nbCtrl = 3, nbCase = 2, generation = 3, 
#'     stateInfo = stateInformation, stateDiff = stateDiff, 
#'     stateInherite = stateInherite, diffValue = 10, 
#'     propDiff = 0.8, propDiffsd = 0.2, propInheritance = 0.8, 
#'     propHetero = 0.1)
#'
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom msm rtnorm
#' @importFrom GenomicRanges GRangesList GRanges
#' @importFrom GenomeInfoDb seqnames
#' @importFrom IRanges ranges
#' @importFrom BiocGenerics strand
#' @importFrom S4Vectors mcols
#' @keywords internal
getSim <- function(nbCtrl, nbCase, generation, stateInfo, stateDiff, 
                stateInherite, diffValue, propDiff, propDiffsd, 
                propInheritance, propHetero) {

    ## Returned object
    res <- GRangesList()
    
    ## Calculate the number of differentially methylated cases
    diffCase <- calculateNbDiffCase(nbCase = nbCase, propDiff = propDiff, 
                                        propDiffSd = propDiffsd)
    
    ctrl <- t(apply(mcols(stateInfo)[3:4], 1, function(x, nb){
        rbeta(nb, estBetaAlpha(x[1], x[2]), estBetaBeta(x[1], x[2]))},
        nb = nbCtrl))
    
    case <- t(apply(cbind(matrix(unlist(mcols(stateInfo)[3:4]) , ncol = 2), 
                stateDiff), 1, function(x, nbCase, diffValue, diffCase) 
                {getDiffCase(x[1], x[2], x[3], nbCase, diffValue, 
                diffCase)}, nbCase = nbCase, diffValue = diffValue, 
                diffCase = diffCase))
    
    # TODO change meanCTRL.meanCTRL in meanCTRL
    #tmpCol <- matrix(mcols(stateInfo)[3]$meanCTRL, nc = 1)
    res[[1]] <- GRanges(seqnames = seqnames(stateInfo),
                    ranges = ranges(stateInfo), strand =  strand(stateInfo),
                    meanDiff = case[, 1], meanCTRL = mcols(stateInfo)[3],
                    partitionCase = case[, 2], partitionCtrl = case[, 3],
                    ctrl = ctrl, case = case[, 4:length(case[1,])])
    
    for(i in 2:generation)
    {
        rm(case, ctrl)
        
        ## Adapt proportion of DM to the generation
        inR <- propDiff * propInheritance^(i - 2) # One generation move
        
        diffCur <- diffValue * propHetero       # Change diffValue
        
        ## Calculate the number of differentially methylated cases
        diffCase <- calculateNbDiffCase(nbCase = nbCase, propDiff = inR, 
                                        propDiffSd = propDiffsd)
        
        # Note mcols(stateInfo)[3:4] is a matrix with foreach position a row 
        # meanCTRL, varianceCTRL
        ctrl <- t(apply(mcols(stateInfo)[3:4], 1, function(x, nb) {
            rbeta(nb, estBetaAlpha(x[1], x[2]), estBetaBeta(x[1], x[2]))},
            nb = nbCtrl))
        
        case <- t(apply(cbind(matrix(unlist(mcols(stateInfo)[3:4]) , ncol = 2), 
                stateInherite), 1, function(x, nbCase, diffCur, diffCase) 
                {getDiffCase(x[1], x[2], x[3], nbCase, diffCur, diffCase)},
                nbCase = nbCase, diffCur = diffCur, diffCase = diffCase))
        
        res[[i]] <- GRanges(seqnames = seqnames(stateInfo),
                        ranges = ranges(stateInfo), 
                        strand =  strand(stateInfo), meanDiff = case[, 1], 
                        meanCTRL = mcols(stateInfo)[3],
                        partitionCase = case[, 2], partitionCtrl = case[, 3],
                        ctrl = ctrl, case = case[, 4:length(case[1,])])
    }
    
    return(res)
}


#' @title Calculate the number of differentially methylated cases.
#'
#' @description Identify the number of differentially methylated cases.
#'
#' @param nbCase a positive \code{integer}, the number of cases.
#' 
#' @param propDiff a \code{double} superior to \code{0} and inferior or equal 
#' to \code{1}, the mean value for the proportion of samples that will have,
#' for a specific position, differentially methylated values. It can be 
#' interpreted as the penetrance.
#'
#' @param propDiffSd a non-negative \code{double}, the standard deviation 
#' associated to the \code{propDiff}
#' 
#' @return a \code{integer}, the number of differentially methylated cases.
#' 
#' @examples
#'
#' ## Fix seed to have reproducible results
#' set.seed(3122)
#' 
#' ## Obtained the number of differential cases
#' methInheritSim:::calculateNbDiffCase(nbCase = 8, 
#'     propDiff = 0.8, propDiffSd = 0.2)
#'
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom msm rtnorm
#' @keywords internal
calculateNbDiffCase <- function(nbCase, propDiff, propDiffSd) {
    
    if (propDiffSd < 0.0000001) {
        diffCase <- round(nbCase * propDiff)
    } else{
        diffCase <- round(nbCase * rtnorm(1, mean = propDiff, sd = propDiffSd, 
                                    lower = 0, upper = 1))
    }
    
    return(diffCase)
}

#' @title Identify differentially methylated sites and among those, the ones
#' that are inherited.
#'
#' @description Identify the site positions where the cases are differentially 
#' methylated and, among those, the one that are inherited.
#'
#' @param stateInfo a \code{GRanges} that contains the CpG (or methylated 
#' sites).
#' The \code{GRanges} have four metadata from the real dataset:
#' \itemize{
#' \item chrOri, the chromosome from the real dataset
#' \item startOri, the position of the site in the real dataset
#' \item meanCTRL, the mean of the control in the real dataset
#' \item varCTRL, the variance of the control in the real dataset
#' }
#'
#' @param rateDiff a positive \code{double} inferior to \code{1}, the mean of 
#' the chance that a site is differentially 
#' methylated.
#'
#' @param minRate a non-negative \code{double} inferior to \code{1}, the 
#' minimum rate for differentially methylated sites.
#'
#' @param propInherite a non-negative \code{double} inferior or equal 
#' to \code{1}, 
#' the proportion of differentially methylated regions that 
#' are inherated.
#'
#' @param c a positive \code{double}, a factor in the formula to compute the 
#' probabylity of site to be diffentially methylated in a differentially
#' methylated region. 
#' The probability formula of site in differentially methylated region is
#' \code{c} * exp(\code{b} * log(distance with the preceding sites))
#' Default: \code{1.0}.
#'
#' @param b a negative \code{double}, a factor in the formula to compute the 
#' probabylity of site to be diffentially methylated in a differentially
#' methylated region. 
#' The probability formula of site in differentially methylated region is
#' \code{c} * exp(\code{b} * log(distance with the preceding sites)). 
#' Default: \code{-1e-01}.
#'
#' @param endLength a positive \code{integer}, when the distance with the 
#' preceding sites in a differentially
#' methylated region is larger than \code{endLength}, the differentially
#' methylated region is finished. Default: \code{1000}.
#'
#' @return a \code{list} containing the 2 following elements:
#' \itemize{
#' \item \code{stateDiff} a \code{vector} of \code{integer} (\code{0} 
#' and \code{1}) with length corresponding the length of \code{stateInfo}.
#' The \code{vector}
#' indicates, using \code{1}, the positions where the CpG sites are
#' differentially methylated.
#' \item \code{stateInherite} a \code{vector} of \code{integer} (\code{0} and 
#' \code{1})
#' with length corresponding the length of \code{stateInfo}. The 
#' \code{vector}
#' indicates, using \code{1}, the positions where the CpG values are
#' inherited.
#' }
#'
#' @examples
#' 
#' ## Load dataset containing a list of objects used by 
#' ## methInheritSim internal functions
#' data(dataSimExample)
#' 
#' ## Identify differentially methylated sites and among those, the ones
#' ## that are inherited
#' methInheritSim:::getDiffMeth(stateInfo = 
#'     dataSimExample$stateInfo, rateDiff = 0.3, minRate = 0.3,
#'     propInherite = 0.3)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom BiocGenerics start
#' @importFrom stats rbeta rexp runif rpois
#' @keywords internal
getDiffMeth <- function(stateInfo, rateDiff, minRate, propInherite, 
                            c = 1.0, b = -1e-01, endLength = 1000) {
    
    nbPos <- length(stateInfo)
    nbTry <- 1
    flag  <-  TRUE
    
    while (nbTry < 1000 & flag) {
        stateDiff <- rep(0, nbPos)
        stateInherite <- rep(0, nbPos)
        vExp <- rexp(nbPos, rateDiff)
        
        i<-1
        m<- max(1, round(vExp[i]))
        
        while(m <= nbPos){
            flagInherite <- ifelse(runif(1, 0, 1) < propInherite, TRUE, FALSE)
            stateDiff[m] <- 1
            if(flagInherite){
                stateInherite[m] <- 1
            }
            m <- m + 1
            while(m <= nbPos && 
                (start(stateInfo)[m] - start(stateInfo)[m - 1]) <= endLength) {
                cutOff <- c * 
                    exp(b*log(start(stateInfo)[m] - start(stateInfo)[m-1]))
                
                u <- runif(1,0,1)
                if(u < cutOff) {
                    stateDiff[m] <- 1
                    if(flagInherite) {
                        stateInherite[m] <- 1
                    }
                }
                m <- m + 1
            }
            i <- i + 1
            m <- m + round(vExp[i])
        }
        
        
        if (length(which(stateDiff == 1)) >= minRate * nbPos) {
            flag <- FALSE
        }
        
        nbTry <- nbTry + 1
    }
    
    if (flag) {
        stateDiff <- NULL
        stop(paste0("Enable to generate the differentially methyyleted ", 
                        "proportion fin\n"))
    }
    
    return(list(stateDiff = stateDiff, stateInherite = stateInherite))
}


#' @title Simulate a multigenerational methylation experiment with inheritance
#'
#' @description Simulate a multigenerational methylation case versus control 
#' experiment with inheritance relation using a real control dataset. 
#' 
#' The simulation can be parametrized to fit different models. The number of 
#' cases and controls, the proportion of the case affected 
#' by the treatment (penetrance), the effect of the treatment on the mean of 
#' the distribution, the proportion of sites inherited, the proportion of the 
#' differentially methylated sites from the precedent generation inherited, 
#' etc..
#' 
#' The function simulates a multigeneration dataset like a bisulfite 
#' sequencing experiment. The simulation includes the information about 
#' control and case for each generation.
#'
#' @param pathOut a string of \code{character} or \code{NULL}, the path 
#' where the 
#' files created by the function will be saved. When \code{NULL}, the files
#' are saved in the current directory.
#'
#' @param pref a string of \code{character} representing the parameters of
#' specific simulation the string is composed of those elements, separated 
#' by "_":
#' \itemize{ 
#' \item a \code{fileID}
#' \item the chromosome number, a number between 1 and \code{nbSynCHR}
#' \item the number of samples, a number in the \code{vNbSample} \code{vector}
#' \item the mean proportion of samples that has,
#' for a specific position, differentially methylated values, a 
#' number in the \code{vpDiff} \code{vector}
#' \item the proportion of 
#' C/T for a case differentially methylated that follows a shifted beta 
#' distribution, a
#' number in the \code{vDiff} \code{vector}
#' \item the 
#' proportion of cases that inherits differentially sites, a number in the
#' \code{vInheritance} \code{vector}
#' }
#'
#' @param k a positive \code{integer}, an ID for the current simulation.
#'
#' @param nbCtrl a positive \code{integer}, the number of controls.
#'
#' @param nbCase a positive \code{integer}, the number of cases.
#'
#' @param treatment a \code{vector} of integer denoting controls and cases. The
#' \code{vector} length must correspond to the sum of cases and controls.
#'
#' @param sample.id a matrix the name of each samples for each generation (row)
#' and each case and control (column).
#'
#' @param generation a positive \code{integer}, the number of generations
#' simulated.
#'
#' @param stateInfo a \code{GRanges} that contains the CpG (or 
#' methylated sites).
#' The \code{GRanges} have four metadata from the real dataset:
#' \itemize{
#' \item chrOri a \code{numeric}, the chromosome from the real dataset
#' \item startOri a \code{numeric}, the position of the site in the real dataset
#' \item meanCTRL a \code{numeric}, the mean of the control in the real dataset
#' \item varCTRL a \code{numeric}, the variance of the control in the real 
#' dataset.
#' }
#'
#' @param rateDiff a positive \code{double} inferior to \code{1}, the mean of 
#' the chance that a site is differentially methylated.
#'
#' @param minRate a non-negative \code{double} inferior to \code{1}, the 
#' minimum rate for differentially methylated sites.
#' Default: \code{0.01}.
#'
#' @param propInherite a non-negative \code{double} inferior or equal 
#' to \code{1}, 
#' the proportion of differentially methylated regions that 
#' are inherated.
#'
#' @param diffValue a non-negative \code{double} 
#' included in [0,1], the proportion of C/T for a case differentially 
#' methylated that follows 
#' a beta distribution where the mean is shifted by \code{vDiff} 
#' from the CTRL distribution.
#'
#' @param propDiff a \code{double} superior to 
#' \code{0} and inferior or equal 
#' to \code{1}, the mean value for the proportion of samples that will have,
#' for a specific position, differentially methylated values. It can be 
#' interpreted as the penetrance.
#'
#' @param propDiffsd a non-negative \code{double}, the 
#' standard deviation associated to the \code{vpDiff}. Note that 
#' \code{vpDiff} and \code{vpDiffsd} must be the same length.
#'
#' @param propInheritance a non-negative \code{double} 
#' included in [0,1], the proportion of cases 
#' that inherits differentially methylated sites.
#'
#' @param propHetero a non-negative \code{double} between [0,1], the 
#' reduction of \code{vDiff} for the second and following generations.
#'
#' @param minReads a positive \code{integer}, sites and regions having lower
#' coverage than this count are discarded. The parameter
#' corresponds to the \code{lo.count} parameter in 
#' the \code{methylKit} package.
#' 
#' @param maxPercReads a \code{double} between [0,100], the percentile of read
#' counts that is going to be used as upper cutoff. Sites and regions
#' having higher
#' coverage than \code{maxPercReads} are discarded. This parameter is used for 
#' both CpG sites and tiles analysis. The parameter
#' correspond to the \code{hi.perc} parameter in the \code{methylKit} package.
#' 
#' @param context a string of \code{character}, the short description of the 
#' methylation context, such as "CpG", "CpH", "CHH", etc..
#' 
#' @param assembly a string of \code{character}, the short description of the 
#' genome assembly, such as "mm9", "hg18", etc..
#' 
#' @param meanCov a positive \code{integer}, the mean of the coverage
#' at the simulated CpG sites.
#' 
#' @param diffRes a \code{list} with 2 entries:
#' \itemize{
#' \item \code{stateDiff} a \code{vector} of \code{integer} (\code{0} 
#' and \code{1}) with length corresponding the length of \code{stateInfo}.
#' The \code{vector}
#' indicates, using a \code{1}, the positions where the CpG sites are
#' differentially methylated.
#' \item \code{stateInherite} a \code{vector} of \code{integer} (\code{0} and 
#' \code{1})
#' with length corresponding the length of \code{stateInfo}. The 
#' \code{vector}
#' indicates, using a \code{1}, the positions where the CpG values are
#' inherited.
#' } when is \code{NULL} generate a new ones with \code{getDiffMeth}.
#'
#' @param saveGRanges a \code{logical}, when \code{true}, the package save two 
#' files type. The first generate for each simulation contains a \code{list}. 
#' The length of the \code{list} corresponds to the number of generation. 
#' The generation are stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a \code{GRangesList}. 
#' The \code{GRangeaList} store a \code{list} of \code{GRanges}. Each 
#' \code{GRanges} stores the raw mehylation data of one sample.
#' The second file a numeric \code{vector} denoting controls and cases 
#' (a file is generates by entry in the \code{vector} parameters 
#' \code{vNbSample}).
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, the package save 
#' a file contains a \code{list}. The length of the 
#' \code{list} corresponds to the number of generation. The generation are 
#' stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a S4 \code{methylRawList} object. The 
#' \code{methylRawList} object contains two Slots:
#' 1. treatment: A numeric \code{vector} denoting controls and cases.
#' 2. .Data: A \code{list} of \code{methylRaw} objects. Each object stores the 
#' raw methylation data of one sample.
#' 
#' @param runAnalysis a \code{logical}, if \code{TRUE}, two files are saved :
#' \itemize{
#' \item 1. The first file is the methylObj... file formated 
#' with the \code{methylkit} package in a S4 \code{methylBase} 
#' object (with the \code{methylKit} 
#' functions: \code{filterByCoverage}, \code{normalizeCoverage} and 
#' \code{unite}).
#' \item 2. The second file contains a S4 \code{calculateDiffMeth} object 
#' generated with the \code{methylKit} functions \code{calculateDiffMeth} 
#' using the first file.
#' }
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#'
#' ## Name of the directory that will contained the generated files
#' temp_dir <- "test_simInheritance"
#' 
#' ## Load dataset
#' data(dataSimExample)
#' 
#' ## Generate a stateDiff object with length corresponding to
#' ## nbBlock * nbCpG from stateInformation
#' stateDiff <- list()
#' stateDiff[["stateDiff"]] <- c(1, 0, 1)
#' stateDiff[["stateInherite"]] <- c(1, 0, 0)
#' 
#' ## Simulate multigenerational methylation experiment with inheritance
#' methInheritSim:::simInheritance(pathOut = temp_dir,
#'     pref = "S1_6_0.9_0.8_0.5", k = 1, nbCtrl = 6, nbCase = 6, 
#'     treatment = dataSimExample$treatment, 
#'     sample.id = dataSimExample$sample.id,
#'     generation = 3, stateInfo = dataSimExample$stateInfo[1:3],
#'     propDiff = 0.9, propDiffsd = 0.1, diffValue = 0.8, 
#'     propInheritance = 0.5, rateDiff = 0.3, minRate = 0.3,
#'     propInherite = 0.3, propHetero = 0.5, minReads = 10, maxPercReads = 99, 
#'     assembly="RNOR_5.0", context="Cpg", meanCov = 40, diffRes = stateDiff,
#'     saveGRanges = FALSE, saveMethylKit = FALSE, runAnalysis = FALSE)
#' 
#' ## Delete directory
#' if (dir.exists(temp_dir)) {
#'     unlink(temp_dir, recursive = TRUE, force = FALSE)
#' }
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
simInheritance <- function(pathOut, pref, k, nbCtrl, nbCase, treatment, 
                        sample.id, generation, stateInfo, propDiff, propDiffsd, 
                        diffValue, propInheritance, rateDiff, minRate, 
                        propInherite, propHetero, minReads, maxPercReads,
                        context, assembly, meanCov, diffRes, saveGRanges, 
                        saveMethylKit, runAnalysis) {
    
    # Test if the simulation was done before
    # if just a part of the simulation is done it do it again
    if (!is.null(pathOut) && !dir.exists(pathOut)) {
        dir.create(pathOut, showWarnings = TRUE)
    }
    
    # Test if the simulation has already been done
    alreadyDone <- testIfAlreadyDone(pathOut, pref, k, saveGRanges, 
                                        saveMethylKit, runAnalysis)
    
    if (!(alreadyDone)) {
        # Create extension used for all saved files
        extension <- paste0(pref, "_", k, ".rds")
        
        if (is.null(diffRes)) {
            ## Generate the stataeDiff and stateInherite data needed
            ## for the simulation
            diffRes <- getDiffMeth(stateInfo = stateInfo, rateDiff = rateDiff, 
                                minRate = minRate, propInherite = propInherite)
        }
        
        ## Simulate multigenerational methylation experiment
        ## Premiere generation seulement ?
        simV0.1 <- getSim(nbCtrl = nbCtrl, nbCase = nbCase, 
                        generation = generation, stateInfo = stateInfo, 
                        stateDiff = diffRes$stateDiff, 
                        stateInherite = diffRes$stateInherite,
                        diffValue = diffValue, 
                        propDiff = propDiff, propDiffsd = propDiffsd, 
                        propInheritance = propInheritance, 
                        propHetero = propHetero)
        
        saveRDS(diffRes, file = paste0(pathOut, "/stateDiff_", extension))
        saveRDS(simV0.1, file = paste0(pathOut, "/simData_", extension))
        
        ## Generate data formatted for methylKit
        ## TODO : decrire ce que ca fait
        simData <- simEachGeneration(simulation = simV0.1, 
                        nbCtrl = nbCtrl, nbCase = nbCase,
                        treatment = treatment, sample.id = sample.id,
                        generation = generation, stateInfo = stateInfo, 
                        minReads = minReads, maxPercReads = maxPercReads, 
                        context = context, assembly = assembly, 
                        meanCov = meanCov, saveGRanges = saveGRanges, 
                        saveMethylKit = saveMethylKit, 
                        runAnalysis = runAnalysis)
        
        ## Save results
        saveData(pathOut = pathOut, extension = extension, 
                    gRanges = simData$myGR, methylData = simData$myObj, 
                    methUnit = simData$meth, diffData = simData$myDiff,
                    saveGRanges = saveGRanges, saveMethylKit = saveMethylKit,
                    runAnalysis = runAnalysis)
    }
    
    return(0)
}

#' @title Simulate a multigeneration methylation experiment with inheritance
#'
#' @description Simulate a multigeneration methylation case versus control 
#' experiment with inheritance relation using a real control dataset. 
#' 
#' The simulation can be parametrized to fit different models. The number of 
#' cases and controls, the proportion of the case affected 
#' by the treatment (penetrance), the effect of the treatment on the mean of 
#' the distribution, the proportion of sites inherited, the proportion of the 
#' differentially methylated sites from the precedent generation inherited, 
#' etc..
#' 
#' The function simulates a multigeneration dataset like a bisulfite 
#' sequencing experiment. The simulation includes the information about 
#' control and case for each generation.
#'
#' @param nbCtrl a positive \code{integer}, the number of controls.
#'
#' @param nbCase a positive \code{integer}, the number of cases.
#'
#' @param treatment a numeric vector denoting controls and cases
#'
#' @param sample.id a matrix the name of each samples for each generation (row)
#' and each case and control (column).
#'
#' @param generation a positive \code{integer}, the number of generations
#' simulated.
#'
#' @param stateInfo a \code{GRanges} that contains the CpG (or 
#' methylated sites).
#' The \code{GRanges} have four metadata from the real dataset:
#' \itemize{
#' \item chrOri a \code{numeric}, the chromosome from the real dataset
#' \item startOri a \code{numeric}, the position of the site in the real dataset
#' \item meanCTRL a \code{numeric}, the mean of the control in the real dataset
#' \item varCTRL a \code{numeric}, the variance of the control in the real 
#' dataset.
#' }
#'
#' @param minReads a positive \code{integer}, sites and regions having lower
#' coverage than this count are discarded. The parameter
#' corresponds to the \code{lo.count} parameter in 
#' the \code{methylKit} package.
#' 
#' @param maxPercReads a \code{double} between [0,100], the percentile of read
#' counts that is going to be used as upper cutoff. Sites and regions
#' having higher
#' coverage than \code{maxPercReads} are discarded. This parameter is used for 
#' both CpG sites and tiles analysis. The parameter
#' correspond to the \code{hi.perc} parameter in the \code{methylKit} package.
#' 
#' @param context a string of \code{character}, the short description of the 
#' methylation context, such as "CpG", "CpH", "CHH", etc..
#' 
#' @param assembly a string of \code{character}, the short description of the 
#' genome assembly, such as "mm9", "hg18", etc..
#' 
#' @param meanCov a positive \code{integer}, the mean of the coverage
#' at the simulated CpG sites.
#' 
#' @param saveGRanges a \code{logical}, when \code{true}, the package save two 
#' files type. The first generate for each simulation contains a \code{list}. 
#' The length of the \code{list} corresponds to the number of generation. 
#' The generation are stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a \code{GRangesList}. 
#' The \code{GRangeaList} store a \code{list} of \code{GRanges}. Each 
#' \code{GRanges} stores the raw mehylation data of one sample.
#' The second file a numeric \code{vector} denoting controls and cases 
#' (a file is generates by entry in the \code{vector} parameters 
#' \code{vNbSample}).
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, the package save 
#' a file contains a \code{list}. The length of the 
#' \code{list} corresponds to the number of generation. The generation are 
#' stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a S4 \code{methylRawList} object. The 
#' \code{methylRawList} object contains two Slots:
#' 1. treatment: A numeric \code{vector} denoting controls and cases.
#' 2. .Data: A \code{list} of \code{methylRaw} objects. Each object stores the 
#' raw methylation data of one sample.
#' 
#' @param runAnalysis a \code{logical}, if \code{TRUE}, two files are saved :
#' \itemize{
#' \item 1. The first file is the methylObj... file formated 
#' with the \code{methylkit} package in a S4 \code{methylBase} 
#' object (with the \code{methylKit} 
#' functions: \code{filterByCoverage}, \code{normalizeCoverage} and 
#' \code{unite}).
#' \item 2. The second file contains a S4 \code{calculateDiffMeth} object 
#' generated with the \code{methylKit} functions \code{calculateDiffMeth} 
#' using the first file.
#' }
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#'
#' ## Load dataset
#' data("samplesForChrSynthetic")
#' data("dataSimExample")
#' 
#' ## Generate a stateInfo object using samples
#' stateInformation <- methInheritSim:::getSyntheticChr(methInfo = 
#'     samplesForChrSynthetic, nbBlock = 1, nbCpG = 3)
#' 
#' ## Generate a stateDiff and stateInherite objects with length corresponding 
#' ## to nbBlock * nbCpG from stateInformation
#' stateDiff  <- c(1, 0, 1)
#' stateInherite <- c(1, 0, 0)
#' 
#' ## Create simulation
#' sim <- methInheritSim:::getSim(nbCtrl = 3, nbCase = 2, 
#'     generation = 3, stateInfo = stateInformation, stateDiff = stateDiff, 
#'     stateInherite = stateInherite, diffValue = 10, 
#'     propDiff = 0.8, propDiffsd = 0.2, propInheritance = 0.8, 
#'     propHetero = 0.1)
#' 
#' ## TODO
#' methInheritSim:::simEachGeneration(simulation = sim, 
#' nbCtrl = 3, nbCase = 2, treatment = c(0,0,0,1,1), 
#' sample.id = dataSimExample$sample.id,
#' generation = 3, stateInfo = stateInformation, minReads = 10, 
#' maxPercReads = 99, context = "Cpg", assembly = "RNOR_5.0", meanCov = 80, 
#' saveGRanges = FALSE, saveMethylKit = FALSE, runAnalysis = FALSE)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom methylKit read filterByCoverage normalizeCoverage unite 
#' calculateDiffMeth get.methylDiff getData tileMethylCounts methRead 
#' getSampleID getAssembly getContext getTreatment
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @importFrom stats rpois
#' @importFrom methods new
#' @importFrom BiocGenerics start end strand
#' @keywords internal
simEachGeneration <- function(simulation, nbCtrl, nbCase, treatment, 
                        sample.id, generation, stateInfo, minReads, 
                        maxPercReads, context, assembly, meanCov, saveGRanges, 
                        saveMethylKit, runAnalysis) {
    ## Returned objects
    myobj <- list()
    myGR <- list()
    meth <- list()
    myDiff <- list()
    
    for(i in seq_len(generation)) {
        outList <- list()
        outGR <- GRangesList() 
        for(j in seq_len((nbCtrl + nbCase))) {
            coverage <- rpois(length(stateInfo), meanCov) + 1
            
            testM <- GRanges(seqnames = seqnames(stateInfo), 
                        ranges = ranges(stateInfo), strand = strand(stateInfo), 
                        coverage = coverage, numCs = round(coverage * 
                        unlist(mcols(simulation[[i]])[4+j])))
            
            if(saveMethylKit || runAnalysis || saveGRanges) {
                outList[[j]] <- new("methylRaw", 
                    data.frame(chr = seqnames(testM), start = start(testM), 
                    end = end(testM), strand = strand(testM), 
                    coverage = testM$coverage, numCs = testM$numCs, 
                    numTs = testM$coverage - testM$numCs), 
                    sample.id = sample.id[[i]][[j]], assembly = assembly, 
                    context = context, resolution = 'base')
            }
            if (saveGRanges) {
                outGR[[j]] <- testM
            }
        }
        
        if (saveMethylKit || runAnalysis) {
            myobj[[i]] <- new("methylRawList", outList, treatment = treatment)
        }
        
        if (saveGRanges) {
            myGR[[i]] <- outGR
        }
        
        if (runAnalysis) {
            filtered.myobj <- filterByCoverage(myobj[[i]],
                                    lo.count = minReads, lo.perc = NULL, 
                                    hi.count = NULL, hi.perc = maxPercReads)
            filtered.myobj <- normalizeCoverage(filtered.myobj, "median")
            meth[[i]]   <- suppressWarnings(unite(filtered.myobj, 
                                                    destrand = FALSE))
            
            if (nrow(meth[[i]]) > 0) {
                myDiff[[i]] <- calculateDiffMeth(meth[[i]])
            } else {
                ## calculateDiffMeth throws an error when meth is empty
                ## Create an empty methylDiff
                myDiff[[i]] <- new("methylDiff", data.frame(chr = character(), 
                                start = integer(), end = integer(), 
                                strand = strand(), pvalue = double(), 
                                qvalue = double(), meth.diff = double()),
                                sample.ids = getSampleID(meth[[i]]), 
                                destranded = FALSE,
                                assembly = getAssembly(meth[[i]]), 
                                context = getContext(meth[[i]]), 
                                treatment = getTreatment(meth[[i]]), 
                                resolution = 'base')
            }
        }
    }
    
    return(list("myObj" = myobj, "myGR" = myGR, "meth" = meth, 
                    "myDiff" = myDiff))
}


#' @title Save data created during the simulation
#'
#' @description Save data created during the simulation.
#' 
#' @param pathOut a string of \code{character}, the path 
#' where the files are saved.
#' 
#' @param extension a string of \code{character} representing the extension
#' that will be given to the saved files.
#' 
#' @param gRanges a \code{list} of \code{methylRawList}  TODO
#' 
#' @param methylData a \code{list} of \code{methylRawList}, the results
#' of the normalization of the coverage.
#' 
#' @param methUnit a \code{list} of \code{methylBase}, the results of the
#' base filtering for all samples. 
#' 
#' @param diffData a \code{list} of \code{methylDiff}, the results of the
#' calculation of differential methylation statistics.
#' 
#' @param saveGRanges a \code{logical}, when \code{true}, files containing 
#' \code{GRangeaList} are saved.
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, files 
#' \code{methylRawList} object are saved.
#' 
#' @param runAnalysis a \code{logical}, when \code{TRUE}, two files related
#' to the analysis are saved.
#' 
#' @return \code{0} indicating that the function has been successful.
#' 
#' @examples
#' 
#' ## TODO
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
saveData <- function(pathOut, extension, gRanges, methylData, methUnit, 
                diffData, saveGRanges, saveMethylKit, runAnalysis) {
    
    if (saveGRanges) {
        saveRDS(gRanges, file = paste0(pathOut, "/methylGR_", extension))
    }
    
    if (saveMethylKit) {
        saveRDS(methylData, file = paste0(pathOut, "/methylObj_", extension))
    }
    
    if (runAnalysis) {
        saveRDS(methUnit, file = paste0(pathOut, "/meth_", extension))
        saveRDS(diffData, file = paste0(pathOut, "/methDiff_", extension))
    }
    
    return(0)
}

#' @title Test if a specific simulation has already be done.
#'
#' @description Test if a specific simulation has already be done.
#' 
#' @param pathOut a string of \code{character}, the path 
#' where the files are saved.
#' 
#' @param preference a string of \code{character} representing the 
#' parameters of specific simulation.
#' 
#' @param id a positive \code{integer}, a ID for the current simulation.
#' 
#' @param saveGRanges a \code{logical}, when \code{true}, files containing 
#' \code{GRangeaList} are saved.
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, files 
#' \code{methylRawList} object are saved.
#' 
#' @param runAnalysis a \code{logical}, when \code{TRUE}, two files related
#' to the analysis are saved.
#' 
#' @return \code{logical} indicating if the simulation has already done.
#' 
#' @examples
#' 
#' ## Return TRUE when the specified simulation has already be done;
#' ## otherwise, return FALSE.
#' methInheritSim:::testIfAlreadyDone(pathOut = ".", 
#' preference = "S1_6_0.9_0.8_0.5", id = 33, 
#' saveGRanges = FALSE, saveMethylKit = FALSE, runAnalysis = FALSE)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
testIfAlreadyDone <- function(pathOut, preference, id, saveGRanges, 
                                saveMethylKit, runAnalysis) {
    
    extension <- paste0(preference, "_", id, ".rds")
    
    alreadyDone <- TRUE
    
    if(! (file.exists(paste0(pathOut, "/stateDiff_", extension))) 
        || ! (file.exists(paste0(pathOut, "/simData_", extension)))) {
        alreadyDone <- FALSE
    }
    if(saveGRanges && 
        ! (file.exists(paste0(pathOut, "/methylGR_", extension)))) {
        alreadyDone <- FALSE
    }
    if(saveMethylKit &&
        ! (file.exists(paste0(pathOut, "/methylObj_", extension)))) {
        alreadyDone <- FALSE
    }
    if(runAnalysis &&
        (! (file.exists(paste0(pathOut, "/meth_", extension)))
            || ! (file.exists(paste0(pathOut, "/methDiff_", extension))))) {
        alreadyDone <- FALSE
    }
    
    return(alreadyDone)
}
    

#' @title Parameters validation for the \code{\link{runSim}} function. 
#'
#' @description Validation of all parameters needed by the public
#' \code{\link{runSim}} function. 
#' 
#' @param vpDiff a \code{double} superior to \code{0} and inferior or equal 
#' to \code{1}, the mean value for the proportion of samples that will have,
#' for a specific position, differentially methylated values. It can be 
#' interpreted as the penetrance.
#' 
#' @param vpDiffsd a non-negative \code{double}, the standard deviation 
#' associated to the \code{propDiff}.
#'
#' @param vDiff a positive \code{double} between [0,1], the proportion of 
#' C/T for a case differentially methylated follow a beta distribution 
#' where the mean is shifted of \code{vDiff} from the CTRL distribution
#'
#' @param vInheritance a positive \code{double} between [0,1], the 
#' proportion of cases that inherited differentially sites.
#' 
#' @param propInherite a non-negative \code{double} inferior or equal to 
#' \code{1}, the proportion of differentially methylated site
#' are inherated
#'
#' @param rateDiff a positive \code{double} inferior to \code{1}, the mean of 
#' the chance that a site is differentially methylated.
#'
#' @param minRate a non-negative \code{double} inferior to \code{1}, the 
#' minimum rate of differentially methylated sites.
#'
#' @param propHetero a positive \code{double} between [0,1], the 
#' reduction of vDiff for the second and following generations.
#' 
#' @param maxPercReads a \code{double} between [0,100], the percentile of read
#' counts that is going to be used as upper cutoff. Bases ore regions
#' having higher
#' coverage than this percentile are discarded. Parameter used for both CpG
#' sites and tiles analysis. The parameter
#' correspond to the \code{hi.perc} parameter in the  \code{methylKit} package.
#'
#' @param nbSynCHR a positive \code{integer}, the number of distinct 
#' synthetic chromosomes that will be generated.
#' 
#' @param nbSimulation a positive \code{integer}, the number of simulations 
#' for each parameter (\code{vNbSample}, \code{vpDiff}, \code{vDiff} and
#' \code{vInheritance}).
#'
#' @param nbBlock a positive \code{integer}, the number of blocks used 
#' for sampling.
#'
#' @param nbCpG a positive \code{integer}, the number of consecutive CpG 
#' positions used for sampling from \code{methInfo}.
#'
#' @param vNbSample a \code{vector} of positive \code{integer}, the number of 
#' methData (CTRL) and cases in the the simulation dataset. In 
#' the simulated dataset, the number of CTRL equals the number of Case. 
#' The number of CTRL do not need to be equal to the number of Case in
#' the real dataset.
#'
#' @param nbGeneration a positive \code{integer}, the number of generations.
#' 
#' @param minReads a positive \code{integer} Bases and regions having lower
#' coverage than this count are discarded. The parameter
#' correspond to the \code{lo.count} parameter in the \code{methylKit} package.
#' 
#' @param meanCov a positive \code{integer} represent the mean of the coverage
#' at the CpG site Default: \code{80}.
#' 
#' @param nbCores a positive \code{integer}, the number of cores to use when
#' creating the simulated datasets. Default: \code{1} and always 
#' \code{1} for Windows.
#' 
#' @param vSeed a \code{integer}, a seed used when reproducible results are
#' needed. When a value inferior or equal to zero is given, a random integer
#' is used. Default: \code{-1}.
#' 
#' @param keepDiff \code{logical} if true, the differentially methyled sites
#' will be the same for each parameter (\code{vpDiff}, 
#' \code{vDiff} and \code{vInheritance}). Default: \code{FALSE}.
#' 
#' @param saveGRanges a \code{logical}, when \code{true}, the package save two 
#' files type. The first generate for each simulation contains a \code{list}. 
#' The length of the \code{list} corresponds to the number of generation. 
#' The generation are stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a \code{GRangesList}. 
#' The \code{GRangeaList} store a \code{list} of \code{GRanges}. Each 
#' \code{GRanges} stores the raw mehylation data of one sample.
#' The second file a numeric \code{vector} denoting controls and cases 
#' (a file is generates by entry in the \code{vector} parameters 
#' \code{vNbSample}).
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, for each 
#' simulations save a file contains a \code{list}. The length of the 
#' \code{list} corresponds to the number of generation. The generation are 
#' stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a S4 \code{methylRawList} object. The 
#' \code{methylRawList} object contains two Slots:
#' 1. treatment: A numeric \code{vector} denoting controls and cases.
#' 2. .Data: A \code{list} of \code{methylRaw} objects. Each object stores the 
#' raw methylation data of one sample.
#' 
#' @param runAnalysis a \code{logical}, if \code{TRUE}, two files are saved 
#' for each simulation:
#' \itemize{
#' \item 1. The first file is the methylObj... file formated with 
#' the \code{methylkit} package in a S4 \code{methylBase} object 
#' (with the \code{methylKit} functions: \code{filterByCoverage}, 
#' \code{normalizeCoverage} and \code{unite}).
#' \item 2. The second file contains a S4 \code{calculateDiffMeth} object 
#' generated with the \code{methylKit} functions \code{calculateDiffMeth} on 
#' the first file.
#' }
#' 
#' @param outputDir a string of \code{character} or \code{NULL}, the path 
#' where the 
#' files created by the function will be saved. When \code{NULL}, the files
#' are saved in the current directory. 
#'
#' @param fileID a string of \code{character}, a identifiant that will be 
#' included in each output file name. Each output 
#' file name is 
#' composed of those elements, separated by "_":
#' \itemize{ 
#' \item a type name, ex: methylGR, methylObj, etc..
#' \item a \code{fileID}
#' \item the chromosome number, a number between 1 and \code{nbSynCHR}
#' \item the number of samples, a number in the \code{vNbSample} \code{vector}
#' \item the mean proportion of samples that has,
#' for a specific position, differentially methylated values, a 
#' number in the \code{vpDiff} \code{vector}
#' \item the proportion of 
#' C/T for a case differentially methylated that follows a shifted beta 
#' distribution, a
#' number in the \code{vDiff} \code{vector}
#' \item the 
#' proportion of cases that inherits differentially sites, a number in the
#' \code{vInheritance} \code{vector}
#' \item the identifiant for the simulation, a number 
#' between 1 and \code{nbSimulation}
#' \item the file extension ".rds"
#' }
#'
#' @param methData an object of class \code{methylBase}, the CpG information
#' from controls (CTRL) that will be used to create the sythetic chromosome. 
#' The \code{methData} object can also contain information from cases but 
#' only the controls will be used.
#' 
#' @param context a string of \code{character}, the methylation context 
#' string, ex: CpG,CpH,CHH, etc. 
#' 
#' @param assembly a string of \code{character}, the short description of the 
#' genome assembly. Ex: mm9,hg18 etc.
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#'
#' ## Load dataset
#' data("samplesForChrSynthetic")
#' 
#' ## The function returns 0 when all paramaters are valid
#' methInheritSim:::validateRunSimParameters(vpDiff =0.2, 
#' vpDiffsd = 0.3, vDiff = 0.4, vInheritance = 0.2, propInherite = 0.5, 
#' rateDiff = 0.2, minRate = 0.1, propHetero = 0.2, maxPercReads = 99.1, 
#' nbSynCHR = 1, nbSimulation = 2, nbBlock = 10, nbCpG = 4, vNbSample = 10, 
#' nbGeneration = 3, minReads = 10, meanCov = 80, 
#' nbCores = 1, vSeed = -1, keepDiff = FALSE, saveGRanges = TRUE, 
#' saveMethylKit = FALSE, runAnalysis = FALSE, outputDir = "test", 
#' fileID = "test", methData = samplesForChrSynthetic, 
#' context = "CpG", assembly = "Rnor_5.0")
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom S4Vectors isSingleInteger isSingleNumber
#' @keywords internal
validateRunSimParameters <- function(vpDiff, vpDiffsd, vDiff, vInheritance, 
            propInherite, rateDiff, minRate, propHetero,
            maxPercReads, nbSynCHR, nbSimulation, nbBlock, nbCpG, 
            vNbSample, nbGeneration, minReads, meanCov, nbCores, 
            vSeed, keepDiff, saveGRanges, saveMethylKit,
            runAnalysis, outputDir, fileID, methData, context, assembly) {
    
    ## Validate double parameters
    validateRunSimDoubleParameters(vpDiff = vpDiff, vpDiffsd = vpDiffsd, 
                vDiff = vDiff, vInheritance = vInheritance, 
                propInherite = propInherite, rateDiff = rateDiff, 
                minRate = minRate, propHetero = propHetero,
                maxPercReads = maxPercReads)
    
    ## Validate integer parameters
    validateRunSimIntegerParameters(nbSynCHR = nbSynCHR, nbSimulation = 
                nbSimulation, nbBlock = nbBlock, nbCpG  = nbCpG, 
                vNbSample = vNbSample, nbGeneration = nbGeneration,    
                minReads = minReads, meanCov = meanCov, nbCores = nbCores, 
                vSeed = vSeed) 
    
    ## Validate logical parameters
    validateRunSimLogicalParameters(keepDiff = keepDiff, saveGRanges = 
                saveGRanges, saveMethylKit = saveMethylKit,
                runAnalysis = runAnalysis)
    
    ## Validate other parameters
    validateRunSimOtherParameters(outputDir = outputDir, fileID = fileID, 
                methData = methData, context = context, assembly = assembly)
}


#' @title Parameters validation for the \code{\link{runSim}} function. Only
#' integer parameters are validated.
#'
#' @description Validation of all parameters needed by the public
#' \code{\link{runSim}} function. Only integer parameters are validated.
#'
#' @param nbSynCHR a positive \code{integer}, the number of distinct 
#' synthetic chromosomes that will be generated.
#' 
#' @param nbSimulation a positive \code{integer}, the number of simulations 
#' for each parameter (\code{vNbSample}, \code{vpDiff}, \code{vDiff} and
#' \code{vInheritance}).
#'
#' @param nbBlock a positive \code{integer}, the number of blocks used 
#' for sampling.
#'
#' @param nbCpG a positive \code{integer}, the number of consecutive CpG 
#' positions used for sampling from \code{methInfo}.
#'
#' @param vNbSample a \code{vector} of positive \code{integer}, the number of 
#' methData (CTRL) and cases in the the simulation dataset. In 
#' the simulated dataset, the number of CTRL equals the number of Case. 
#' The number of CTRL do not need to be equal to the number of Case in
#' the real dataset.
#'
#' @param nbGeneration a positive \code{integer}, the number of generations.
#' 
#' @param minReads a positive \code{integer} Bases and regions having lower
#' coverage than this count are discarded. The parameter
#' correspond to the \code{lo.count} parameter in the \code{methylKit} package.
#' 
#' @param meanCov a positive \code{integer} represent the mean of the coverage
#' at the CpG site Default: \code{80}.
#' 
#' @param nbCores a positive \code{integer}, the number of cores to use when
#' creating the simulated datasets. Default: \code{1} and always 
#' \code{1} for Windows.
#' 
#' @param vSeed a \code{integer}, a seed used when reproducible results are
#' needed. When a value inferior or equal to zero is given, a random integer
#' is used. Default: \code{-1}.
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#' 
#' ## The function returns 0 when all paramaters are valid
#' methInheritSim:::validateRunSimIntegerParameters(nbSynCHR = 1, 
#' nbSimulation = 2, nbBlock = 10, nbCpG = 4, vNbSample = 10, 
#' nbGeneration = 3, minReads = 10, meanCov = 80, 
#' nbCores = 1, vSeed = -1)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom S4Vectors isSingleInteger isSingleNumber
#' @keywords internal
validateRunSimIntegerParameters <- function(nbSynCHR, nbSimulation, nbBlock, 
                                    nbCpG, vNbSample, nbGeneration, minReads, 
                                    meanCov, nbCores, vSeed) {

    ## Validate that nbSynCHR is an positive integer
    if (!(isSingleInteger(nbSynCHR) || isSingleNumber(nbSynCHR)) ||
        as.integer(nbSynCHR) < 1) {
        stop("nbSynCHR must be a positive integer or numeric")
    }
    
    ## Validate that nbSimulation is an positive integer
    if (!(isSingleInteger(nbSimulation) || isSingleNumber(nbSimulation)) ||
        as.integer(nbSimulation) < 1) {
        stop("nbSimulation must be a positive integer or numeric")
    }
    
    ## Validate that nbBlock is an positive integer
    if (!(isSingleInteger(nbBlock) || isSingleNumber(nbBlock)) ||
        as.integer(nbBlock) < 1) {
        stop("nbBlock must be a positive integer or numeric")
    }
    
    ## Validate that nbCpG is an positive integer
    if (!(isSingleInteger(nbCpG) || isSingleNumber(nbCpG)) ||
        as.integer(nbCpG) < 1) {
        stop("nbCpG must be a positive integer or numeric")
    }
    
    ## Validate that nbGeneration is an positive integer
    if (!(isSingleInteger(nbGeneration) || isSingleNumber(nbGeneration)) ||
        as.integer(nbGeneration) < 1) {
        stop("nbGeneration must be a positive integer or numeric")
    }
    
    ## Validate that vNbSample is a vector of distinct positive integer
    if (! is.numeric(vNbSample) || anyDuplicated(vNbSample) > 0 ||
        any(vNbSample < 1) || ! all(as.integer(vNbSample) == vNbSample)) {
        stop("vNbSample must be a vector of distinct positive integer")
    }
    
    ## Validate that minReads is an positive integer
    if (!(isSingleInteger(minReads) || isSingleNumber(minReads)) ||
        as.integer(minReads) < 1) {
        stop("minReads must be a positive integer or numeric")
    }
    
    ## Validate that meanCov is an positive integer
    if (!(isSingleInteger(meanCov) || isSingleNumber(meanCov)) ||
        as.integer(meanCov) < 1) {
        stop("meanCov must be a positive integer or numeric")
    }
    
    ## Validate that nbCores is an positive integer
    if (!(isSingleInteger(nbCores) || isSingleNumber(nbCores)) ||
        as.integer(nbCores) < 1) {
        stop("nbCores must be a positive integer or numeric")
    }
    
    ## Validate that nbCores is set to 1 on Windows system
    if (Sys.info()["sysname"] == "Windows" && as.integer(nbCores) != 1) {
        stop("nbCores must be 1 on a Windows system")
    }
    
    ## Validate that vSeed is an integer
    if (!(isSingleInteger(vSeed) || isSingleNumber(vSeed))) {
        stop("vSeed must be an integer or numeric")
    }
    
    return(0)
}


#' @title Fix seed value.
#'
#' @description Fix seed value when specified value is -1.
#'
#' @param vSeed a \code{integer}, a seed used when reproducible results are
#' needed. When a value inferior or equal to zero is given, a random integer
#' is used. 
#' 
#' @return a \code{double}, the seed value.
#' 
#' @examples 
#' 
#' ## Return vSeed value when value is not -1
#' methInheritSim:::fixSeed(vSeed = 10)
#' 
#' ## Return new value when value is -1
#' methInheritSim:::fixSeed(vSeed = -1)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
fixSeed <- function(vSeed) {
    if (vSeed <= -1) {
        tSeed <- as.numeric(Sys.time())
        vSeed <- 1e8 * (tSeed - floor(tSeed))
    }
    return(vSeed)
}

#' @title Parameters validation for the \code{\link{runSim}} function. Only
#' double parameters are validated.
#'
#' @description Validation of all parameters needed by the public
#' \code{\link{runSim}} function. Only double parameters are validated.
#'
#' @param vpDiff a \code{double} superior to \code{0} and inferior or equal 
#' to \code{1}, the mean value for the proportion of samples that will have,
#' for a specific position, differentially methylated values. It can be 
#' interpreted as the penetrance.
#' 
#' @param vpDiffsd a non-negative \code{double}, the standard deviation 
#' associated to the \code{propDiff}.
#'
#' @param vDiff a positive \code{double} between [0,1], the proportion of 
#' C/T for a case differentially methylated follow a beta distribution 
#' where the mean is shifted of \code{vDiff} from the CTRL distribution
#'
#' @param vInheritance a positive \code{double} between [0,1], the 
#' proportion of cases that inherited differentially sites.
#' 
#' @param propInherite a non-negative \code{double} inferior or equal to 
#' \code{1}, the proportion of differentially methylated site
#' are inherated
#'
#' @param rateDiff a positive \code{double} inferior to \code{1}, the mean of 
#' the chance that a site is differentially methylated.
#'
#' @param minRate a non-negative \code{double} inferior to \code{1}, the 
#' minimum rate of differentially methylated sites.
#'
#' @param propHetero a positive \code{double} between [0,1], the 
#' reduction of vDiff for the second and following generations.
#' 
#' @param maxPercReads a \code{double} between [0,100], the percentile of read
#' counts that is going to be used as upper cutoff. Bases ore regions
#' having higher
#' coverage than this percentile are discarded. Parameter used for both CpG
#' sites and tiles analysis. The parameter
#' correspond to the \code{hi.perc} parameter in the  \code{methylKit} package.
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#'
#' ## The function returns 0 when all paramaters are valid
#' methInheritSim:::validateRunSimDoubleParameters(vpDiff =0.2, 
#' vpDiffsd = 0.3, vDiff = 0.4, vInheritance = 0.2, propInherite = 0.5, 
#' rateDiff = 0.2, minRate = 0.1, propHetero = 0.2, maxPercReads = 99.1)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom S4Vectors isSingleInteger isSingleNumber
#' @keywords internal
validateRunSimDoubleParameters <-function(vpDiff, vpDiffsd, vDiff, 
                                        vInheritance,propInherite, rateDiff, 
                                        minRate, propHetero, maxPercReads) {
    
    ## Validate that vpDiff is an positive double include in (0,1]
    if (! is.numeric(vpDiff) || anyDuplicated(vpDiff) > 0 ||
        any(vpDiff <= 0.00) || any(vpDiff > 1.00)) {
        stop(paste0("vpDiff must be a vector of distinct positive double ", 
                    "include in (0,1]"))
    }
    
    ## Validate that vpDiffsd is an non-negative double
    if (! is.numeric(vpDiffsd) || any(vpDiffsd < 0.00) ) {
        stop("vpDiffsd must be a vector of non-negative double")
    }
    
    ## Validate that vpDiff and vpDiffsd must be the same length
    if (length(vpDiff) != length(vpDiffsd)) {
        stop("vpDiff and vpDiffsd must be the same length")
    }
    
    ## Validate that vDiff is a vector of distinct non-negative double 
    ## include in  [0,1]
    if (! is.numeric(vDiff) || anyDuplicated(vDiff) > 0 ||
        any(vDiff < 0.00) || any(vDiff > 1.00)) {
        stop(paste0("vDiff must be a vector of distinct non-negative double", 
                    " include in [0,1]"))
    }
    
    ## Validate that vInheritance is a vector of distinct non-negative double 
    ## include in [0,1]
    if (! is.numeric(vInheritance) || anyDuplicated(vInheritance) > 0 ||
        any(vInheritance < 0.00) || any(vInheritance > 1.00)) {
        stop(paste0("vInheritance must be a vector of distinct non-negative ", 
                    "double include in [0,1]"))
    }
    
    ## TODO finish unit test
    ## Validate that rateDiff is an positive double include in (0,1)
    if (!(isSingleNumber(rateDiff)) || rateDiff <= 0.00 || rateDiff >= 1.00) {
        stop("rateDiff must be a positive double include in (0,1)")
    }
    
    ## Validate that minRate is an non-negative double include in [0,1)
    if (!(isSingleNumber(minRate)) || minRate < 0.00 || minRate >= 1.00) {
        stop("minRate must be a non-negative double include in [0,1)")
    }
    
    ## Validate that propInherite is a non-negative double include in [0,1]
    if (!(isSingleNumber(propInherite)) ||
        propInherite < 0.00 || propInherite > 1.00) {
        stop("propInherite must be a non-negative double include in [0,1]")
    }
    
    
    ## Validate that propHetero is an non-negative double include in [0,1]
    if (!(isSingleNumber(propHetero)) ||
        propHetero < 0.00 || propHetero > 1.00) {
        stop("propHetero must be a non-negative double include in [0,1]")
    }
    
    ## Validate that maxPercReads is an positive double between [0,100]
    if (!(isSingleNumber(maxPercReads)) ||
        maxPercReads < 0.00 || maxPercReads > 100.00) {
        stop("maxPercReads must be a positive double between [0,100]")
    }
    
    return(0)
}


#' @title Parameters validation for the \code{\link{runSim}} function. Only 
#' logical parameters are validated.
#'
#' @description Validation of all parameters needed by the public
#' \code{\link{runSim}} function. Only logical parameters are validated.
#' 
#' @param keepDiff \code{logical} if true, the differentially methyled sites
#' will be the same for each parameter (\code{vpDiff}, 
#' \code{vDiff} and \code{vInheritance}). Default: \code{FALSE}.
#' 
#' @param saveGRanges a \code{logical}, when \code{true}, the package save two 
#' files type. The first generate for each simulation contains a \code{list}. 
#' The length of the \code{list} corresponds to the number of generation. 
#' The generation are stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a \code{GRangesList}. 
#' The \code{GRangeaList} store a \code{list} of \code{GRanges}. Each 
#' \code{GRanges} stores the raw mehylation data of one sample.
#' The second file a numeric \code{vector} denoting controls and cases 
#' (a file is generates by entry in the \code{vector} parameters 
#' \code{vNbSample}).
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, for each 
#' simulations save a file contains a \code{list}. The length of the 
#' \code{list} corresponds to the number of generation. The generation are 
#' stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a S4 \code{methylRawList} object. The 
#' \code{methylRawList} object contains two Slots:
#' 1. treatment: A numeric \code{vector} denoting controls and cases.
#' 2. .Data: A \code{list} of \code{methylRaw} objects. Each object stores the 
#' raw methylation data of one sample.
#' 
#' @param runAnalysis a \code{logical}, if \code{TRUE}, two files are saved 
#' for each simulation:
#' \itemize{
#' \item 1. The first file is the methylObj... file formated with 
#' the \code{methylkit} package in a S4 \code{methylBase} object 
#' (with the \code{methylKit} functions: \code{filterByCoverage}, 
#' \code{normalizeCoverage} and \code{unite}).
#' \item 2. The second file contains a S4 \code{calculateDiffMeth} object 
#' generated with the \code{methylKit} functions \code{calculateDiffMeth} on 
#' the first file.
#' }
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#'
#' ## Load dataset
#' data("samplesForChrSynthetic")
#'
#' ## The function returns 0 when all paramaters are valid
#' methInheritSim:::validateRunSimLogicalParameters(keepDiff = FALSE, 
#' saveGRanges = TRUE, saveMethylKit = FALSE, runAnalysis = FALSE)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom S4Vectors isSingleInteger isSingleNumber
#' @keywords internal
validateRunSimLogicalParameters <-function(keepDiff, saveGRanges, 
                                            saveMethylKit, runAnalysis) {
    
    ## Validate that keepDiff is a logical
    if (!is.logical(keepDiff)) {
        stop("keepDiff must be a logical")
    }
    
    ## Validate that saveGRanges is a logical
    if (!is.logical(saveGRanges)) {
        stop("saveGRanges must be a logical")
    }
    
    ## Validate that saveMethylKit is a logical
    if (!is.logical(saveMethylKit)) {
        stop("saveMethylKit must be a logical")
    }
    
    ## Validate that runAnalysis is a logical
    if (!is.logical(runAnalysis)) {
        stop("runAnalysis must be a logical")
    }
    
    return(0)
}


#' @title Parameters validation for the \code{\link{runSim}} function. Only 
#' parameters other than double, integer and logical are validated.
#'
#' @description Validation of all parameters needed by the public
#' \code{\link{runSim}} function. Only parameters other than double, integer
#' and logical are validated.
#'
#' @param outputDir a string of \code{character} or \code{NULL}, the path 
#' where the 
#' files created by the function will be saved. When \code{NULL}, the files
#' are saved in the current directory. 
#'
#' @param fileID a string of \code{character}, a identifiant that will be 
#' included in each output file name. Each output 
#' file name is 
#' composed of those elements, separated by "_":
#' \itemize{ 
#' \item a type name, ex: methylGR, methylObj, etc..
#' \item a \code{fileID}
#' \item the chromosome number, a number between 1 and \code{nbSynCHR}
#' \item the number of samples, a number in the \code{vNbSample} \code{vector}
#' \item the mean proportion of samples that has,
#' for a specific position, differentially methylated values, a 
#' number in the \code{vpDiff} \code{vector}
#' \item the proportion of 
#' C/T for a case differentially methylated that follows a shifted beta 
#' distribution, a
#' number in the \code{vDiff} \code{vector}
#' \item the 
#' proportion of cases that inherits differentially sites, a number in the
#' \code{vInheritance} \code{vector}
#' \item the identifiant for the simulation, a number 
#' between 1 and \code{nbSimulation}
#' \item the file extension ".rds"
#' }
#'
#' @param methData an object of class \code{methylBase}, the CpG information
#' from controls (CTRL) that will be used to create the sythetic chromosome. 
#' The \code{methData} object can also contain information from cases but 
#' only the controls will be used.
#' 
#' @param context a string of \code{character}, the methylation context 
#' string, ex: CpG,CpH,CHH, etc. 
#' 
#' @param assembly a string of \code{character}, the short description of the 
#' genome assembly. Ex: mm9,hg18 etc.
#' 
#' @return \code{0} indicating that the function has been successful.
#'
#' @examples
#'
#' ## Load dataset
#' data("samplesForChrSynthetic")
#'
#' ## The function returns 0 when all paramaters are valid
#' methInheritSim:::validateRunSimOtherParameters(
#' outputDir = "test", fileID = "test", methData = samplesForChrSynthetic, 
#' context = "CpG", assembly = "Rnor_5.0")
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @importFrom S4Vectors isSingleInteger isSingleNumber
#' @keywords internal
validateRunSimOtherParameters <-function(outputDir, fileID, methData, 
                                    context, assembly) {
    
    ## Validate that the outputDir is an not empty string
    if (!is.null(outputDir) && !is.character(outputDir)) {
        stop("outputDir must be a character string or NULL")
    }
    
    ## Validate that the fileID is an not empty string
    if (!is.null(fileID) && !is.character(fileID)) {
        stop("fileID must be a character string or NULL")
    }
    
    ## Validate that the methData is a methylBase object
    if (!"methylBase" %in% class(methData)) {
        stop("methData must be an object of class \"methyBase\"")
    }
    
    ## Validate that context is a character string
    if(!is.character(context)) {
        stop("context must be a character string")
    }
    
    ## Validate that assembly is a character string
    if (!is.character(assembly)) {
        stop("assembly must be a character string")
    }
    
    return(0)
}


#' @title Generate the samples ID for the simulated dataset.
#'
#' @description Generate the samples ID for the simulated dataset. The
#' standard format of the samples ID is : 
#' "F[Number for the generation]_[Number for the sample]
#' _[OC for case or C for control]"
#'
#' @param nbGeneration a positive \code{integer}, the number of generations
#' simulated.
#' 
#' @param nbSample a positive \code{integer}, 
#' the number of controls (CTRL) and cases in the simulated dataset.
#'
#' @return a \code{list} containing  a \code{list} of sample ID for 
#' each generation.
#' 
#' @examples
#'
#' ## Create sample ID 
#' methInheritSim:::createSampleID(nbGeneration = 3, nbSample = 6)
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
createSampleID <- function(nbGeneration, nbSample) {
    
    sample.id <- list()
    
    for (i in seq_len(2 * nbSample)) {
        if (i <= nbSample) {
            for (j in seq_len(nbGeneration)) {
                if(i == 1) {
                    sample.id[[j]] <- list()
                }
                sample.id[[j]][[i]] <- paste0("F", j, "_", i, "_C")
            }
        } else {
            for (j in seq_len(nbGeneration)) {
                sample.id[[j]][[i]] <- paste0("F", j, "_", i, "_OC")
            }
        }
    }
    
    return(sample.id)
}


#' @title Simulate a multigeneration methylation experiment with inheritance on
#' each synthetic chromosome.
#' 
#' @description Simulate a multigeneration methylation case versus control 
#' experiment with inheritance relation using a real control dataset. 
#' 
#' @param methData an object of class \code{methylBase}, the CpG information
#' from controls (CTRL) that will be used to create the synthetic chromosome. 
#' The \code{methData} object can also contain information from cases but 
#' only the controls are used.
#' 
#' @param nbSynCHR a positive \code{integer}, the number of distinct synthetic 
#' chromosomes that will be generated.
#'
#' @param nbSimulation a positive \code{integer}, the number of simulations 
#' generated 
#' for each parameter (\code{vNbSample}, \code{vpDiff}, \code{vDiff} and
#' \code{vInheritance}). 
#' The total number of simulation is 
#' nbSimulation * \code{length(vNbSample)} * \code{length(vpDiff)} *
#' \code{length(vInheritance)})
#'
#' @param nbBlock a positive \code{integer}, the number of blocks used 
#' for sampling.
#'
#' @param nbCpG a positive \code{integer}, the number of consecutive CpG 
#' positions used for sampling from \code{methInfo}.
#'
#' @param nbGeneration a positive \code{integer}, the number of generations
#' simulated.
#'
#' @param vNbSample a \code{vector} of distinct positive \code{integer}, 
#' the number of controls (CTRL) and cases in the simulated dataset. In 
#' the simulated dataset, the number of CTRL equals the number of cases. 
#' The number of CTRL do not need to be equal to the number of Case in
#' the real \code{methData} dataset.
#'
#' @param vpDiff a \code{vector} of distinct \code{double} superior to 
#' \code{0} and inferior or equal 
#' to \code{1}, the mean value for the proportion of samples that will have,
#' for a specific position, differentially methylated values. It can be 
#' interpreted as the penetrance. Note that \code{vpDiff} and \code{vpDiffsd}
#'  must be the same length.
#' 
#' @param vpDiffsd a \code{vector} of a non-negative \code{double}, the 
#' standard deviation associated to the \code{vpDiff}. Note that 
#' \code{vpDiff} and \code{vpDiffsd} must be the same length.
#'
#' @param vDiff a \code{vector} of distinct non-negative \code{double} 
#' included in [0,1], the proportion of C/T for a case differentially 
#' methylated that follows 
#' a beta distribution where the mean is shifted by \code{vDiff} 
#' from the CTRL distribution.
#'
#' @param vInheritance a \code{vector} of distinct non-negative \code{double} 
#' included in [0,1], the proportion of cases 
#' that inherits differentially methylated sites.
#' 
#' @param rateDiff a positive \code{double} inferior to \code{1}, the mean of 
#' the chance that a site is differentially 
#' methylated.
#'
#' @param minRate a non-negative \code{double} inferior to \code{1}, the 
#' minimum rate for differentially methylated sites.
#'
#' @param propInherite a non-negative \code{double} inferior or equal 
#' to \code{1}, the proportion of differentially methylated regions that 
#' are inherated.
#'
#' @param propHetero a non-negative \code{double} between [0,1], the 
#' reduction of \code{vDiff} for the second and following generations.
#' 
#' @param keepDiff a \code{logical}, when \code{TRUE}, the 
#' differentially methylated sites
#' will be the same for all simulated datasets. Datasets generated using 
#' differents parameter values from vector parameters (\code{vpDiff}, 
#' \code{vDiff} and \code{vInheritance}) wil all have the same differentially
#' methylated sites.
#' 
#' @param outputDir a string of \code{character} or \code{NULL}, the path 
#' where the files created by the function will be saved. When \code{NULL}, 
#' the files are saved in a directory called "outputDir" that is located in 
#' the current directory. 
#'
#' @param fileID a string of \code{character}, a identifiant that will be 
#' included in each output file name. Each output file name is 
#' composed of those elements, separated by "_":
#' \itemize{ 
#' \item a type name, ex: methylGR, methylObj, etc..
#' \item a \code{fileID}
#' \item the chromosome number, a number between 1 and \code{nbSynCHR}
#' \item the number of samples, a number in the \code{vNbSample} \code{vector}
#' \item the mean proportion of samples that has,
#' for a specific position, differentially methylated values, a 
#' number in the \code{vpDiff} \code{vector}
#' \item the proportion of 
#' C/T for a case differentially methylated that follows a shifted beta 
#' distribution, a
#' number in the \code{vDiff} \code{vector}
#' \item the 
#' proportion of cases that inherits differentially sites, a number in the
#' \code{vInheritance} \code{vector}
#' \item the identifiant for the simulation, a number 
#' between 1 and \code{nbSimulation}
#' \item the file extension ".rds"
#' }
#' 
#' @param minReads a positive \code{integer}, sites and regions having lower
#' coverage than this count are discarded. The parameter
#' corresponds to the \code{lo.count} parameter in 
#' the \code{methylKit} package.
#' 
#' @param maxPercReads a \code{double} between [0,100], the percentile of read
#' counts that is going to be used as upper cutoff. Sites and regions
#' having higher
#' coverage than \code{maxPercReads} are discarded. This parameter is used for 
#' both CpG sites and tiles analysis. The parameter
#' correspond to the \code{hi.perc} parameter in the \code{methylKit} package.
#' 
#' @param meanCov a positive \code{integer}, the mean of the coverage
#' at the simulated CpG sites.
#' 
#' @param context a string of \code{character}, the short description of the 
#' methylation context, such as "CpG", "CpH", "CHH", etc.. 
#' 
#' @param assembly a string of \code{character}, the short description of the 
#' genome assembly, such as "mm9", "hg18", etc..
#' 
#' @param saveGRanges a \code{logical}, when \code{true}, the package save two 
#' files type. The first generate for each simulation contains a \code{list}. 
#' The length of the \code{list} corresponds to the number of generation. 
#' The generation are stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a \code{GRangesList}. 
#' The \code{GRangeaList} store a \code{list} of \code{GRanges}. Each 
#' \code{GRanges} stores the raw mehylation data of one sample.
#' The second file a numeric \code{vector} denoting controls and cases 
#' (a file is generates by entry in the \code{vector} parameters 
#' \code{vNbSample}).
#' 
#' @param saveMethylKit a \code{logical}, when \code{TRUE}, for each 
#' simulations save a file contains a \code{list}. The length of the 
#' \code{list} corresponds to the number of generation. The generation are 
#' stored in order (first entry = first generation, 
#' second entry = second generation, etc..). All samples related to one 
#' generations are contained in a S4 \code{methylRawList} object. The 
#' \code{methylRawList} object contains two Slots:
#' 1. treatment: A numeric \code{vector} denoting controls and cases.
#' 2. .Data: A \code{list} of \code{methylRaw} objects. Each object stores the 
#' raw methylation data of one sample.
#' 
#' @param runAnalysis a \code{logical}, if \code{TRUE}, two files are saved 
#' for each simulation:
#' \itemize{
#' \item 1. The first file is the methylObj... file formated with 
#' the \code{methylkit} 
#' package in a S4 \code{methylBase} object (using the \code{methylKit} 
#' functions: \code{filterByCoverage}, \code{normalizeCoverage} and 
#' \code{unite}).
#' \item 2. The second file contains a S4 \code{calculateDiffMeth} object 
#' generated 
#' using the \code{methylKit} functions \code{calculateDiffMeth} on the 
#' first file.
#' }
#' 
#' @param nbCores a positive \code{integer}, the number of cores used when
#' creating the simulated datasets. Default: \code{1} and always 
#' \code{1} for Windows.
#' 
#' @param vSeed a \code{integer}, a seed used when reproducible results are
#' needed. When a value inferior or equal to zero is given, a random integer
#' is used. .
#'
#' @return \code{0} indicating that the function have been
#' successful.
#' 
#' @examples
#'
#' ## Load dataset containing methyl information
#' data(samplesForChrSynthetic)
#' 
#' ## Set the output directory where files will be created
#' temp_dir <- "test_runOnEachSynCHR"
#' 
#' ## Create directory
#' if(!dir.exists(temp_dir)) {
#'     dir.create(temp_dir, showWarnings = TRUE)
#' }
#' 
#' ## Create 2 simulated dataset (nbSimulation = 2) 
#' ## over 3 generations (nbGenration = 3) with
#' ## 6 cases and 6 controls (nNbsample = 6) using only one set
#' ## of parameters (vpDiff = 0.9, vpDiffsd = 0.1, vDiff = 0.8)
#' methInheritSim:::runOnEachSynCHR(methData = samplesForChrSynthetic, 
#'     nbSynCHR = 1, nbSimulation = 2, nbBlock = 10, nbCpG = 20, 
#'     nbGeneration = 3, vNbSample = c(6), vpDiff = c(0.9), vpDiffsd = c(0.1), 
#'     vDiff = c(0.8), vInheritance = c(0.5), propInherite = 0.3, 
#'     rateDiff = 0.3, minRate = 0.2, propHetero = 0.5, keepDiff = FALSE, 
#'     outputDir = temp_dir, fileID = "F1",  minReads = 10, 
#'     maxPercReads = 99.9, meanCov = 80, context = "CpG", assembly="Rnor_5.0",
#'     saveGRanges = FALSE, saveMethylKit = FALSE,
#'     runAnalysis = FALSE, nbCores = 1, vSeed = 32)
#' 
#' ## Delete the output directory and its content
#' if (dir.exists(temp_dir)) {
#'     unlink(temp_dir, recursive = TRUE, force = FALSE)
#' }
#' 
#' @author Pascal Belleau, Astrid Deschenes
#' @keywords internal
runOnEachSynCHR <- function(methData, nbSynCHR, nbSimulation, nbBlock, nbCpG, 
                    nbGeneration, vNbSample, vpDiff, vpDiffsd, vDiff, 
                    vInheritance, rateDiff, minRate, propInherite, propHetero,
                    keepDiff, outputDir, fileID, minReads, maxPercReads, 
                    meanCov, context, assembly, saveGRanges, saveMethylKit, 
                    runAnalysis, nbCores, vSeed)
{
    ## Fix seed
    RNGkind("L'Ecuyer-CMRG")
    set.seed(fixSeed(vSeed))
    
    for(s in 1:nbSynCHR) {
        # Create synthetic chromosome
        res <- getSyntheticChr(methInfo = methData, nbBlock = nbBlock, 
                                nbCpG = nbCpG)
        adPref <- paste0(fileID, "_", s)
        saveRDS(res, file = paste0(outputDir, "/syntheticChr_", adPref, ".rds"))
        
        for(nbSample in vNbSample) {
            adPrefSample <- paste0(adPref, "_", nbSample)
            nbCtrl <- nbSample
            nbCase <- nbSample
            
            # Define treatment and sample.id 
            treatment <- c(rep(0,nbSample), rep(1,nbSample))
            
            if (saveGRanges) {
                saveRDS(treatment, file = paste0(outputDir, "/treatment_", 
                                                    adPrefSample, ".rds"))
            }
            
            # Define  sample.id 
            sample.id <- createSampleID(nbGeneration = nbGeneration, 
                                        nbSample = nbSample)
            
            ## Obtain the positions of the DMS when those have to be
            ## the same in all generated simulation
            if (keepDiff == TRUE) {
                diffRes <- getDiffMeth(stateInfo = res, rateDiff = rateDiff, 
                                minRate = minRate, propInherite = propInherite)
            } else {
                diffRes <- NULL
            }
            
            for (i in seq_len(length(vInheritance))) {
                for(j in seq_len(length(vpDiff))) {
                    propDiff <- vpDiff[j]
                    propDiffsd <- vpDiffsd[j]
                    
                    for (k in seq_len(length(vDiff))) {
                        diffValue <- vDiff[k]
                        propInheritance <- ifelse(vInheritance[i] >= 0, 
                                                vInheritance[i], vpDiff[j])
                        
                        prefBase <- paste0(adPrefSample , "_", propDiff, 
                                        "_", diffValue, "_", propInheritance)
                        
                        if (nbCores > 1) {
                            .Random.seed <- nextRNGSubStream(.Random.seed)
                            a <- mclapply(seq_len(nbSimulation), 
                                    FUN = simInheritance, pathOut = outputDir, 
                                    pref = prefBase, nbCtrl = nbCtrl, 
                                    nbCase = nbCase, treatment = treatment, 
                                    sample.id = sample.id, generation = 
                                    nbGeneration, stateInfo = res, rateDiff = 
                                    rateDiff, minRate = minRate, 
                                    propInherite = propInherite,
                                    diffValue = diffValue, propDiff = propDiff,
                                    propDiffsd = propDiffsd, 
                                    propInheritance = propInheritance,
                                    propHetero = propHetero, minReads = 
                                    minReads, maxPercReads = maxPercReads,
                                    context = context, assembly = assembly,
                                    meanCov = meanCov, diffRes = diffRes,
                                    saveGRanges = saveGRanges,
                                    saveMethylKit = saveMethylKit,
                                    runAnalysis = runAnalysis,
                                    mc.cores=nbCores,
                                    mc.preschedule = FALSE)
                        } else {
                            ## Manage case where mclapply is transformed into
                            ## lapply on Windows. This case created
                            ## unrepoducible results on Windows and other OS.
                            a <- lapply(seq_len(nbSimulation), 
                                    FUN = simInheritance, pathOut = outputDir, 
                                    pref = prefBase, nbCtrl = nbCtrl, 
                                    nbCase = nbCase, treatment = treatment, 
                                    sample.id = sample.id, generation = 
                                    nbGeneration, stateInfo = res, rateDiff = 
                                    rateDiff, minRate = minRate, 
                                    propInherite = propInherite, diffValue = 
                                    diffValue, propDiff = propDiff, 
                                    propDiffsd = propDiffsd, propInheritance = 
                                    propInheritance, propHetero = propHetero, 
                                    minReads = minReads, maxPercReads = 
                                    maxPercReads, context = context, assembly = 
                                    assembly, meanCov = meanCov, diffRes = 
                                    diffRes, saveGRanges = saveGRanges,
                                    saveMethylKit = saveMethylKit,
                                    runAnalysis = runAnalysis)
                        }
                    }
                }
            }
        }
    }
    return(0)
}
belleau/methylInheritanceSim documentation built on April 1, 2020, 2:43 p.m.