R/MHMM_v4.R

Defines functions MHMM_v4

Documented in MHMM_v4

#' Mixture HMM with Bernoulli distributions
#'
#' Add description here
#'
#' @param object a mikadoDataSet
#' @param control list of control arguments from controlEM()
#'
#' @return
#' Add return here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references
#' \url{https://github.com/plbaldoni/epigraHMM}
#'
#' @import data.table
#' @importFrom  stats rbinom dbinom glm quasibinomial
#' @rawNamespace import(SummarizedExperiment, except = shift)
#' @rawNamespace import(S4Vectors, except = c(first,second))
#'
#' @export
MHMM_v4 = function(object,control)
{
    
    ##############################################################
    ### Fits 3-component HMM with three independent bernoullis ###
    ##############################################################
    
    
    # ChIP = Group = JoinProb11 = JoinProb12 = JoinProb21 = JoinProb22 = PostProb1 = NULL
    # PostProb2 = Rejection1 = Rejection2 = Var1 = Var2 = epsilon.em = NULL
    # maxcount.em = maxit.em = minit.em = windowSize = pcut = quiet = NULL
    # weights = z = clusters = cores = NULL
    maxcount.em = maxit.em = epsilon.em = minit.em = quiet = NULL
    
    # Creating control elements
    
    for(i in seq_along(control)){assign(names(control)[i],control[[i]])}
    
    # General parameters
    
    M <- nrow(object);N <- ncol(object); K = 3; L <- 2
    
    it.em <- 0
    count.em <- 0 
    error.em <- c('error' = 1)
    
    parlist <- list()
    theta.old <- sapply(c('pi','gamma','psi'),function(x) NULL)
    theta.new <- sapply(c('pi','gamma','psi'),function(x) NULL)
    
    # Creating subdirectories
    
    dir <- path.expand(dir)
    paths <- list(logF = file.path(dir,'logF'),
                  logB = file.path(dir,'logB'),
                  logP = file.path(dir,'logP'),
                  logT = file.path(dir,'logT'),
                  logW = file.path(dir,'logW'))
    lapply(unlist(paths),function(x){if(!dir.exists(x)){system2('mkdir',paste('-p',x))}})
    
    # Parameter initializations
    
    theta.old[['gamma']] <- do.call(rbind,lapply(seq_len(K),function(x){y <- rep(0,K);y[x] <- 0.999;y[seq_len(K)[-x]] <- (1-y[x])/(K-1);return(y)}))
    theta.old[['psi']] <- -4.5+0:2
    theta.old[['pi']] <- c(1-1e-8,rep(1e-8/(K-1),K-1))
    
    # EM algorithm begins
    
    message(paste0(c(rep('#',80))));message(Sys.time());message("Starting the EM algorithm")
    
    while(count.em<maxcount.em & it.em<maxit.em){
        it.em = it.em + 1
        
        # E-step
        ## Forward-Backward probabilities
        
        mhmmK_logFB_v4(counts = assay(object,'counts'),
                       pi = theta.old$pi,
                       gamma = theta.old$gamma,
                       psi = theta.old$psi,
                       nameFB = c(file.path(paths[['logF']],paste0('logF.bin')),file.path(paths[['logB']],paste0('logB.bin'))),
                       size = 1)
        
        ## Window-based posterior probabilities
        
        mhmmK_logPP_v4(counts = assay(object,'counts'),
                       gamma = theta.old$gamma,
                       psi = theta.old$psi,
                       namePT = c(file.path(paths[['logP']],paste0('logP.bin')),file.path(paths[['logT']],paste0('logT.bin'))),
                       dirFB = c(file.path(paths[['logF']],paste0('logF.bin')),file.path(paths[['logB']],paste0('logB.bin'))),
                       size = 1)
        
        # M-step
        
        theta.new[1:2] <- mhmmK_maxDPT_v4(dirP = file.path(paths[['logP']],paste0('logP.bin')),
                                          dirT = file.path(paths[['logT']],paste0('logT.bin')),M = M,N = N)
        
        ## Model parameters
        
        weights <- mhmmK_agg_v4(counts = assay(object,'counts'),
                                dirP = file.path(paths[['logP']],paste0('logP.bin')),
                                size = 1)
        
        fit <- lapply(seq_len(K),function(x){stats::optim(par = theta.old$psi[x],fn = f,method = 'L-BFGS-B',weights = weights[x,],size = 1)$par})
        theta.new[['psi']] <- unlist(fit)
        
        # Updating parameter history
        
        error.em['error'] <- sum(unlist(lapply(names(theta.old),function(x){sqrt(sum((theta.old[[x]]-theta.new[[x]])^2))})))
        
        parlist[[it.em]] <- c(it=it.em,error.em,m=1*fit$converged)
        
        theta.old <- theta.new
        
        # Computing EM error
        
        count.em <- as.numeric(error.em<=epsilon.em)*(it.em>minit.em)*(count.em+1) + 0
        
        #Outputing history
        if(!quiet){
            message(paste0(c(rep('#',80))))
            message('\rIteration: ',it.em,'. Error: ',paste(formatC(error.em, format = "e", digits = 2)),sep='')
            message("\r",paste('Intercept estimates: '),paste(formatC(theta.new[['psi']], format = "e", digits = 2),collapse = ' '))
            message("\r",paste('Initial prob. estimates: '),paste(formatC(theta.new[['pi']], format = "e", digits = 2),collapse = ' '))
            message(paste0(c(rep('#',80))))
        }
    }
    
    S4Vectors::metadata(object) <- list('pi'=theta.new[['pi']],
                                        'gamma'=theta.new[['gamma']],
                                        'psi'=theta.new[['psi']],
                                        'viterbi'=viterbi(dirF = file.path(paths[['logF']],paste0('logF.bin')),theta.new$pi,theta.new$gamma))
    
    message('Done!');message(Sys.time());message(paste0(c(rep('#',80))))
    return(object)
}
plbaldoni/mikado documentation built on June 9, 2020, 3:34 p.m.