R/zooroh.R

Defines functions run_kl run_mixkl run_kr run_mixkr zoorun

Documented in zoorun

#'RZooRoH: A package for estimating global and local individual autozygosity.
#'
#'Functions to identify Homozygous-by-Descent (HBD) segments associated with
#'runs of homozygosity (RoH) and to estimate individual autozygosity (or
#'inbreeding coefficient). HBD segments and autozygosity are assigned to
#'multiple HBD classes with a model-based approach relying on a mixture of
#'exponential distributions. The rate of the exponential distribution is
#'distinct for each HBD class and defines the expected length of the HBD
#'segments. These HBD classes are therefore related to the age of the segments
#'(longer segments and smaller rates for recent autozygosity / recent common
#'ancestor). The functions allow to estimate the parameters of the model (rates
#'of the exponential distributions, mixing proportions), to estimate global and
#'local autozygosity probabilities and to identify HBD segments with the Viterbi
#'decoding.
#'
#'@section Data pre-processing:  Note that the model is designed for autosomes.
#'  Other chromosomes and additional filtering (e.g. call rate, missing, HWE,
#'  etc.) should be performed prior to run RZooRoH with tools such as plink or
#'  bcftools. The model works on an ordered map and ignores SNPs with a null
#'  position.
#'
#'@section RZooRoH functions: The main functions included in the package are
#'  zoodata(), zoomodel() and zoorun(). There are also four function to plot
#'  the results: zooplot_partitioning(), zooplot_hbdseg(), zooplot_prophbd()
#'  and zooplot_individuals(). Finally, four accessors functions help to
#'  extract the results: realized(), cumhbd(), rohbd() and probhbd().
#'
#'  You can obtain individual help for each of the functions. By typing for
#'  instance: help(zoomodel) or ? zoomodel.
#'
#'  To run RZooRoH, you must first load your data with the zoodata() function.
#'  It will create a zooin object required for further analysis. Next, you need
#'   to define the model you want to run. You can define
#'  a default model by typing for instance, my.mod <- zoomodel(). Finally, you
#'  can run the model with the zoorun function. You can choose to estimate
#'  parameters with different procedures, estimate global and local
#'  homozygous-by-descent (HBD) probabilities with the Forward-Backward
#'  procedure or identify HBD segments with the Viterbi algorithm. The results
#'  are saved in a zres object.
#'
#'  The four plot functions zooplot_partitioning(), zooplot_hbdseg(),
#'  zooplot_prophbd() and zooplot_individuals() use zres objects to make different
#'  graphics. Similarly, the accessor functions help to extract information
#'  from the zres objects (see vignette for more details).
#'
#'  To get the list of data sets (for examples):
#'
#'  data(package="RZooRoH")
#'
#'  And to get the description of one data set, type ? name_data (with name_data
#'  being the name of the data set). For instance:
#'
#'  ? genosim
#'
#' @examples
#'
#' # Start with a small data set with six individuals and external frequencies.
#' freqfile <- (system.file("exdata","typsfrq.txt",package="RZooRoH"))
#' typfile <- (system.file("exdata","typs.txt",package="RZooRoH"))
#' frq <- read.table(freqfile,header=FALSE)
#' typdata <- zoodata(typfile,supcol=4,chrcol=1,poscol=2,allelefreq=frq$V1)
#' # Define a model with two HBD classes with rates equal to 10 and 100.
#' Mod3R <- zoomodel(K=3,base_rate=10)
#' # Run the model on all individuals.
#' typ.res <- zoorun(Mod3R,typdata)
#' # Observe some results: likelihood, realized autozygosity in different
#' # HBD classes and identified HBD segments.
#' typ.res@modlik
#' typ.res@realized
#' typ.res@hbdseg
#' # Define a model with one HBD and one non-HBD class and run it.
#' Mod1R <- zoomodel(K=2,predefined=FALSE)
#' typ2.res <- zoorun(Mod1R,typdata)
#' # Print the estimated rates and mixing coefficients.
#' typ2.res@krates
#' typ2.res@mixc
#'
#' # Get the name and location of a second example file.
#' myfile <- (system.file("exdata","genoex.txt",package="RZooRoH"))
#' # Load your data with default format:
#' example2 <- zoodata(myfile)
#' # Define the default model:
#' my.model <- zoomodel()
#' # Run RZooRoH on your data with the model (parameter estimation with optim). This can
#' # take a few minutes because it is a large model for 20 individuals:
#' \donttest{my.res <- zoorun(my.model,example2)}
#' # To estimate the parameters with the EM-algorithm, run the Forward-Backward
#' # algorithm to estimate realized autozygosity and the Viterbi algorithm to
#' # identify HBD segments (a few mintues too, see above).
#' \donttest{my.res2 <- zoorun(my.model, example2, fb=TRUE, vit=TRUE, method = "estem")}
#'# To run the model on a subset of individuals with 1 thread:
#' \donttest{my.res3 <- zoorun(my.model, example2, ids=c(7,12,16,18), nT = 1)}
#' # Define a smaller model and run it on two individuals.
#' my.mod2 <- zoomodel(K=4,base_rate=10)
#' \donttest{my.res4 <- zoorun(my.mod2, example2, ids=c(9,18))}
#'
#'@import graphics
#'@import stats
#'@import utils
#'@keywords internal
"_PACKAGE"
NULL

setClass(Class = "zres",
         representation(nind = "numeric", ids = "vector", mixc = "matrix", krates = "matrix", realized = "matrix",
                        hbdp = "list", hbdseg ="data.frame", modlik ="vector", modbic ="vector", niter = "vector",
                        optimerr = "vector", sampleids = "vector")
)


is.zres <- function (x)
{
  res <- (is(x,"zres") & validObject(x))
  return(res)
}

setClass(Class = "oneres",
         representation(mixc = "vector", krates = "vector", hbdp = "matrix", hbdseg ="data.frame",
                        realized = "vector", modlik ="numeric", modbic ="numeric", num ="numeric",
                        niter = "numeric", optimerr ="numeric")
)

is.oneres <- function (x)
{
  res <- (is(x,"oneres") & validObject(x))
  return(res)
}

#### estimate parameters for one individual

#'Run the ZooRoH model
#'
#'Apply the defined model on a group of individuals: parameter estimation,
#'computation of realized autozygosity and homozygous-by-descent probabilities,
#' and identification of HBD segments (decoding).
#'
#'@param zoomodel A valid zmodel object as defined by the zoomodel function. The
#'  model indicates whether rates of exponential distributions are estimated or
#'  predefined, the number of classes, the starting values for mixing
#'  coefficients and rates, the error probabilities. See "zoomodel" for more
#'  details.
#'
#'@param zooin A valid zdata object as obtained by the zoodata function. See
#'"zoodata" for more details.
#'
#'@param ids An optional argument indicating the individual (its position in the
#'  data file) that must be proceeded. It can also be a vector containing the
#'  list of numbers that must be proceeded. By default, the model runs for all
#'  individuals.
#'
#'@param method Specifies whether the parameters are estimated by optimization
#'  with the L-BFGS-B method from the optim function (option method="opti",
#'  default value) or with the EM-algorithm (option method="estem"). When the EM
#'  algorithm is used, the Forward-Backward algorithm is run automatically (no
#'  need to select the fb option - see below). We recommend to set minr to 1
#'  when using the EM algorithm. If the user doesn't want to estimate the
#'  parameters he must set method="no".
#'
#'@param fb A logical indicating whether the forward-backward algorithm is run
#'  (optional argument - true by default). The Forward-Backward algorithm
#'  estimates the local probabilities to belong to each HBD or non-HBD class. By
#'  default, the function returns only the HBD probabilities for each class,
#'  averaged genome-wide, and corresponding to the realized autozygosity
#'  associated with each class. To obtain HBD probabilities at every marker
#'  position, the option localhbd must be set to true (this generates larger
#'  outputs).
#'
#'@param vit A logical indicating whether the Viterbi algorithm is run (optional
#'  argument - true by default). The Viterbi algorithm performs the decoding
#'  (determining the underlying class at every marker position). Whereas the
#'  Forward-Backward algorithms provide HBD probabilities (and how confident a
#'  region can be declared HBD), the Viterbi algorithm assigns every marker
#'  position to one of the defined classes (HBD or non-HBD). When informativity
#'  is high (many SNPs per HBD segments), results from the Forward-Backward and
#'  the Viterbi algorithm are very similar. The Viterbi algorithm is best suited
#'  to identify HBD segments. To estimate realized inbreeding and determine HBD
#'  status of a position, we recommend to use the Forward-Backward algorithm
#'  that better reflects uncertainty.
#'
#'@param localhbd A logical indicating whether the HBD probabilities for each
#'  individual at each marker are returned when using the EM or the
#'  Forward-Backward algorithm (fb option). This is an optional
#'  argument that is false by default.
#'
#'@param nT Indicates the number of threads used when running RZooRoH in
#'  parallel (optional argument - one thread by default).
#'
#'@param maxiter Indicates the maximum number of iterations with the EM
#'  algorithm (optional argument - 1000 by default).
#'
#'@param convem Indicates the convergence criteria for the EM algorithm
#'  (optional argument / 1e-10 by default).
#'
#'@param minr With optim and the reparametrized model (default), this indicates
#'  the minimum difference between rates of successive classes. With the EM
#'  algorithm, it indicates the minimum rate for a class. It is an optional
#'  argument set to 0. Adding such constraints might slow down the speed of
#'  convergence with optim and we recommend to run first optim without these
#'  constraints. With the EM algorithm, we recommend to use a value of 1.
#'
#'@param maxr With optim and the reparametrized model (default), this indicates
#'  the maximum difference between rates of successive classes. With the EM
#'  algorithm, it indicates the maximum rate for a class. It is an optional
#'  argument set to an arbitrarily large value (100000000). Adding such
#'  constraints might slow down the speed of convergence with optim and we
#'  recommend to run first optim without these constraints.
#'
#'@return The function return a zoores object with several slots accesses by
#' the "@" symbol. The three main results are zoores@@realized (the matrix with
#' partitioning of the genome in different HBD classes for each individual),
#' zoores@@hbdseg (a data frame with identified HBD segments) and zoores@@hbdp
#' (a list of matrices with HBD probabilities per SNP and per class).
#'
#' Here is a list with all the slots and their description:
#'
#' \enumerate{
#' \item zoores@@nind the number of individuals in the analysis,
#' \item zoores@@ids a vector containing the numbers of the analyzed individuals
#' (their position in the data file),
#' \item zoores@@mixc the (estimated) mixing coefficients per class for all
#' individuals,
#'  \item zoores@@krates the (estimated) rates for the exponential distributions
#'  associated with each HBD or non-HBD class for all individuals,
#'  \item zoores@@niter the number of iterations for estimating the
#'  parameters (per individual),
#'  \item zoores@@modlik a vector containing the likelihood of the model for
#'   each individual,
#'  \item zoores@@modbic a vector containing the value of the BIC for each
#'  individual,
#'  \item zoores@@realized a matrix with estimated realized autozygosity per HBD
#'  class (columns) for each individual (rows). These values are obtained with
#'  the Forward-Backward algorithm - fb option),
#'  \item zoores@@hbdp a list of matrices with the
#'  local probabilities to belong to an underlying hidden state (computed for
#'  every class and every individual). Each matrix has one row per class and
#'  one column per snp. To access the matrix from individual i, use the
#'  brackets "[[]]", for instance zoores@@hbdp[[i]],
#'  \item zoores@@hbdseg a data frame with the list
#'  of identified HBD segments with the Viterbi algorithm (the columns are the
#'  individual number, the chromosome number, the first and last SNP of the
#'  segment, the positions of the first and last SNP of the segment, the number
#'  of SNPs in the segment, the length of the segment, the HBD state of the
#'  segment),
#'  \item zoores@@optimerr a vector indicating whether optim ran with or
#'  without error (0/1),
#'  \item zoores@@sampleids is a vector with the names of the
#'  samples (when provided in the zooin object through the zoodata function).}
#'
#'
#'@examples
#'
#' # Start with a small data set with six individuals and external frequencies.
#' freqfile <- (system.file("exdata","typsfrq.txt",package="RZooRoH"))
#' typfile <- (system.file("exdata","typs.txt",package="RZooRoH"))
#' frq <- read.table(freqfile,header=FALSE)
#' typ <- zoodata(typfile,supcol=4,chrcol=1,poscol=2,allelefreq=frq$V1)
#' # Define a model with two HBD classes with rates equal to 10 and 100.
#' Mod3R <- zoomodel(K=3,base_rate=10)
#' # Run the model on all individuals.
#' typ.res <- zoorun(Mod3R, typ)
#' # Observe some results: likelihood, realized autozygosity in different
#' # HBD classes and identified HBD segments.
#' typ.res@modlik
#' typ.res@realized
#' typ.res@hbdseg
#' # Define a model with one HBD and one non-HBD class and run it.
#' Mod1R <- zoomodel(K=2,predefined=FALSE)
#' typ2.res <- zoorun(Mod1R, typ)
#' # Print the estimated rates and mixing coefficients.
#' typ2.res@krates
#' typ2.res@mixc

#' # Get the name and location of a second example file and load the data:
#' myfile <- (system.file("exdata","genoex.txt",package="RZooRoH"))
#' ex2 <- zoodata(myfile)
#' # Run RZooRoH to estimate parameters on your data with the 1 HBD and 1 non-HBD
#' # class (parameter estimation with optim).
#' my.mod1R <- zoomodel(predefined=FALSE,K=2,krates=c(10,10))
#' \donttest{my.res <- zoorun(my.mod1R, ex2, fb = FALSE, vit = FALSE)}
#' # The estimated rates and mixing coefficients:
#' \donttest{my.res@mixc}
#' \donttest{my.res@krates}
#' # Run the same model and run the Forward-Backward alogrithm to estimate
#' # realized autozygosity and the Viterbi algorithm to identify HBD segments:
#' \donttest{my.res2 <- zoorun(my.mod1R, ex2)}
#' # The table with estimated realized autozygosity:
#' \donttest{my.res2@realized}
#' # Run a model with 4 classes (3 HBD classes) and estimate the rates of HBD
#' # classes with one thread:
#' my.mod4R <- zoomodel(predefined=FALSE,K=4,krates=c(16,64,256,256))
#' \donttest{my.res3 <- zoorun(my.mod4R, ex2, fb = FALSE, vit = FALSE, nT =1)}
#' # The estimated rates for the 4 classes and the 20 individuals:
#' \donttest{my.res3@krates}
#' # Run a model with 5 classes (4 HBD classes) and predefined rates.
#' # The model is run only for a subset of four selected individuals.
#' # The parameters are estimated with the EM-algorithm, the Forward-Backward
#' # alogrithm is ued to estimate realized autozygosity and the Viterbi algorithm to
#' # identify HBD segments. One thread is used.
#' mix5R <- zoomodel(K=5,base=10)
#' \donttest{my.res4 <- zoorun(mix5R,ex2,ids=c(7,12,16,18), method = "estem", nT = 1)}
#' # The table with all identified HBD segments:
#' \donttest{my.res4@hbdseg}
#'
#'@useDynLib RZooRoH, .registration = TRUE
#'@export
#'@import iterators
#'@import foreach
#'@import parallel
#'@import doParallel
#'@import methods

zoorun <- function(zoomodel, zooin, ids = NULL, method = "opti",
                   fb = TRUE, vit = TRUE, minr = 0, maxr = 100000000,
                   maxiter = 1000, convem = 1e-10, localhbd = FALSE, nT = 1){

  if(is.null(ids)){ids=seq(from=1,to=zooin@nind)}
  if(is.zmodel(zoomodel) == FALSE){print("First element is not a valid model - see help for zoomodel function.")}
  if(is.zdata(zooin) == FALSE){print("First element is not a valid data set - see help for zoodata function.")}
  zrates <- zoomodel@krates
  zmix <- zoomodel@mix_coef
  gerr <- zoomodel@err
  seqerr <- zoomodel@seqerr

  if(.Platform$OS.type == "windows" & nT > 1){
    warning("Multithreading is not supported under windows. Only one thread will be used.\n")}
  if(.Platform$OS.type != "windows"){registerDoParallel(cores= nT)}

  if(zoomodel@typeModel != "mixkr" & zoomodel@typeModel != "MIXKR"
     & zoomodel@typeModel != "mixkl" & zoomodel@typeModel != "MIXKL"
     & zoomodel@typeModel != "kr" & zoomodel@typeModel != "KR"
     & zoomodel@typeModel != "kl" & zoomodel@typeModel != "KL"){
    print("The model type must be kr, kl, mixkr or mixkl")
  }

  opti=FALSE;estem=FALSE
  if(method == "opti"){opti=TRUE}
  if(method == "estem"){estem=TRUE}
  if(method != "opti" & method != "estem" & method !="no"){
    print(c("Unrecognized method ::",method))
    print("Will use optim.")
    opti = TRUE
  }

  if(estem & minr < 1){
    print("We recommend to set minr to 1 with the EM algorithm")
  }

  if(estem){fb = FALSE}

  zoores <- new("zres")
  zoores@nind = length(ids)
  zoores@ids = ids
  zoores@mixc = matrix(0,zoores@nind,length(zmix))
  zoores@krates = matrix(0,zoores@nind,length(zrates))

  if(fb || estem){zoores@realized = matrix(0,zoores@nind,length(zmix))}

  i=NULL
  if(estem & minr < 1) minr = 1
  if(.Platform$OS.type == 'windows'){
    if(zoomodel@typeModel == "mixkr" || zoomodel@typeModel == "MIXKR"){
      xx <- foreach (i=1:length(ids), .combine=c) %do% {
        id <- ids[i]
        run_mixkr(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
    if(zoomodel@typeModel == "mixkl" || zoomodel@typeModel == "MIXKL"){
      xx <- foreach (i=1:length(ids), .combine=c) %do% {
        id <- ids[i]
        run_mixkl(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
    if(zoomodel@typeModel == "kr" || zoomodel@typeModel == "KR"){
      xx <- foreach (i=1:length(ids), .combine=c) %do% {
        id <- ids[i]
        run_kr(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
    if(zoomodel@typeModel == "kl" || zoomodel@typeModel == "KL"){
      xx <- foreach (i=1:length(ids), .combine=c) %do% {
        id <- ids[i]
        run_kl(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
  }else{
    if(zoomodel@typeModel == "mixkr" || zoomodel@typeModel == "MIXKR"){
      xx <- foreach (i=1:length(ids), .combine=c) %dopar% {
        id <- ids[i]
        run_mixkr(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
    if(zoomodel@typeModel == "mixkl" || zoomodel@typeModel == "MIXKL"){
      xx <- foreach (i=1:length(ids), .combine=c) %dopar% {
        id <- ids[i]
        run_mixkl(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
    if(zoomodel@typeModel == "kr" || zoomodel@typeModel == "KR"){
      xx <- foreach (i=1:length(ids), .combine=c) %dopar% {
        id <- ids[i]
        run_kr(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
    if(zoomodel@typeModel == "kl" || zoomodel@typeModel == "KL"){
      xx <- foreach (i=1:length(ids), .combine=c) %dopar% {
        id <- ids[i]
        run_kl(zooin,id, zrates, zmix, opti, estem, fb, vit, gerr, seqerr, minr, maxr, maxiter, convem, localhbd)
      }
    }
  }

  #### transfer list of oneres into zoores object

  if(length(ids) == 1){
    xx <- array(list(xx),1)
  }

  zoores@ids <- unlist(lapply(xx,slot,"num"))
  zoores@modlik <- unlist(lapply(xx,slot,"modlik"))
  zoores@modbic <- unlist(lapply(xx,slot,"modbic"))
  zoores@niter <- unlist(lapply(xx,slot,"niter"))
  zoores@mixc <- do.call("rbind",lapply(xx,slot,"mixc"))
  zoores@krates <- do.call("rbind",lapply(xx,slot,"krates"))
  zoores@optimerr <- unlist(lapply(xx,slot,"optimerr"))
  if(fb || estem){
    zoores@realized <- do.call("rbind",lapply(xx,slot,"realized"))
    if(localhbd){zoores@hbdp <- lapply(xx,slot,"hbdp")}
  }
  if(vit){zoores@hbdseg <- do.call("rbind", lapply(xx,slot,"hbdseg"))}
#  zoores@sampleids <- .GlobalEnv$zooin@sample_ids[zoores@ids]
  zoores@sampleids <- zooin@sample_ids[zoores@ids]
  return(zoores)
}

#### run mixkr model

run_mixkr <- function(zooin,id, zrates, zmix, opti = TRUE, estem = FALSE, fb = FALSE, vit = FALSE,
                      gerr = 0.001, seqerr = 0.001, minr = 0, maxr = 100000000,
                      maxiter = 1000, convem = 1e-10, localhbd = FALSE){
  K <- length(zmix)
#  zrs=NULL;pem=NULL
#  zrs <<- zrates
#  pem <<- pemission(id, gerr, seqerr, zformat = .GlobalEnv$zooin@zformat)
  pem <- pemission(zooin, id, gerr, seqerr, zformat = zooin@zformat)

  if(opti){
    zpar <- array(0,K-1)
    for (j in 1:(K-1)){zpar[j]=log(zmix[j]/zmix[K])}

    niter=NULL;niter <<- 0
    checkoptim=NULL;checkoptim <<- 1
    tryCatch(optires <- optim(zpar, lik_mixkr, zooin = zooin, pem = pem, zrs = zrates, method="L-BFGS-B",
                     control = list(trace=TRUE, REPORT=10000)),
             error=function(e){cat("Warning, error returned by optim - replaced by EM ::",id,"\n"); .GlobalEnv$checkoptim <- 0})
    if(.GlobalEnv$checkoptim ==0){estem = TRUE; fb =FALSE}
    if(.GlobalEnv$checkoptim == 1){
      zmix <- exp(optires$par[1:(K-1)])/(1+sum(exp(optires$par[1:(K-1)])))
      zmix[K] <- 1 - sum(zmix[1:(K-1)])
    }
  }
  if(estem){
    estimrates <- 0
    onerate <- 0
    outem <- EM(zooin,id, pem, zrates, zmix, estimrates, onerate, maxiter, minr, maxr, convem, localhbd)
    print(paste("EM algorithm, iterations and loglik ::",outem[[5]],outem[[3]],sep=" "))
    zmix <- outem[[2]]
  }
  if(fb){
    outfb <- fb(zooin,id,pem,zrates,zmix,localhbd)
  }
  if(vit){
    outvit <- viterbi(zooin,id,pem,zrates,zmix)
  }

  ##### renvoie toujours les paramètres
  zresu <- new("oneres")
  loglik <- 0
  bicv <- 0
  zrates <- round(zrates,3)
  if(opti){
    if(.GlobalEnv$checkoptim == 1){
    loglik <- -optires$value ### optim miminizes -log(lik)
#    bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(K-1)}
    bicv <- -2 * loglik + log(zooin@nsnps)*(K-1)}
  }
  if(estem){
    loglik <- outem[[3]]
#    bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(K-1)
    bicv <- -2 * loglik + log(zooin@nsnps)*(K-1)
    zrates <- outem[[1]]
    zmix <- outem[[2]]
    .GlobalEnv$niter <- outem[[5]]
    zresu@realized <- outem[[4]]
    if(localhbd){zresu@hbdp <- outem[[6]]}
  }
  if(!estem & !opti){.GlobalEnv$niter <- 0}

  zresu@mixc <- zmix
  zresu@krates <- zrates
  zresu@modlik <- loglik
  zresu@modbic <- bicv
  zresu@num <- id
  zresu@niter <- .GlobalEnv$niter
  zresu@optimerr <- -1
  if(opti){zresu@optimerr <- .GlobalEnv$checkoptim}

  if(fb){
    if(!localhbd){zresu@realized <- outfb}
    if(localhbd){
      zresu@realized <- outfb[[1]]
      zresu@hbdp <- outfb[[2]]
      }
  }
  if(vit){
    zresu@hbdseg <- outvit
  }
  return(zresu)
}

#### run kr model

run_kr <- function(zooin,id, zrates, zmix, opti = TRUE, estem = FALSE, fb = FALSE, vit = FALSE,
                   gerr = 0.001, seqerr = 0.001, minr = 1, maxr = 100000000,
                   maxiter = 1000, convem = 1e-10, localhbd = FALSE){
#  pem=NULL
#  pem <<- pemission(id,  gerr, seqerr, zformat = .GlobalEnv$zooin@zformat)
  pem <- pemission(zooin,id,  gerr, seqerr, zformat = zooin@zformat)
  K <- length(zmix) #### vérifier que length(zmix) = length (zrates)
  if(opti){
    if(K > 2){
      zpar <- array(0,2*K-1)
      zpar[1] <- log(zrates[1])
      for(j in 2:(K-1)){zpar[j] <- log(zrates[j]-zrates[j-1])}
      zpar[K]=log(zrates[K])
      for (j in 1:(K-1)){zpar[K+j]=log(zmix[j]/zmix[K])}
    } else { #### 1R model
      zpar <- array(0,2)
      zpar[1] <- log(zrates[1]) ### the common rate
      zpar[2] <- log(zmix[1]/zmix[2])
    }

    niter=NULL;niter <<- 0
    checkoptim=NULL;checkoptim <<- 1

    if(maxr < 100000000 | minr > 0){
    if(K == 2){
      lowpar <- c(log(minr), -Inf)
      uppar <- c(log(maxr), Inf)
    }
    if(K > 2){
      lowpar <- c(rep(log(minr),K), rep(-Inf,(K-1)))
      uppar <- c(rep(log(maxr),K), rep(Inf,(K-1)))
    }
    tryCatch(optires <- optim(zpar, lik_kr, zooin = zooin, pem = pem, method="L-BFGS-B", lower = lowpar, upper = uppar,
                              control = list(trace=TRUE, REPORT=10000)),
             error=function(e){cat("Warning, error returned by optim - replaced by EM ::",id,"\n"); .GlobalEnv$checkoptim <- 0})
    }else{
      tryCatch(optires <- optim(zpar, lik_kr, zooin = zooin, pem = pem, method="L-BFGS-B",
                                control = list(trace=TRUE, REPORT=10000)),
               error=function(e){cat("Warning, error returned by optim - replaced by EM ::",id,"\n"); .GlobalEnv$checkoptim <- 0})
    }

    if(.GlobalEnv$checkoptim ==0){estem = TRUE; fb =FALSE}

    if(.GlobalEnv$checkoptim == 1){
      if(K > 2){
        zrates[1] <- exp(optires$par[1])
        for (j in 2:(K-1)){zrates[j] <- zrates[j-1] + exp(optires$par[j])}
        zrates[K] <- exp(optires$par[K])
        zmix <- exp(optires$par[(K+1):(2*K-1)])/(1+sum(exp(optires$par[(K+1):(2*K-1)])))
        zmix[K] <- 1 - sum(zmix[1:(K-1)])
      } else {
        zrates <- exp(optires$par[1])
        zrates[2] <- zrates[1]
        zmix[1] <- 1/(1+exp(-optires$par[2]))
        zmix[2] <- 1 - zmix[1]
        }
    }
  }

  if(estem){
    estimrates <- 1
    onerate <- 0
    if (K ==2) onerate <- 1
    if(minr < 1){minr=1}
    outem <- EM(zooin,id, pem, zrates, zmix, estimrates, onerate, maxiter, minr, maxr, convem, localhbd)
    print(paste("EM algorithm, iterations and loglik ::",outem[[5]],outem[[3]],sep=" "))
    zrates <- outem[[1]]
    zmix <- outem[[2]]
  }
    if(fb){
    outfb <- fb(zooin,id,pem,zrates,zmix, localhbd)
  }
  if(vit){
    outvit <- viterbi(zooin,id,pem,zrates,zmix)
  }

  ##### renvoie toujours les paramètres
  zresu <- new("oneres")
  loglik <- 0
  bicv <- 0
  zrates <- round(zrates,3)
  if(opti){
    if (.GlobalEnv$checkoptim == 1){
    loglik <- -optires$value ### optim miminizes -log(lik)
#    if(K > 2)bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(2*K-1)
#    if(K == 2)bicv <- -2 * loglik + 2*log(.GlobalEnv$zooin@nsnps)
    if(K > 2)bicv <- -2 * loglik + log(zooin@nsnps)*(2*K-1)
    if(K == 2)bicv <- -2 * loglik + 2*log(zooin@nsnps)
    }}
  if(estem){
    loglik <- outem[[3]]
#    bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(K-1)
#    if(K > 2)bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(2*K-1)
#    if(K == 2)bicv <- -2 * loglik + 2*log(.GlobalEnv$zooin@nsnps)
    bicv <- -2 * loglik + log(zooin@nsnps)*(K-1)
    if(K > 2)bicv <- -2 * loglik + log(zooin@nsnps)*(2*K-1)
    if(K == 2)bicv <- -2 * loglik + 2*log(zooin@nsnps)
    zrates <- outem[[1]]
    zmix <- outem[[2]]
    .GlobalEnv$niter <- outem[[5]]
    zresu@realized <- outem[[4]]
    if(localhbd){zresu@hbdp <- outem[[6]]}
  }

  if(!estem & !opti){.GlobalEnv$niter <- 0}

  zresu@mixc <- zmix
  zresu@krates <- zrates
  zresu@modlik <- loglik
  zresu@modbic <- bicv
  zresu@num <- id
  zresu@niter <- .GlobalEnv$niter
  zresu@optimerr <- -1
  if(opti){zresu@optimerr <- .GlobalEnv$checkoptim}

  if(fb){
    if(!localhbd){zresu@realized <- outfb}
    if(localhbd){
      zresu@realized <- outfb[[1]]
      zresu@hbdp <- outfb[[2]]
    }
  }
  if(vit){
    zresu@hbdseg <- outvit
  }

  return(zresu)
}

#### run mixkl model

run_mixkl <- function(zooin,id, zrates, zmix, opti = TRUE, estem = FALSE, fb = FALSE, vit = FALSE,
                      gerr = 0.001, seqerr = 0.001, minr = 0, maxr = 100000000,
                      maxiter = 1000, convem = 1e-10, localhbd = FALSE){
  K <- length(zmix)
  #  zrs=NULL;pem=NULL
  #  zrs <<- zrates
  #  pem <<- pemission(id, gerr, seqerr, zformat = .GlobalEnv$zooin@zformat)
  pem <- pemission(zooin, id, gerr, seqerr, zformat = zooin@zformat)

  if(opti){
    zpar <- array(0,K-1)
    #    for (j in 1:(K-1)){zpar[j]=log(zmix[j]/zmix[K])}
    for (j in 1:(K-1)){zpar[j]=log(zmix[j]/(1-zmix[j]))} #### NEW: First difference with MixKR

    niter=NULL;niter <<- 0
    checkoptim=NULL;checkoptim <<- 1
    tryCatch(optires <- optim(zpar, lik_mixkl, zooin = zooin, pem = pem, zrs = zrates, method="L-BFGS-B",
                              control = list(trace=TRUE, REPORT=10000)),
             error=function(e){cat("Warning, error returned by optim - replaced by EM ::",id,"\n"); .GlobalEnv$checkoptim <- 0})
    if(.GlobalEnv$checkoptim ==0){estem = TRUE; fb =FALSE}
    if(.GlobalEnv$checkoptim == 1){
      #      zmix <- exp(optires$par[1:(K-1)])/(1+sum(exp(optires$par[1:(K-1)])))
      zmix <- exp(optires$par[1:(K-1)])/(1+exp(optires$par[1:(K-1)]))
      zmix[K] <- 1 - zmix[K-1] #### NEW: zmix[K] <- 1-zmix[K-1] (just ignore that parameter?)
    }
  }
  if(estem){
    estimrates <- 0
    onerate <- 0
    outem <- EM_lay(zooin,id, pem, zrates, zmix, estimrates, onerate, maxiter, minr, maxr, convem, localhbd)
    print(paste("EM algorithm, iterations and loglik ::",outem[[5]],outem[[3]],sep=" "))
    zmix <- outem[[2]]
  }
  if(fb){
    outfb <- fb_lay(zooin,id,pem,zrates,zmix,localhbd)
  }
  if(vit){
    outvit <- viterbi_lay(zooin,id,pem,zrates,zmix)
  }

  ##### renvoie toujours les paramètres
  zresu <- new("oneres")
  loglik <- 0
  bicv <- 0
  zrates <- round(zrates,3)
  if(opti){
    if(.GlobalEnv$checkoptim == 1){
      loglik <- -optires$value ### optim miminizes -log(lik)
      #    bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(K-1)}
      bicv <- -2 * loglik + log(zooin@nsnps)*(K-1)}
  }
  if(estem){
    loglik <- outem[[3]]
    #    bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(K-1)
    bicv <- -2 * loglik + log(zooin@nsnps)*(K-1)
    zrates <- outem[[1]]
    zmix <- outem[[2]]
    .GlobalEnv$niter <- outem[[5]]
    zresu@realized <- outem[[4]]
    if(localhbd){zresu@hbdp <- outem[[6]]}
  }

  if(!estem & !opti){.GlobalEnv$niter <- 0}

  zresu@mixc <- zmix
  zresu@krates <- zrates
  zresu@modlik <- loglik
  zresu@modbic <- bicv
  zresu@num <- id
  zresu@niter <- .GlobalEnv$niter
  zresu@optimerr <- -1
  if(opti){zresu@optimerr <- .GlobalEnv$checkoptim}

  if(fb){
    if(!localhbd){zresu@realized <- outfb}
    if(localhbd){
      zresu@realized <- outfb[[1]]
      zresu@hbdp <- outfb[[2]]
    }
  }
  if(vit){
    zresu@hbdseg <- outvit
  }
  return(zresu)
}

#### run kl model

run_kl <- function(zooin,id, zrates, zmix, opti = TRUE, estem = FALSE, fb = FALSE, vit = FALSE,
                   gerr = 0.001, seqerr = 0.001, minr = 1, maxr = 100000000,
                   maxiter = 1000, convem = 1e-10, localhbd = FALSE){
  #  pem=NULL
  #  pem <<- pemission(id,  gerr, seqerr, zformat = .GlobalEnv$zooin@zformat)
  pem <- pemission(zooin,id,  gerr, seqerr, zformat = zooin@zformat)
  K <- length(zmix) #### vérifier que length(zmix) = length (zrates)
  if(opti){
    if(K > 2){
      zpar <- array(0,2*K-2) #### New, we don't need the rates for non-hbd segments
      zpar[1] <- log(zrates[1])
      for(j in 2:(K-1)){zpar[j] <- log(zrates[j]-zrates[j-1])}
      #zpar[K]=log(zrates[K])
      #for (j in 1:(K-1)){zpar[K+j]=log(zmix[j]/zmix[K])}
      for (j in 1:(K-1)){zpar[K+j-1]=log(zmix[j]/(1-zmix[j]))} #### NEW: First difference with MixKR
    } else { #### 1R model
      zpar <- array(0,2)
      zpar[1] <- log(zrates[1]) ### the common rate
      zpar[2] <- log(zmix[1]/zmix[2])
    }

    niter=NULL;niter <<- 0
    checkoptim=NULL;checkoptim <<- 1

    if(maxr < 100000000 | minr > 0){
      if(K == 2){
        lowpar <- c(log(minr), -Inf)
        uppar <- c(log(maxr), Inf)
      }
      if(K > 2){
        lowpar <- c(rep(log(minr),K), rep(-Inf,(K-1)))
        uppar <- c(rep(log(maxr),K), rep(Inf,(K-1)))
      }
      tryCatch(optires <- optim(zpar, lik_kl, zooin = zooin, pem = pem, method="L-BFGS-B", lower = lowpar, upper = uppar,
                                control = list(trace=TRUE, REPORT=10000)),
               error=function(e){cat("Warning, error returned by optim - EM not available with this model ::",id,"\n"); .GlobalEnv$checkoptim <- 0})
    }else{
      tryCatch(optires <- optim(zpar, lik_kl, zooin = zooin, pem = pem, method="L-BFGS-B",
                                control = list(trace=TRUE, REPORT=10000)),
               error=function(e){cat("Warning, error returned by optim - rEM not available with this model ::",id,"\n"); .GlobalEnv$checkoptim <- 0})
    }

    #if(.GlobalEnv$checkoptim ==0){estem = TRUE; fb =FALSE}
    if(.GlobalEnv$checkoptim ==0){estem = FALSE; fb =FALSE} ### CHANGED

    if(.GlobalEnv$checkoptim == 1){
      if(K > 2){
        zrates[1] <- exp(optires$par[1])
        for (j in 2:(K-1)){zrates[j] <- zrates[j-1] + exp(optires$par[j])}
        #zrates[K] <- exp(optires$par[K])
        zrates[K] = zrates[K-1]
        #zmix <- exp(optires$par[(K+1):(2*K-1)])/(1+sum(exp(optires$par[(K+1):(2*K-1)])))
        #zmix[K] <- 1 - sum(zmix[1:(K-1)])
        zmix <- exp(optires$par[(K):(2*K-2)])/(1+exp(optires$par[(K):(2*K-2)])) ### 1:(K-1) rates, K:(2K-2) mixing
        zmix[K] <- 1 - zmix[K-1] #### NEW: zmix[K] <- 1-zmix[K-1] (just ignore that parameter?)
      } else {
        zrates <- exp(optires$par[1])
        zrates[2] <- zrates[1]
        zmix[1] <- 1/(1+exp(-optires$par[2]))
        zmix[2] <- 1 - zmix[1]
      }
    }
  }

  if(estem){
    print(c("EM not implemented with this model!")) ## CHANGED
    #estimrates <- 1
    #onerate <- 0
    #if (K ==2) onerate <- 1
    #if(minr < 1){minr=1}
    #outem <- EM(zooin,id, pem, zrates, zmix, estimrates, onerate, maxiter, minr, maxr, convem, localhbd)
    #print(paste("EM algorithm, iterations and loglik ::",outem[[5]],outem[[3]],sep=" "))
    #zrates <- outem[[1]]
    #zmix <- outem[[2]]
  }
  if(fb){
    outfb <- fb_lay(zooin,id,pem,zrates,zmix, localhbd)
  }
  if(vit){
    outvit <- viterbi_lay(zooin,id,pem,zrates,zmix)
  }

  ##### renvoie toujours les paramètres
  zresu <- new("oneres")
  loglik <- 0
  bicv <- 0
  zrates <- round(zrates,3)
  if(opti){
    if (.GlobalEnv$checkoptim == 1){
      loglik <- -optires$value ### optim miminizes -log(lik)
      #    if(K > 2)bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(2*K-1)
      #    if(K == 2)bicv <- -2 * loglik + 2*log(.GlobalEnv$zooin@nsnps)
      if(K > 2)bicv <- -2 * loglik + log(zooin@nsnps)*(2*K-1)
      if(K == 2)bicv <- -2 * loglik + 2*log(zooin@nsnps)
    }}
#  if(estem){
#    loglik <- outem[[3]]
#    #    bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(K-1)
#    #    if(K > 2)bicv <- -2 * loglik + log(.GlobalEnv$zooin@nsnps)*(2*K-1)
#    #    if(K == 2)bicv <- -2 * loglik + 2*log(.GlobalEnv$zooin@nsnps)
#    bicv <- -2 * loglik + log(zooin@nsnps)*(K-1)
#    if(K > 2)bicv <- -2 * loglik + log(zooin@nsnps)*(2*K-1)
#    if(K == 2)bicv <- -2 * loglik + 2*log(zooin@nsnps)
#    zrates <- outem[[1]]
#    zmix <- outem[[2]]
#    .GlobalEnv$niter <- outem[[5]]
#    zresu@realized <- outem[[4]]
#    if(localhbd){zresu@hbdp <- outem[[6]]}
#  }

  if(!estem & !opti){.GlobalEnv$niter <- 0}

  zresu@mixc <- zmix
  zresu@krates <- zrates
  zresu@modlik <- loglik
  zresu@modbic <- bicv
  zresu@num <- id
  zresu@niter <- .GlobalEnv$niter
  zresu@optimerr <- -1
  if(opti){zresu@optimerr <- .GlobalEnv$checkoptim}

  if(fb){
    if(!localhbd){zresu@realized <- outfb}
    if(localhbd){
      zresu@realized <- outfb[[1]]
      zresu@hbdp <- outfb[[2]]
    }
  }
  if(vit){
    zresu@hbdseg <- outvit
  }

  return(zresu)
}

Try the RZooRoH package in your browser

Any scripts or data that you put into this service are public.

RZooRoH documentation built on Oct. 27, 2023, 9:07 a.m.