#' @title Fit finite mixture von Bertalanffy growth model.
#' @description \code{vb_growth_mix} fits sex-specific growth models where some of the animals are of unknown sex. Optimisation is via the Expectation-Maximisation algorithm. Equality constraints across sexes can be implemented for any combination of parameters using the \code{binding} argument.
#' @param start.list A list with a list called par containing starting values for: "mixprop", "growth.par" (see Examples).
#' @param data A data.frame with columns: "age", "length" and "obs.sex". "obs.sex" must have values "female", "male", "unclassified".
#' @param binding A (4x2) parameter index matrix with rows named (in order): "lnlinf", "lnk", "lnnt0", "lnsigma" and the left column for the female parameter index and right column for male parameter index. Used to impose arbitrary equality constraints across the sexes (see Examples).  
#' @param maxiter.em Integer for maximum number of EM iterations (1e3 default).
#' @param reltol Relative tolerance for EM observed data log likelihood convergence (1e-8 default).
#' @param plot.fit Logical, if TRUE the fit is plotted at each iteration. Red and blue circles are used for known females and males, respectively. Unclassified animals are plotted as triangles, with the colour indicating the expected probability of being female or male (FALSE default).
#' @param verbose Logical, if TRUE the iteration number and observed data log-likelihood are printed (TRUE default).
#' @param optim.method Character, complete data optimisation method to use in \code{optim} ("BFGS" default).
#' @param estimate.mixprop Logical, if TRUE the mixing proportion is estimated, otherwise fixed at the starting value (TRUE default).
#' @param distribution Character with options: "normal" or "lognormal".
#' @return List containing the components:
#' \item{logLik.vec}{Observed data log-likelihood at each iteration.}
#' \item{logLik}{Observed data log-likelihood on the last EM iteration.}
#' \item{complete.data}{Data frame of the data (re-ordered) with component probabilities (tau).}
#' \item{coefficients}{Parameter estimates and associated standard errors, both on the real line.}
#' \item{vcov}{Estimated variance covariance matrix of the parameters estimated on the real line. Can be used to obtain parameter standard errors on the natural scale.}
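#' \item{convergence}{Binary convergence code: "0" denotes convergence of the EM algorithm, "1" that the algorithm stopped at the maximum number of EM iterations (\code{maxiter.em}).}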
#' @source Minto, C., Hinde, J. and Coelho, R. (2017). Including unsexed individuals in sex-specific growth models.
#' \emph{Canadian Journal of Fisheries and Aquatic Sciences}. DOI: 10.1139/cjfas-2016-0450.
#' @examples
#' set.seed(1010)
#' sim.dat <- sim_vb_data(nfemale = 50, nmale = 50, mean_ageF = 4, mean_ageM = 4,
#'                       growth_parF = c(linf = 30, k = 0.5, t0 = -1, sigma = 0.1),
#'                       growth_parM = c(linf = 25, k = 0.5, t0 = -1, sigma = 0.1),
#'                       mat_parF = c(A50 = 5, MR = 2), mat_parM = c(A50 = 3, MR = 2),
#'                       distribution = "lognormal")
#' ## Model fit with constrained Brody's growth coefficient
#' ## Set up the constraint
#' binding <- matrix(c(1:2, rep(3, 2), 4:7), ncol = 2, byrow = TRUE)
#' rownames(binding) <- c("lnlinf", "lnk", "lnnt0", "lnsigma")
#' colnames(binding) <- c("female", "male")
#' ## note: lnnt0 is the natural logarithm of the negative of t0 (t0 < 0)
#' ## starting values 
#' start.par <- c(c(log(30), log(25)), rep(log(0.3), 1), rep(log(1), 2), rep(log(.1), 2))
#' start.list <- list(par = list(mixprop = 0.5, growth.par = start.par))
#' vb.bind.fit <- vb_growth_mix(data = sim.dat, start.list = start.list,
#'                              binding = binding, distribution = "lognormal",
#'                              reltol = 1e-6)
#' @importFrom grDevices colorRampPalette
#' @importFrom graphics lines mtext plot
#' @importFrom stats dlnorm dnorm optim plogis qlogis rbinom rlnorm rnbinom rnorm
#' @export

vb_growth_mix <- function(start.list, data, binding, maxiter.em = 1e3, reltol = 1e-8, plot.fit = FALSE, verbose = TRUE, optim.method = "BFGS", estimate.mixprop = TRUE, distribution){
  ## check mixprop starting values
  if(!"mixprop" %in% names(start.list[["par"]])){
    stop("No starting value for mixing proportion provided, specify 'mixprop = value' in start.list list")
  ## check length of the starting parameters
  if(max(binding) != length(start.list[["par"]][["growth.par"]])){
    stop("Mismatch in the length of growth.par and that specified by binding.")
  ## observed log-likelihood container
  ollike <- rep(NA, maxiter.em)
  ## if plotting set up some variables
    data$jitter.age <- jitter(data$age)
  ## split the data 
  classified.data <- data[data$obs.sex %in% c("female", "male"), ]
  unclassified.data <- data[data$obs.sex == "unclassified", ]
  ## define growth functions
  female_growth_fit <- function(x){linfF * (1 - exp(-kF * (x - t0F)))}
  male_growth_fit <- function(x){linfM * (1 - exp(-kM * (x - t0M)))}
  for(i in 1:maxiter.em){
      par <- start.list[["par"]]
    ## E-STEP
    growth.par <- par[["growth.par"]]
    linfF <- exp(growth.par[binding["lnlinf", "female"]])
    linfM <- exp(growth.par[binding["lnlinf", "male"]])
    kF <- exp(growth.par[binding["lnk", "female"]])
    kM <- exp(growth.par[binding["lnk", "male"]])
    t0F <- - exp(growth.par[binding["lnnt0", "female"]])
    t0M <- - exp(growth.par[binding["lnnt0", "male"]])
    sigmaF <- exp(growth.par[binding["lnsigma", "female"]])
    sigmaM <- exp(growth.par[binding["lnsigma", "male"]])    
    mixprop <- par[["mixprop"]]
    ## unclassified means
    muF.unclass <- female_growth_fit(unclassified.data$age)
    muM.unclass <- male_growth_fit(unclassified.data$age)
    ## classified means
    muF.class <- female_growth_fit(classified.data$age)
    muM.class <- male_growth_fit(classified.data$age)
    ## classified data (known)
    classified.data$tau <- ifelse(classified.data$obs.sex == "female", 1, ifelse(classified.data$obs.sex == "male", 0, NA))
    ## classification for unclassified data (missing)
    unclassified.data$tau <- get_growth_post_prob(mixprop = mixprop, muF = muF.unclass, muM = muM.unclass, sigmaF = sigmaF, sigmaM = sigmaM, data = unclassified.data, distribution = distribution)
    ## make the complete data
    complete.data <- rbind(classified.data, unclassified.data)
    if(distribution == "normal"){
      ll.F.class <- sum(classified.data$obs.sex == "female") * log(mixprop) +
        sum(dnorm(classified.data$length, mean = muF.class, sd = sigmaF, log=TRUE)[classified.data$obs.sex == "female"])
      ll.M.class <- sum(classified.data$obs.sex == "male") * log(1 - mixprop) +
        sum(dnorm(classified.data$length, mean = muM.class, sd = sigmaM, log=TRUE)[classified.data$obs.sex == "male"])
      ## unclassified component - finite mixture density
      ll.miss <- sum(log(
                       mixprop * dnorm(unclassified.data$length, mean = muF.unclass, sd = sigmaF) +
                       (1-mixprop) * dnorm(unclassified.data$length, mean = muM.unclass, sd = sigmaM)))
    if(distribution == "lognormal"){
      ## female classified
      ll.F.class <- sum(classified.data$obs.sex == "female") * log(mixprop) +
        sum(dlnorm(classified.data$length, meanlog = log(muF.class) - sigmaF^2 / 2, sdlog = sigmaF, log=TRUE)[classified.data$obs.sex == "female"])
      ## male classified
      ll.M.class <- sum(classified.data$obs.sex == "male") * log(1 - mixprop) +
        sum(dlnorm(classified.data$length, meanlog = log(muM.class) - sigmaM^2 / 2, sdlog = sigmaM, log=TRUE)[classified.data$obs.sex == "male"])
      ## unclassified component - finite mixture density
      ll.miss <- sum(log(
                       mixprop * dlnorm(unclassified.data$length, meanlog = log(muF.unclass) - sigmaF^2 / 2, sdlog = sigmaF) +
                       (1-mixprop) * dlnorm(unclassified.data$length, meanlog = log(muM.unclass) - sigmaM^2 / 2, sdlog = sigmaM)))
    ollike[i] <- ll.F.class + ll.M.class + ll.miss
    ## PLOT
      tau.col <- col.vec[cut(complete.data$tau, breaks)]
      par(mfrow=c(1, 1), mar = c(2, 2, 1, 1), oma = c(2, 2, 1, 1))
      age.pred <- seq(min(complete.data$jitter.age), max(complete.data$jitter.age), length=50)
      plot(complete.data$jitter.age, complete.data$length,
           pch=ifelse(complete.data$obs.sex=="unclassified",17, 19),
           col=paste(tau.col,40, sep=""),
           ylim=c(0, max(complete.data$length)),
           xlim=c(0, max(complete.data$jitter.age)),
           xlab="", ylab="")
      lines(age.pred, female_growth_fit(age.pred), col = "red")
      lines(age.pred, male_growth_fit(age.pred), col = "blue")
      mtext(side = 2, line = 2.5, text = "Length")
    ## Mixing proportion
      mixprop <- sum(complete.data$tau)/length(complete.data$tau)
    par[["mixprop"]] <- mixprop
    complete.data$weights <- complete.data$tau
    vb_fit <- optim(vb_bind_nll, par = growth.par, gr = vb_bind_gr, binding = binding, data = complete.data, method = optim.method, distribution = distribution)
    par[["growth.par"]] <- vb_fit$par
    ## OUTPUT
      cat(paste("EM iteration:", i, "|", "Observed data log-likelihood: ", ollike[i], "\n"))
      if(abs(ollike[i] - ollike[i-1]) <  abs(ollike[i-1] * reltol) | i == maxiter.em){
        ## one fit of observed data log-likelihood
        oll <- function(theta, estimate.mixprop, distribution){
          linfF <- exp(theta[binding["lnlinf", "female"]])
          linfM <- exp(theta[binding["lnlinf", "male"]])
          kF <- exp(theta[binding["lnk", "female"]])
          kM <- exp(theta[binding["lnk", "male"]])
          t0F <- - exp(theta[binding["lnnt0", "female"]])
          t0M <- - exp(theta[binding["lnnt0", "male"]])
          sigmaF <- exp(theta[binding["lnsigma", "female"]])
          sigmaM <- exp(theta[binding["lnsigma", "male"]])
            mixprop <- plogis(theta[max(binding) + 1])
            mixprop <- mixprop
          ## predicted means
          ## unclassified
          muF.unclass <- linfF * (1 - exp(-kF * (unclassified.data$age - t0F))) 
          muM.unclass <- linfM * (1 - exp(-kM * (unclassified.data$age - t0M))) 
          ## classified 
          muF.class <- linfF * (1 - exp(-kF * (classified.data$age - t0F))) 
          muM.class <- linfM * (1 - exp(-kM * (classified.data$age - t0M))) 
          if(distribution == "normal"){
            ## female classified
            ll.F.class <- sum(classified.data$obs.sex == "female") * log(mixprop) +
              sum(dnorm(classified.data$length, mean = muF.class, sd = sigmaF, log=TRUE)[classified.data$obs.sex == "female"])
            ## male classified
            ll.M.class <- sum(classified.data$obs.sex == "male") * log(1 - mixprop) +
              sum(dnorm(classified.data$length, mean = muM.class, sd = sigmaM, log=TRUE)[classified.data$obs.sex == "male"])
            ## unclassified component - finite mixture density
            ll.miss <- sum(log(
                             mixprop * dnorm(unclassified.data$length, mean = muF.unclass, sd = sigmaF) +
                             (1-mixprop) * dnorm(unclassified.data$length, mean = muM.unclass, sd = sigmaM)))
          if(distribution == "lognormal"){
            ## female classified
            ll.F.class <- sum(classified.data$obs.sex == "female") * log(mixprop) +
              sum(dlnorm(classified.data$length, meanlog = log(muF.class) - sigmaF^2 / 2, sdlog = sigmaF, log=TRUE)[classified.data$obs.sex == "female"])
            ## male classified
            ll.M.class <- sum(classified.data$obs.sex == "male") * log(1 - mixprop) +
              sum(dlnorm(classified.data$length, meanlog = log(muM.class) - sigmaM^2 / 2, sdlog = sigmaM, log=TRUE)[classified.data$obs.sex == "male"])
            ## unclassified component - finite mixture density
            ll.miss <- sum(log(
                             mixprop * dlnorm(unclassified.data$length, meanlog = log(muF.unclass) - sigmaF^2 / 2, sdlog = sigmaF) +
                             (1-mixprop) * dlnorm(unclassified.data$length, meanlog = log(muM.unclass) - sigmaM^2 / 2, sdlog = sigmaM)))
          oll <- ll.F.class + ll.M.class + ll.miss
          oll.fit <- optim(fn = oll, par = c(par[["growth.par"]], qlogis(mixprop)), hessian = TRUE, control = list(maxit = 1e4),  estimate.mixprop = TRUE, distribution = distribution, method = optim.method)
          oll.fit <- optim(fn = oll, par = c(par[["growth.par"]]), hessian = TRUE, control = list(maxit = 1e4),  estimate.mixprop = FALSE, distribution = distribution, method = optim.method)
        ## check to make sure final optim fit close to EM
        if(!(round(-oll.fit$value / ollike[i], 4) == 1)){
          warning(paste("EM solution and optim solution differ by ", -oll.fit$value - ollike[i], ", final parameter values may differ from final EM values.", sep = ""))
        ## collate the final estimates
        theta <- oll.fit$par
        par.vcov <- solve(oll.fit$hessian)
        par.se <- sqrt(diag(par.vcov))
        female.pars <- c(oll.fit$par[binding[, "female"]], oll.fit$par[max(binding) + 1])
        female.se <- c(par.se[binding[, "female"]], par.se[max(binding) + 1])
        male.pars <- c(oll.fit$par[binding[, "male"]], - oll.fit$par[max(binding) + 1])
        male.se <- c(par.se[binding[, "male"]], par.se[max(binding) + 1])
        theta.df <- data.frame(Parameter = c(rownames(binding), "logitpi"),
                               Female = female.pars,
                               Female.Std.Error = female.se,
                               Male = male.pars,
                               Male.Std.Error = male.se)
        res <- list()
        res$logLik.vec <- ollike[1:i]
        res$logLik <- ollike[i]
        res$complete.data <- complete.data
        res$coefficients <- theta.df
        res$vcov <- par.vcov
        res$convergence <- ifelse(i == maxiter.em, 1, 0)
    ## clean-up within iteration
    rm(list = ls()[!ls() %in% c("classified.data", "unclassified.data","maxiter.em","par", "ollike","reltol","plot.fit", "col.vec", "breaks", "verbose", "vb_bind_nll", "binding", "optim.method", "estimate.mixprop", "distribution", "female_growth_fit", "male_growth_fit")])

