SpTMixture: Mixture models with spatio-temporal dependencies

Documented in estimEM

#' Wrapper function for EM estimation
#'
#' Wrapper function for EM estimation
#' @param df.sptmod model data
#' @param method method to be used
#' @param print.res logical. printing or not the steps values.
#' @keywords estimation
#' @export
#' @examples
#' data("wq_analysis_week2")
#' SPTMData(wq.raw.obs, frequency = "quarter")
estimEM = function(df.sptmod, method = 'simple', theta.init = NULL, print.res = FALSE, debug = FALSE, technic = "closed", hessian = FALSE){

  p.comp = df.sptmod$lu

  p.0 = p.comp

  season = df.sptmod$data$season

  season.sites=season

  season.lu = updateSeason(season.sites, p.sites = p.comp)

  y = df.sptmod$data$obs.data

  if(method == 'simple'){
    epsilon = 10

    theta.mean = rnorm(ncol(p.comp), mean = rep(mean(y$obs, na.rm=T),ncol(p.comp)),
                       sd = rep(mean(abs(y$obs), na.rm=T),ncol(p.comp))/10)
    theta.season = rnorm(ncol(season.lu)-ncol(p.comp), mean = 1, sd = 0.1)

    if(!is.null(df.sptmod$data$basis)){
      theta.season = rnorm(ncol(df.sptmod$data$basis)*ncol(p.comp), mean = 1, sd = 0.5)
    }

    theta.sigma = rnorm(ncol(p.comp), mean = 0.5, sd = 0.05)
    if(is.null(theta.init)){
      theta.0 = c(theta.mean, theta.season, theta.sigma)
    }else{theta.0 = theta.init}


    #meanFunc(theta=theta.0, n.mixt = ncol(p.comp), season.sites = season.sites, trend.sites = trend.sites, p.sites = p.comp)

    type = "stl"
    if(!is.null(df.sptmod$data$basis)){type = "splines"}

#browser()
# Q_em(theta=theta.0, y=y, p.theta = p.comp, mean_func = meanFunc, p.sites = p.comp, season.sites=season.sites, trend.sites = trend.sites)

    if(debug){browser()}

    th = .001 / length(theta.0)
    theta.h = NULL
    while(epsilon > th){
      conv = 1
      theta.i = theta.0
      Q.prev = 0
      ite = 0
      if(technic == "optim"){
      while(conv>0 & ite < 10){
        ite = ite + 1

        if(type == "stl"){
          Q.est = optim(par = theta.i, fn = Q_em, control=list(fnscale=-1, maxit = 5000), hessian = FALSE,y=y, p.theta = p.comp,p.0 = p.0,
                        mean_func = meanFunc,
                        n.mixt = ncol(p.comp),p.sites = p.comp,season.sites = season.sites)
        }
        if(type == "splines"){
          Q.est = optim(par = theta.i, fn = Q_em, control=list(fnscale=-1), hessian = FALSE,y=y, p.theta = p.comp,p.0 = p.0,
                        mean_func = meanFuncSplines,n.mixt = ncol(p.comp), basis = df.sptmod$data$basis)
        }

        theta.i = Q.est$par
        conv = Q.est$convergence
        if(print.res==TRUE){print(conv)
        print(Q.est$value)
}
        if(abs(Q.prev - Q.est$value)<0.001){conv=0}
        Q.prev = Q.est$value
      }
      }

      # Q_em(theta = theta.0, y = y ,p.theta = p.comp, p.0 = p.0, mean_func = meanFunc, n.mixt = ncol(p.comp),p.sites = p.comp,season.sites = season.sites)


      # Q_em(theta = Q.est$par, y = y ,p.theta = p.comp, p.0 = p.0, mean_func = meanFunc, n.mixt = ncol(p.comp),p.sites = p.comp,season.sites = season.sites)

      if(technic == "closed"){
        season.lu = updateSeason(season.sites, p.sites = p.comp)
        theta.i = NULL
        for(j in 1:ncol(p.comp)){
          idx.s = (0:(length(attr(season.lu,"nameBasis"))-1))*ncol(p.comp) + j
          Bj = season.lu[,idx.s]
          ypp = Bj %*% theta.0[c(idx.s)]
          Y = matrix(y$obs, nrow = nrow(Bj), byrow=T)
          theta.j.f = nna=NULL
          for(jj in 1:ncol(Y)){
            Yc = Y[,jj]
            Ycj = Yc[!is.na(Yc)]
            Bjj = Bj[!is.na(Yc),]
            nna = c(nna,sum(!is.na(Yc)))
            theta.j.temp = (ginv(t(Bjj) %*% Bjj)) %*% (t(Bjj) %*% Ycj)
            theta.j.f = cbind(theta.j.f, theta.j.temp)
#              plot(Ycj)
#              points(Bjj %*% theta.j.temp, type="l", col = "red")
#              points(Bjj %*% Q.est$par[idx.s], type="l", col = "green")
          }
          pp = p.comp[,j] / sum(p.comp[,j])
          theta.j = theta.j.f %*% pp

            Yp = (Bj %*% theta.j)[,rep(1,nrow(p.comp))]
            #Ypp = (Bj %*% Q.est$par[idx.s])[,rep(1,8)]
            #points(Bj %*% Q.est$par[idx.s], type="l", col = "green")
            mat = colMeans((Y - Yp)^2, na.rm=T)
            sigma.j = mat %*% pp
            theta.i = cbind(theta.i, c(theta.j, log(sqrt(sigma.j))))
        }
        theta.i = c(matrix(t(theta.i), ncol = 1, byrow=T))
      }

      # Q_em(theta = theta.i, y = y ,p.theta = p.comp, p.0 = p.0, mean_func = meanFunc, n.mixt = ncol(p.comp),p.sites = p.comp,season.sites = season.sites)


      if(type == "stl"){
        p.comp = p_em(theta = theta.i, y=y, p.0 = p.0,mean_func = meanFunc,n.mixt = ncol(p.comp),
                   p.sites = p.comp,season.sites = season.sites)
      }
      if(type == "splines"){
        p.comp = p_em(theta = theta.i, y=y, p.0 = p.0,mean_func = meanFuncSplines,n.mixt = ncol(p.comp),
                      basis = df.sptmod$data$basis)
      }

      epsilon = mean((theta.i - theta.0)^2)
      theta.0 = theta.i
      theta.h = rbind(theta.h, theta.i)
      if(print.res == TRUE){print(epsilon)}

    }

    Q = function(x){
      Q_em(theta = x, y = y ,p.theta = p.comp, p.0 = p.0, mean_func = meanFunc, n.mixt = ncol(p.comp),p.sites = p.comp,season.sites = season.sites)
    }

    Hessian = NULL
    if(hessian){
      Hessian = numDeriv::hessian(func = Q, x = theta.0)
    }
    RET = list(Result = list(theta = theta.0, latent = p.comp, Hessian = Hessian),
               seasonSites = season.sites,
               data = df.sptmod)
  }


  if(method == 'spat'){

  }


class(RET) <- 'sptmRes'
 return(RET)
}