SpTMixture: Mixture models with spatio-temporal dependencies

Documented in gibbSPTM

#' Gibbs sampler with sptial Potts model for neighbouring behaviours
#'
#' Gibbs sampler with sptial Potts model for neighbouring behaviours
#' @param theta parameters to be estimated
#' @param y data
#' @param p.sites the composition of each site.
#' @param season.sites season data for each site.
#' @param Coords Coordinates of the sites.
#' @param Bds Matrix of edges between sites.
#' @param ID ID vector of the sites.
#' @param N.run Number of run of the sampler.
#' @param beta.range Range of beta for the Potts model.
#' @keywords Gibbs sampler
#' @export
#' @examples
#' data("wq_analysis_week2")
#' SPTMData(wq.raw.obs, frequency = "quarter")
gibbSPTM = function(priors= list(theta = list(m0 = 0, s0 = 1), sigma2 = list(a0=2, b0=1)), y, p.sites, season.sites, basis, Coords=NULL ,Bds=NULL, ID, N.run = 10000,
                    beta.range = c(0.5,3), meanFunc = NULL, debug = FALSE, print.res = FALSE, ...){




  theta0 = c(priors$theta$m0, priors$sigma2$b0 / (priors$sigma2$a0-1))

  n.sites = nrow(p.sites)

  n.lu = ncol(p.sites)

  n.param = length(theta0)

  n.season = dim(season.sites)[2]

  if(length(dim(season.sites))==2){
    n.season=1
  }

  type = "stl"
  if(!is.null(basis)){type = "splines"}

  beta.max.min = beta.range

  p.hist = array(0, dim = c(n.sites, n.lu, N.run))

  theta.hist = matrix(0, nrow = N.run, ncol = n.param)

  z.hist = array(0, c(n.sites, n.lu, N.run))

  beta.hist = matrix(0, nrow = N.run, ncol = 1)

  p.hist[,,1] =as.matrix(p.sites)

  theta.hist[1,] = theta0

  beta.hist[1] = runif(1,beta.max.min[1],beta.max.min[2])

  idx.max = apply(p.sites, 1,which.max)

  for(li in 1:nrow(p.sites)){
    z.hist[li,idx.max[li],1] = 1
  }

  n.colors = ncol(p.sites)

  for(t in 2:N.run){

    sigma = theta.hist[t-1, (length(theta0) - n.lu + 1):length(theta0)]

    beta = beta.hist[t-1]

    m =  meanFunc(theta.hist[t-1,], n.mixt = n.lu, priors$z$a, season.sites)

    idx.nan = which(is.nan(colSums(m)))

    # Updating beta

    zz = z.hist[,,t-1]

    zz.temp = apply(zz,1,which.max)

    out = MurraySchemePotts(beta.init = beta, Coords = Coords ,Nb = Bds ,n.colors = n.colors, col.obs = zz.temp, N.run = 100, range.beta = beta.max.min)

    beta.hist[t] = mean(out$beta.sample, na.rm=T)


    # Updating z

    if(!is.null(Coords)){Bds = getBonds(as.matrix(Coords), NN = 4, th = 2)}

    if(debug){browser()}

    vois = getCanStatF(Bds, zz.temp, n.colors)

    for(i in 1:n.sites){
      l.prob = NULL

      y.p = y[y$ID == ID[i], ]

      for(j in 1:n.lu){

        y.m = m[,j]
        n.na = sum(!is.na(y.p$obs))

        py.cx = sum(dnorm(y.p$obs, y.m , sigma[j], log = T), na.rm=T)+ n.na*(log(sqrt(2*pi)*sigma[j]))

        #py.cx = sum(dnorm(y.p$obs, y.m , sigma[j], log = T), na.rm=T)* sqrt(2*pi*sigma[j]^2)

        # l.prob = c(l.prob, py.cx  + beta * vois[j] + max(log(p.hist[i,j,t-1]), -1*10e100, na.rm=T))

        l.prob = c(l.prob, py.cx  + beta * vois[i,j])
      }

      l.prob[idx.nan] <- -Inf

      max.l = max(l.prob)

      logprob = (l.prob - max.l) + log(p.hist[i,,t-1])

      prob = exp(logprob - max(logprob))

      prob = prob / sum(prob)

      z.hist[i,,t] = 0
      z.hist[i,sample(1:n.lu,1, prob = prob),t] = 1
    }

    n.t = colSums(z.hist[,,t])

    # Updating pi


    for(i in 1:n.sites){
      alpha = priors$z$a[i,] + z.hist[i,,t] / n.colors
      p.hist[i,,t] = rdirichlet(c(as.matrix(alpha)))
    }

    # Updating mu_k

    season.lu = updateSeason(season.sites, p.sites = z.hist[,,t])

    l.j = NULL
    for(j in 1:n.lu){

      index.j = ID[which(z.hist[,j,t]==1)]

      idx.Xlu = j + (0:((length(theta0) - n.lu)/n.lu-1))*n.lu
      idx.jlu = 0:(n.season-1)*n.colors + j
      Xlu = season.lu[,idx.jlu]

      if(length(index.j)>0){

        y.p = y[y$ID %in% index.j,]

        l.j = c(l.j,length(y.p$obs))

        Xs = kronecker(Xlu,matrix(1,ncol = 1, nrow = length(index.j)))

        idx.na = which(!is.na(y.p$obs))
        Xs = Xs[idx.na,]
        est.b = ginv(t(Xs) %*% Xs) %*% t(Xs) %*% y.p$obs[idx.na]

        theta.hist[t,idx.Xlu] = est.b


      }else{

        l.j = c(l.j, 0)

        idx.Xlu = j + (0:((length(theta0) - n.lu)/n.lu-1))*n.lu

        theta.hist[t,idx.Xlu] = theta.hist[t-1,idx.Xlu]
      }


    }

    n.j = colSums(z.hist[,,t])

    var.post = (1 / priors$theta$s0 + rep(n.j, n.season) / theta.hist[t-1,(length(theta0) - n.lu + 1):length(theta0)])^(-1)

    mean.post = var.post * (priors$theta$m0 / priors$theta$s0 + n.j * theta.hist[t,1:(n.season*n.colors)] / theta.hist[t-1,(length(theta0) - n.lu + 1):length(theta0)])

    theta.hist[t,1:(n.season*n.colors)] = rnorm(n.season*n.colors,mean.post, sd=sqrt(var.post))


    # Updating sigma_k

    m =  meanFunc(theta.hist[t,], n.mixt = n.lu, z.hist[,,t], season.sites)

    y.s = NULL
    for(i in ID){
      idx.i = which(ID == i)
      idx.j = which(z.hist[idx.i,,t] == 1)
      y.p = y[which(y$ID == i),]
      y.s = c(y.s,m[,idx.j])
    }
    y.s.t = matrix(y.s, nrow = length(ID), byrow = T)
    y.s = matrix(y.s.t, ncol = 1)

    for(j in 1:n.lu){

      idx.obs = which(y$ID %in% ID[which(z.hist[,j,t] == 1)])

      mean.se = 0

      if(length(idx.obs)>0){mean.se = mean((y.s[idx.obs] - y$obs[idx.obs])^2, na.rm=T)}

      n.obs= length(which(z.hist[,j,t] == 1)) #length(idx.obs) - sum(is.na(y$obs[idx.obs]))

      a = priors$sigma2$a0[j] +n.obs/2
      b = n.obs*mean.se /2 + priors$sigma2$b0[j]

      #       mean(rigamma(10000,a,b));sd(rigamma(10000,a,b));
      #       mean(rigamma(10000,priors$sigma2$a0[j],priors$sigma2$b0[j]));sd(rigamma(10000,priors$sigma2$a0[j],priors$sigma2$b0[j]));

      theta.hist[t,(length(theta0) - n.lu + 1):length(theta0)][j] = rigamma(1,a,b)
    }

    #theta.hist[t,ncol(theta.hist)] = sd.t
    if(round(t/100) == (t/100)  & print.res){print(t)}
  }


  RET = list(theta = theta.hist, z= z.hist, p = p.hist, beta = beta.hist, priors = priors)

  return(RET)

}