R/tseriesca.R

Defines functions tseriesca

Documented in tseriesca

tseriesca <-
function(data,maxiter=500,burnin=floor(0.1*maxiter),thinning=5,scale=TRUE,
           level=FALSE,trend=TRUE,deg=2,c0eps=2,c1eps=1,c0beta=2,
           c1beta=1,c0alpha=2,c1alpha=1,priora=TRUE,pia=0.5,q0a=1,
           q1a=1,priorb=TRUE,q0b=1,q1b=1,a=0.25,b=0,indlpml=FALSE){
  
  # Function that performs the time series clustering algorithm
  # described in Nieto-Barajas and Contreras-Cristan (2014) "A Bayesian
  # Non-Parametric Approach for Time Series Clustering". Bayesian 
  # Analysis, Vol. 9, No. 1 (2014) pp.147-170". This function is 
  # designed for annual time series data. 
  #
  # IN:
  #
  # data    <- Data frame with the time series information.
  # maxiter <- Maximum number of iterations for Gibbs sampling.
  #            Default value = 2000.
  # burnin  <- Burn-in period of the Markov Chain generated by Gibbs
  #            sampling. 
  # thinning <- Number that indicates how many Gibbs sampling simulations
  #             should be skipped to form the Markov Chain.
  # scale   <- Flag that indicates if the time series data should be scaled to the
  #            [0,1] interval with a linear transformation as proposed by 
  #            Nieto-Barajas and Contreras-Cristan (2014). If TRUE, then the time
  #            series are scaled to the [0,1] interval. 
  # level   <- Flag that indicates if the level of the time 
  #            series will be considered for clustering. If TRUE, then it 
  #            is taken into account.
  # trend   <- Flag that indicates if the polinomial trend of 
  #            the model will be considered for clustering. 
  #            If TRUE, then it is taken into account. 
  # deg     <- Degree of the polinomial trend of the model. 
  #            Default value = 2. 
  # c0eps   <- Shape parameter of the hyper-prior distribution 
  #            on sig2eps. Default value = 2.
  # c1eps   <- Rate parameter of the hyper-prior distribution 
  #            on sig2eps. Default value = 1.
  # c0beta  <- Shape parameter of the hyper-prior distribution 
  #            on sig2beta. Default value = 2.
  # c1beta  <- Rate parameter of the hyper-prior distribution 
  #            on sig2beta. Default value = 1.
  # c0alpha <- Shape parameter of the hyper-prior distribution 
  #            on sig2alpha. Default value = 2.
  # c1alpha <- Rate parameter of the hyper-prior distribution 
  #            on sig2alpha. Default value = 1.
  # priora  <- Flag that indicates if a prior on parameter "a" is
  #            to be assigned. If TRUE, a prior on "a" is assigned.
  #            Default value = FALSE.
  # pia     <- Mixing proportion of the prior distribution on parameter 
  #            "a". Default value = 0.5.
  # q0a     <- Shape parameter of the continuous part of the prior
  #            distribution on parameter "a". Default value = 1.
  # q1a     <- Shape parameter of the continuous part of the prior
  #            distribution on parameter "a". Default value = 1.
  # priorb  <- Flag that indicates if a prior on parameter "b" is
  #            to be assigned. If TRUE, a prior on "b" is assigned.
  #            Default value = FALSE.
  # q0b     <- Shape parameter of the prior distribution on parameter 
  #            "b". Default value = 1.
  # q1b     <- Shape parameter of the prior distribution on parameter 
  #            "b". Default value = 1.
  # a       <- Initial/fixed value of parameter "a". 
  #            Default value = 0.25.
  # b       <- Initial/fixed value of parameter "b". 
  #            Default value = 0.
  # indlpml <- Flag that indicates if the LPML is to be calculated.
  #            If TRUE, LPML is calculated. Default value = FALSE.
  # 
  # OUT:
  #
  # mstar   <- Number of groups of the chosen cluster configuration.
  # gnstar  <- Array that contains the group number to which each time
  #            series belongs. 
  # HM      <- Heterogeneity Measure of the chosen cluster configuration.
  # arrho   <- Acceptance rate of the parameter "rho".
  # ara     <- Acceptance rate of the parameter "a".
  # arb     <- Acceptance rate of the parameter "b".
  # sig2epssample <- Matrix that in its columns contains the sample of each sig2eps_i's posterior distribution after Gibbs sampling.
  # sig2alphasample <- Matrix that in its columns contains the sample of each sig2alpha_i's posterior distribution after Gibbs sampling.
  # sig2betasample <- Matrix that in its columns contains the sample of each sig2beta_i's posterior distribution after Gibbs sampling.
  # sig2thesample <- Vector that contains the sample of sig2the's posterior distribution after Gibbs sampling. 
  # rhosample <- Vector that contains the sample of rho's posterior distribution after Gibbs sampling.
  # asample <- Vector that contains the sample of a's posterior distribution after Gibbs sampling.
  # bsample <- Vector that contains the sample of b's posterior distribution after Gibbs sampling.
  # msample <- Vector that contains the sample of the number of groups at each Gibbs sampling iteration.
  # lpml    <- If indlpml = 1, lpml contains the value of the LPML of the
  #            chosen model.
  # scale   <- Flag that indicates if the time series data were scaled to the
  #            [0,1] interval with a linear transformation. This will be taken as an input for the 
  #            plotting functions.
    
    if(level == TRUE){
      level <- 1
    }else{
      level <- 0
    }
    
    if(trend == TRUE){
      trend <- 1
    }else{
      trend <- 0
    }
    
    if(priora == TRUE){
      priora <- 1
    }else{
      priora <- 0
    }
    
    if(priorb == TRUE){
      priorb <- 1
    }else{
      priorb <- 0
    }
    
    if(indlpml == TRUE){
      indlpml <- 1
    }else{
      indlpml <- 0
    }
  
    
    if(deg%%1 != 0 | deg <= 0){
      stop("deg must be a positive integer number.")
    }
  
  if(maxiter%%1 != 0 | maxiter <= 0){
    stop("maxiter must be a positive (large) integer number.")
  }
  
  if(burnin%%1 != 0 | burnin < 0){
    stop("burnin must be a non-negative integer number.")
  }
  
  if(thinning%%1 != 0 | thinning < 0){
    stop("thinning must be a non-negative integer number.")
  }
  
  if(maxiter <= burnin){
    stop("maxiter cannot be less than or equal to burnin.")
  }
  
  if(c0eps <= 0 | c1eps <= 0 | c0beta <= 0 | c1beta <= 0 |
       c0alpha <= 0 | c1alpha <= 0){
    stop("c0eps,c1eps,c0beta,c1beta,c0alpha and c1alpha must be 
         positive numbers.")
  }
  
  if(pia <= 0 | pia >= 1){
    stop("The mixing proportion pia must be a number in (0,1).")
  }
  
  if(q0a <= 0 | q1a <= 0){
    stop("q0a and q1a must be positive numbers.")
  }
  
  if(a < 0 | a >= 1){
    stop("'a' must be a number in [0,1).")
  }
  
  if(q0b <= 0 | q1b <= 0){
    stop("q0b and q1b must be positive numbers.")
  }
  
  if(b <= -a){
    stop("'b' must be greater than '-a'.")
  }
  
  
  data <- scaleandperiods(data,scale)  
  mydata <- as.matrix(data$mydata)   # Matrix with the scaled data.
  periods <- data$periods            # Array with the data periods.
  cts <- data$cts                    # Variable that indicates if any time series
                                     # were removed from the original data set because they were constant.
  
  ##### CONSTRUCTION OF THE DESIGN MATRICES #####
  
  T <- nrow(mydata)                        # Number of periods of the time series
  n <- ncol(mydata)                        # Number of time series present in the data
  DM <- designmatrices(level,trend,seasonality=0,deg,T,n,fun="tseriesca")
  p <- DM$p
  d <- DM$d
  if((level+trend) == 0){
    Z <- DM$Z
  }else if((level+trend) == 2){
    X <- DM$X
  }else{
    Z <- DM$Z
    X <- DM$X
  }
  
  
  ##### INITIAL VALUES FOR THE PARAMETERS THAT WILL BE PART OF THE GIBBS SAMPLING #####
  
  sig2eps <- matrix(1,n,1)                        # Vector that has the diagonal entries of the variance-covariance matrix for every epsilon_i.
  sig2the <- 1    # Initial value for sig2the.
  rho <- 0        # Initial value for rho.  
  
  P <- matrix(0,T,T)             # Initial matrix P.                        
  
  for (j in seq(T)){
    for (k in seq(T)){
      P[j,k] <- rho^(abs(j-k))  
    }
  }
  
  R <- sig2the*P                 # Initial matrix R.
  
  if((level+trend) == 0){
    sig2alpha <- matrix(1,p,1)                      # Vector that has the diagonal entries of the variance-covariance matrix for alpha.
    sigmaalpha <- diag(c(sig2alpha),p,p)            # Variance-covariance matrix for alpha.
    invsigmaalpha <- diag(1/c(sig2alpha),p,p)       # Inverse variance-covariance matrix for alpha.
    
    alpha <- matrix(mvrnorm(n,matrix(0,p,1),sigmaalpha),p,n)  # alpha is a matrix with a vector value of alpha for every time series in its columns. 
    theta <- matrix(mvrnorm(n,matrix(0,T,1),R),T,n)           # theta is a matrix with a vector value of theta for every time series in its columns.
    gamma <- theta                                            # gamma is the union by rows of the beta and theta matrices.
  }else if((level+trend) == 2){
    sig2beta <- matrix(1,d,1)                       # Vector that has the diagonal entries of the variance-covariance matrix for beta. 
    sigmabeta <- diag(c(sig2beta),d,d)              # Variance-covariance matrix for beta.
    invsigmabeta <- diag(1/c(sig2beta),d,d)         # Inverse variance-covariance matrix for beta.
   
    beta <- matrix(mvrnorm(n,matrix(0,d,1),sigmabeta),d,n)    # beta is a matrix with a vector value of beta for every time series in its columns.
    theta <- matrix(mvrnorm(n,matrix(0,T,1),R),T,n)           # theta is a matrix with a vector value of theta for every time series in its columns.
    gamma <- rbind(beta,theta)                                # gamma is the union by rows of the beta and theta matrices.  
  }else{
    sig2beta <- matrix(1,d,1)                       # Vector that has the diagonal entries of the variance-covariance matrix for beta. 
    sigmabeta <- diag(c(sig2beta),d,d)              # Variance-covariance matrix for beta.
    invsigmabeta <- diag(1/c(sig2beta),d,d)         # Inverse variance-covariance matrix for beta.
    sig2alpha <- matrix(1,p,1)                      # Vector that has the diagonal entries of the variance-covariance matrix for alpha.
    sigmaalpha <- diag(c(sig2alpha),p,p)            # Variance-covariance matrix for alpha.
    invsigmaalpha <- diag(1/c(sig2alpha),p,p)       # Inverse variance-covariance matrix for alpha.
    
    alpha <- matrix(mvrnorm(n,matrix(0,p,1),sigmaalpha),p,n)  # alpha is a matrix with a vector value of alpha for every time series in its columns. 
    beta <- matrix(mvrnorm(n,matrix(0,d,1),sigmabeta),d,n)    # beta is a matrix with a vector value of beta for every time series in its columns.
    theta <- matrix(mvrnorm(n,matrix(0,T,1),R),T,n)           # theta is a matrix with a vector value of theta for every time series in its columns.
    gamma <- rbind(beta,theta)                                # gamma is the union by rows of the beta and theta matrices.  
  }
  
  
  iter <- 0                                    # Counter for each Gibbs sampling iteration.
  iter1 <- 0                                   # Counter for the number of iterations saved during the Gibbs sampling.
  arrho <- 0                                   # Variable that will contain the acceptance rate of rho in the Metropolis-Hastings step.
  ara <- 0                                     # Variable that will contain the acceptance rate of a in the Metropolis-Hastings step.
  arb <- 0                                     # Variable that will contain the acceptance rate of b in the Metropolis-Hastings step.
  sim <- matrix(0,n,n)                         # Initialization of the similarities matrix.
  
  if(thinning == 0){
    CL <- floor(maxiter-burnin)
  }else{
    CL <- floor((maxiter-burnin)/thinning)
  }
  
  memory <- matrix(0,CL*n,n)   # Matrix that will contain the cluster configuration of every iteration that is saved during the Gibbs sampling.
  memorygn <- matrix(0,CL,n)   # Matrix that will save the group number to which each time series belongs in every iteration saved.  
  sig2epssample <- matrix(0,CL,n)   # Matrix that in its columns will contain the sample of each sig2eps_i's posterior distribution after Gibbs sampling.
  sig2thesample <- matrix(0,CL,1)   # Vector that will contain the sample of sig2the's posterior distribution after Gibbs sampling. 
  rhosample <- matrix(0,CL,1)       # Vector that will contain the sample of rho's posterior distribution after Gibbs sampling.
  asample <- matrix(0,CL,1)         # Vector that will contain the sample of a's posterior distribution after Gibbs sampling.
  bsample <- matrix(0,CL,1)         # Vector that will contain the sample of b's posterior distribution after Gibbs sampling.
  msample <- matrix(0,CL,1)         # Vector that will contain the sample of the number of groups at each Gibbs sampling iteration.
  
  if((level+trend) == 0){
    sig2alphasample <- matrix(0,CL,p) # Matrix that in its columns will contain the sample of each sig2alpha_i's posterior distribution after Gibbs sampling.
  }else if((level+trend) == 2){
    sig2betasample <- matrix(0,CL,d)  # Matrix that in its columns will contain the sample of each sig2beta_i's posterior distribution after Gibbs sampling.
  }else{
    sig2alphasample <- matrix(0,CL,p) # Matrix that in its columns will contain the sample of each sig2alpha_i's posterior distribution after Gibbs sampling.
    sig2betasample <- matrix(0,CL,d)  # Matrix that in its columns will contain the sample of each sig2beta_i's posterior distribution after Gibbs sampling. 
  }
  
  if(indlpml != 0){
    iter2 <- 0
    auxlpml <- matrix(0,floor((maxiter-burnin)/10),n)
  }
  
  ##### BEGINNING OF GIBBS SAMPLING #####
  
  while(iter < maxiter){
    
    ##### 1) SIMULATION OF ALPHA'S POSTERIOR DISTRIBUTION #####
    
    if((level+trend) != 2){
      if((level+trend) == 0){
        for(i in 1:n){
          sigmaeps <- diag(c(sig2eps[i]),T)
          Q <- sigmaeps + R
          Qinv <- chol2inv(chol(Q))
          Winv <- Qinv 
          W <- Q
          Valphainv <- (t(Z) %*% Winv %*% Z) + invsigmaalpha
          Valpha <- chol2inv(chol(Valphainv))
          
          mualpha <- Valpha %*% t(Z) %*% Winv %*% mydata[,i]
          
          alpha[,i] <- mvrnorm(1,mualpha,Valpha)
        }
      }else{
        for(i in 1:n){    
          sigmaeps <- diag(c(sig2eps[i]),T)
          Q <- sigmaeps + R
          Qinv <- chol2inv(chol(Q))
          Vinv <- t(X) %*% Qinv %*% X + invsigmabeta
          V <- chol2inv(chol(Vinv))
          
          Winv <- Qinv + (Qinv %*% X %*% V %*% t(X) %*% Qinv)
          W <- chol2inv(chol(Winv))
          Valphainv <- (t(Z) %*% Winv %*% Z) + invsigmaalpha
          Valpha <- chol2inv(chol(Valphainv))
          
          mualpha <- Valpha %*% t(Z) %*% Winv %*% mydata[,i]
          
          alpha[,i] <- mvrnorm(1,mualpha,Valpha)
        }
      }
    }
    
    
    ##### 2) SIMULATION OF GAMMA'S = (BETA,THETA) POSTERIOR DISTRIBUTION #####
    
    
    for(i in 1:n){
      
      gr <- comp11(gamma[1,-i])                         # Only the first entries of gamma[,-i] are compared to determine the cluster configuration
      jstar <- gr$jstar                                 # Object that contains the positions of the unique vectors in gamma[,-i]
      gmi <- gamma[,-i]                                 # Matrix with all the elements of gamma, except for the i-th element
      gammastar <- as.matrix(gmi[,jstar])               # Matrix with the unique vectors in gamma(-i)
      mi <- gr$rstar                                    # Number of unique vectors in gamma(-i) (Number of groups)
      nstar <- gr$nstar                                 # Frequency of each unique vector in gamma(-i)
      if((level+trend) == 0){
        thetastar <- as.matrix(gammastar[(d+1):(T+d),])
      }else{
        if(d == 1){
          betastar <- t(as.matrix(gammastar[1:d,]))
          thetastar <- as.matrix(gammastar[(d+1):(T+d),]) 
        }else{
          betastar <- as.matrix(gammastar[1:d,])            # Separation of unique vectors between betastar and thetastar
          thetastar <- as.matrix(gammastar[(d+1):(T+d),])    
        }         
      }
      
      # Matrices necessary for the following steps
      sigmaeps <- sig2eps[i]*diag(1,T)
      invsigmaeps <- (1/sig2eps[i])*diag(1,T)
      Q <- sigmaeps + R
      Qinv <- chol2inv(chol(Q))
      
      if((level+trend) == 0){
        Winv <- Qinv 
        W <- Q
      }else{
        Vinv <- t(X) %*% Qinv %*% X + invsigmabeta
        V <- chol2inv(chol(Vinv))
        
        Winv <- Qinv + (Qinv %*% X %*% V %*% t(X) %*% Qinv)
        W <- chol2inv(chol(Winv))        
      }
      
      
      # Computing weigths for gamma(i)'s posterior distribution
      
      if((level+trend) == 0){
        dj <- matrix(0,mi,1)
        d0 <- (b + a*mi)*dmvnorm(mydata[,i],(Z %*% alpha[,i]),W)
        
        den <- 0
        
        for(j in 1:mi){
          dj[j] <- (nstar[j] - a)*dmvnorm(mydata[,i],(Z %*% alpha[,i] + thetastar[,j]),sigmaeps)
        }
        
        den <- d0 + sum(dj)
        if(den == 0){
          d0 <- log(b + a*mi) + dmvnorm(mydata[,i],(Z %*% alpha[,i]),W,log=TRUE)
          for(j in 1:mi){
            dj[j] <- log(nstar[j] - a) + dmvnorm(mydata[,i],(Z %*% alpha[,i] + thetastar[,j]),sigmaeps,log=TRUE)          
          }
          dj <- rbind(dj,d0)
          aa <- min(dj)
          q <- (1+(dj-aa)+(dj-aa)^2/2)/sum(1+(dj-aa)+(dj-aa)^2/2)
        }else{
          q <- dj/den
          q <- rbind(q,d0/den)
        }
      }else if((level+trend) == 2){
        dj <- matrix(0,mi,1)
        d0 <- (b + a*mi)*dmvnorm(mydata[,i],matrix(0,T,1),W)
        
        den <- 0
        
        for(j in 1:mi){
          dj[j] <- (nstar[j] - a)*dmvnorm(mydata[,i],(X %*% betastar[,j] + thetastar[,j]),sigmaeps)
        }
        
        den <- d0 + sum(dj)
        if(den == 0){
          d0 <- log(b + a*mi) + dmvnorm(mydata[,i],matrix(0,T,1),W,log=TRUE)
          for(j in 1:mi){
            dj[j] <- log(nstar[j] - a) + dmvnorm(mydata[,i],(X %*% betastar[,j] + thetastar[,j]),sigmaeps,log=TRUE)          
          }
          dj <- rbind(dj,d0)
          aa <- min(dj)
          q <- (1+(dj-aa)+(dj-aa)^2/2)/sum(1+(dj-aa)+(dj-aa)^2/2)
        }else{
          q <- dj/den
          q <- rbind(q,d0/den)
        }
      }else{
        dj <- matrix(0,mi,1)
        d0 <- (b + a*mi)*dmvnorm(mydata[,i],(Z %*% alpha[,i]),W)
        
        den <- 0
        
        for(j in 1:mi){
          dj[j] <- (nstar[j] - a)*dmvnorm(mydata[,i],(Z %*% alpha[,i] + X %*% betastar[,j] + thetastar[,j]),sigmaeps)
        }
        
        den <- d0 + sum(dj)
        if(den == 0){
          d0 <- log(b + a*mi) + dmvnorm(mydata[,i],(Z %*% alpha[,i]),W,log=TRUE)
          for(j in 1:mi){
            dj[j] <- log(nstar[j] - a) + dmvnorm(mydata[,i],(Z %*% alpha[,i] + X %*% betastar[,j] + thetastar[,j]),sigmaeps,log=TRUE)          
          }
          dj <- rbind(dj,d0)
          aa <- min(dj)
          q <- (1+(dj-aa)+(dj-aa)^2/2)/sum(1+(dj-aa)+(dj-aa)^2/2)
        }else{
          q <- dj/den
          q <- rbind(q,d0/den)
        }
      }
      
      
      # Sampling a number between 1 and (mi+1) to determine what will be the simulated value for gamma(i)
      # The probabilities of the sample are based on the weights previously computed
      
      y <- sample((1:(mi+1)), size=1, prob = q)
      
      # If sample returns the value (mi+1), a new vector from g0 will be simulated and assigned to gamma(i)
      
      if (y == mi+1){
        if((level+trend) == 0){
          Sthetai <- chol2inv(chol(invsigmaeps + chol2inv(chol(R))))
          muthetai <- Sthetai %*% invsigmaeps %*% (mydata[,i] - (Z %*% alpha[,i]))
          theta0 <- matrix(mvrnorm(1,muthetai,Sthetai),T,1)
          gamma[,i] <- theta0
        }else if((level+trend) == 2){
          Sthetai <- chol2inv(chol(invsigmaeps + chol2inv(chol(R))))
          muthetai <- Sthetai %*% invsigmaeps %*% (mydata[,i] - (X %*% beta[,i]))
          mubetai <- V %*% t(X) %*% Qinv %*% mydata[,i]
          beta0 <- matrix(mvrnorm(1,mubetai,V),d,1)
          theta0 <- matrix(mvrnorm(1,muthetai,Sthetai),T,1)
          gamma[,i] <- rbind(beta0,theta0)
        }else{
          Sthetai <- chol2inv(chol(invsigmaeps + chol2inv(chol(R))))
          muthetai <- Sthetai %*% invsigmaeps %*% (mydata[,i] - (Z %*% alpha[,i]) - (X %*% beta[,i]))
          mubetai <- V %*% t(X) %*% Qinv %*% (mydata[,i] - (Z %*% alpha[,i]))
          beta0 <- matrix(mvrnorm(1,mubetai,V),d,1)
          theta0 <- matrix(mvrnorm(1,muthetai,Sthetai),T,1)
          gamma[,i] <- rbind(beta0,theta0)
        }        
      } else{
        gamma[,i] = gammastar[,y]                     # Otherwise, column y from gammastar will be assigned to gamma(i)
      }
      
    }
    
    
    ##### 2.1) ACCELERATION STEP AND CONSTRUCTION OF SIMILARITIES MATRIX #####
    
    gr <- comp11(gamma[1,])                   # Computation of all latent classes of the gamma vectors after the simulation of their posterior distribution.
    jstar <- gr$jstar
    gammastar <- as.matrix(gamma[,jstar])     # Unique values of the gamma vectors. 
    m <- gr$rstar                             # Total number of latent classes (groups).
    nstar <- gr$nstar                         # Frequency of each latent class (group).
    gn <- gr$gn                               # Identifier of the group to which each time series belongs.
    if((level+trend) == 0){
      theta <- as.matrix(gamma[((d+1):(T+d)),])
      thetastar <- as.matrix(gammastar[(d+1):(T+d),])
    }else{
      if(d == 1){
        beta <- t(as.matrix(gamma[(1:d),]))          # Splitting the gamma vectors in beta and theta. 
        theta <- as.matrix(gamma[((d+1):(T+d)),])
        betastar <- t(as.matrix(gammastar[(1:d),]))
        thetastar <- as.matrix(gammastar[((d+1):(T+d)),])   
      }else{
        beta <- as.matrix(gamma[(1:d),])          # Splitting the gamma vectors in beta and theta. 
        theta <- as.matrix(gamma[((d+1):(T+d)),])
        betastar <- as.matrix(gammastar[(1:d),])
        thetastar <- as.matrix(gammastar[((d+1):(T+d)),])  
      }         
    }

    
    for(j in 1:m){
      
      if((level+trend) == 0){
        cc <- which(gn == j)        # Identifying the cluster configuration of each group.
        aux <- matrix(0,T,T)        # Calculating the necessary matrices for the simulation of the distributions for the acceleration step.
        aux1 <- matrix(0,T,1)
        aux2 <- matrix(0,T,1)
        
        for(i in 1:nstar[j]){
          aux <- aux + diag((1/sig2eps[cc[i]]),T)
          aux1 <- aux1 + (diag((1/sig2eps[cc[i]]),T) %*% (mydata[,cc[i]] - Z %*% alpha[,cc[i]]))          
        }
        
        Sthetastar <- chol2inv(chol(aux + chol2inv(chol(R))))
        muthetastar <- Sthetastar %*% aux1

        theta[,cc] <- mvrnorm(1,muthetastar,Sthetastar)
      }else if((level+trend) == 2){
        cc <- which(gn == j)        # Identifying the cluster configuration of each group.
        aux <- matrix(0,T,T)        # Calculating the necessary matrices for the simulation of the distributions for the acceleration step.
        aux1 <- matrix(0,T,1)
        aux2 <- matrix(0,T,1)
        
        for(i in 1:nstar[j]){
          aux <- aux + diag((1/sig2eps[cc[i]]),T)
          aux1 <- aux1 + (diag((1/sig2eps[cc[i]]),T) %*% (mydata[,cc[i]] - X %*% betastar[,j]))
          aux2 <- aux2 + (diag((1/sig2eps[cc[i]]),T) %*% (mydata[,cc[i]] - thetastar[,j]))    
        }
        
        Sthetastar <- chol2inv(chol(aux + chol2inv(chol(R))))
        muthetastar <- Sthetastar %*% aux1
        Sbetastar <- chol2inv(chol((t(X) %*% aux %*% X) + invsigmabeta))
        mubetastar <- Sbetastar %*% t(X) %*% aux2
        
        beta[,cc] <- mvrnorm(1,mubetastar,Sbetastar)
        theta[,cc] <- mvrnorm(1,muthetastar,Sthetastar)
      }else{
        cc <- which(gn == j)        # Identifying the cluster configuration of each group.
        aux <- matrix(0,T,T)        # Calculating the necessary matrices for the simulation of the distributions for the acceleration step.
        aux1 <- matrix(0,T,1)
        aux2 <- matrix(0,T,1)
        
        for(i in 1:nstar[j]){
          aux <- aux + diag((1/sig2eps[cc[i]]),T)
          aux1 <- aux1 + (diag((1/sig2eps[cc[i]]),T) %*% (mydata[,cc[i]] - Z %*% alpha[,cc[i]] - X %*% betastar[,j]))
          aux2 <- aux2 + (diag((1/sig2eps[cc[i]]),T) %*% (mydata[,cc[i]] - Z %*% alpha[,cc[i]] - thetastar[,j]))    
        }
        
        Sthetastar <- chol2inv(chol(aux + chol2inv(chol(R))))
        muthetastar <- Sthetastar %*% aux1
        Sbetastar <- chol2inv(chol((t(X) %*% aux %*% X) + invsigmabeta))
        mubetastar <- Sbetastar %*% t(X) %*% aux2
        
        beta[,cc] <- mvrnorm(1,mubetastar,Sbetastar)
        theta[,cc] <- mvrnorm(1,muthetastar,Sthetastar) 
      }
      
      # Computation of similarities matrix and saving the cluster configuration of the current iteration.
      if((iter %% thinning) == 0 & iter >= burnin){
        
        for(i1 in 1:nstar[j]){
          for(i2 in i1:nstar[j]){
            sim[cc[i1],cc[i2]] <- sim[cc[i1],cc[i2]] + 1
            sim[cc[i2],cc[i1]] <- sim[cc[i2],cc[i1]] + 1
            memory[(cc[i1] + (n*iter1)),cc[i2]] <- memory[(cc[i1] + (n*iter1)),cc[i2]] + 1
            memory[(cc[i2] + (n*iter1)),cc[i1]] <- memory[(cc[i2] + (n*iter1)),cc[i1]] + 1
          }
        }
        
      }
      
    }
    
    if((level+trend) == 0){
      gamma <- theta                            # Obtaining all gamma vectors after the acceleration step.
    }else{
      gamma <- rbind(beta,theta)                # Obtaining all gamma vectors after the acceleration step.
    }
    gr <- comp11(gamma[1,])                   # Obtaining all the latent classes in the gamma vectors.
    jstar <- gr$jstar
    gammastar <- as.matrix(gamma[,jstar])     # Unique values of the gamma vectors. 
    m <- gr$rstar                             # Number of groups after acceleration step.
    nstar <- gr$nstar                         # Frequency of each group.
    gn <- gr$gn                               # Identifier of the group to which each latent class belongs.
    if((level+trend) == 0){
      theta <- as.matrix(gamma[((d+1):(T+d)),])
      thetastar <- as.matrix(gammastar[((d+1):(T+d)),])
    }else{
      if(d == 1){
        beta <- t(as.matrix(gamma[(1:d),]))          # Splitting the gamma vectors in beta and theta. 
        theta <- as.matrix(gamma[((d+1):(T+d)),])
        betastar <- t(as.matrix(gammastar[(1:d),]))
        thetastar <- as.matrix(gammastar[((d+1):(T+d)),])   
      }else{
        beta <- as.matrix(gamma[(1:d),])          # Splitting the gamma vectors in beta and theta. 
        theta <- as.matrix(gamma[((d+1):(T+d)),])
        betastar <- as.matrix(gammastar[(1:d),])
        thetastar <- as.matrix(gammastar[((d+1):(T+d)),])  
      }    
    }
    
    
    ##### 6) SIMULATION OF SIG2THE'S POSTERIOR DISTRIBUTION #####
    
    cholP <- chol(P)              # Calculation of the Cholesky factorization of P.
    Pinv <- chol2inv(cholP)       # Obtaining the inverse of P. 
    s1 <- 0
    
    # Calculating the sum necessary for the rate parameter of the posterior distribution.
    for(j in 1:m){
      s1 <- s1 + t(as.matrix(thetastar[,j])) %*% Pinv %*% as.matrix(thetastar[,j])
    }
    
    sig2the <- 1/rgamma(1,(m*T/2),(s1/2))
    
    
    ##### 7) SIMULATION OF RHO'S POSTERIOR DISTRIBUTION (Metropolis-Hastings step) #####
    
    rhomh <- runif(1,-1,1)            # Sampling from the proposal distribution.
    
    Pmh <- matrix(0,T,T)
    
    # Calculating the matrix P for the proposed value rhomh.
    for (j in 1:T){
      for (k in 1:T){
        Pmh[j,k] <- rhomh^(abs(j-k))  
      }
    }
    
    cholPmh <- chol(Pmh)              # Calculating the Cholesky factor of Pmh.
    Pmhinv <- chol2inv(cholPmh)       # Obtaining the inverse from Pmh
    s <- 0
    
    # Calculating the sum necessary for the computation of the acceptance probability.
    for(j in 1:m){
      s <- s + t(as.matrix(thetastar[,j])) %*% (Pmhinv-Pinv) %*% as.matrix(thetastar[,j])
    }
    
    # Computation of the acceptance probability.
    q <- (-m/2)*(log(prod(diag(cholPmh)))- log(prod(diag(cholP)))) - ((1/(2*sig2the))*s) + (1/2)*(log(1 + rhomh*rhomh) - log(1 + rho*rho)) - log(1 - rhomh*rhomh) + log(1 - rho*rho) 
    
    # Definition of the acceptance probability. 
    quot <- min(0,q)
    
    # Sampling a uniform random variable in [0,1] to determine if the proposal is accepted or not.
    unif1 <- runif(1,0,1)
    
    # Acceptance step.
    if(log(unif1) <= quot){
      rho <- rhomh
      arrho <- arrho + 1
      
      for (j in seq(T)){
        for (k in seq(T)){
          P[j,k] <- rho^(abs(j-k))  
        }
      }
      
    }
    
    R <- sig2the*P  
    
    
    ##### 3) SIMULATION OF SIG2EPS' POSTERIOR DISTRIBUTION #####
    
    if((level+trend) == 0){
      M <- t(mydata - Z%*%alpha - theta) %*% (mydata - Z%*%alpha - theta)
    }else if((level+trend) == 2){
      M <- t(mydata - X%*%beta - theta) %*% (mydata - X%*%beta - theta)
    }else{
      M <- t(mydata - Z%*%alpha - X%*%beta - theta) %*% (mydata - Z%*%alpha - X%*%beta - theta)
    }
    
    sig2eps <- 1/rgamma(n,(c0eps + T/2),(c1eps + diag(M)/2))
    
    
    ##### 4) SIMULATION OF SIMGAALPHA'S POSTERIOR DISTRIBUTION #####
    
    if((level+trend) != 2){
      sig2alpha <- 1/rgamma(p,(c0alpha + n/2),(c1alpha + rowSums(alpha^2)))
      
      sigmaalpha <- diag(c(sig2alpha),p,p)
      invsigmaalpha <- diag(1/c(sig2alpha),p,p) 
    }
    
    
    ##### 5) SIMULATION OF SIGMABETA'S POSTERIOR DISTRIBUTION #####
    
    if((level+trend) != 0){
      sig2beta <- 1/rgamma(d,(c0beta + m/2),(c1beta + colSums(betastar^2)/2))
      
      sigmabeta <- diag(c(sig2beta),d,d)
      invsigmabeta <- diag(1/c(sig2beta),d,d) 
    }
    
    
    ##### 8) SIMULATION OF A'S POSTERIOR DISTRIBUTION (METROPOLIS-HASTINGS WITH UNIFORM PROPOSALS) #####
    
    if(priora == 1){
      
      if (b < 0){
        amh <- runif(1,-b,1)  
      } else{
        unif2 <- runif(1,0,1)
        if (unif2 <= 0.5){
          amh <- 0
        } else{
          amh <- runif(1,0,1) 
        }
      }
      
      # If b is not greater than -a, then accept the proposal directly.
      if ((a+b) <= 0){
        a <- amh
      } else{
        
        quot1 <- 0
        
        if(m > 1){
          for (j in 1:(m-1)){
            quot1 <- quot1 + log(b + j*amh) + log(gamma(nstar[j] - amh)) - log(gamma(1 - amh)) - log(b + j*a) - log(gamma(nstar[j] - a)) + log(gamma(1 - a))
          }
        }
        
        quot1 <- quot1 + log(gamma((nstar[m] - amh))) - log(gamma(1 - amh)) - log(gamma((nstar[m] - a))) + log(gamma(1 - a))
        
        if (a == 0){
          fa <- 0.5 
        } else{
          fa <- 0.5*dbeta(a,q0a,q1a) 
        }
        
        if (amh == 0){
          famh <- 0.5
        } else{
          famh <- 0.5*dbeta(amh,q0a,q1a)
        }
        
        # Quotient to evaluate the Metropolis-Hastings step in logs    
        quot1 <- quot1 + log(famh) - log(fa)
        
        # Determination of the probability for the Metropolis-Hastings step
        alphamh1 <- min(quot1,0)
        
        unif3 <- runif(1,0,1)
        
        # Acceptance step
        if (log(unif3) <= alphamh1){
          a <- amh
          ara <- ara + 1
        }
        
      }
      
    }
    
    
    ##### 9) SIMULATION OF B'S POSTERIOR DISTRIBUTION (METROPOLIS-HASTINGS WITH GAMMA PROPOSALS) #####
    
    if(priorb == 1){
      
      y1 <- rgamma(1,1,0.1)
      bmh <- y1 - a
      
      # If b is not greater than -a, then accept the proposal directly.
      if ((a+b) <= 0){
        b <- bmh  
      } else{
        
        quot2 <- 0
        
        if(m > 1){
          for (j in 1:(m-1)){
            quot2 <- quot2 + log(bmh + j*a) - log(b + j*a)
          }
        }
        
        fb <- dgamma(a+b,q0b,q1b)      
        fbmh <- dgamma(y1,q0b,q1b)
        
        # Quotient to evaluate the Metropolis-Hastings step in logs
        quot2 <- quot2 + (log(gamma(bmh+1)) - log(gamma(bmh+n)) - log(gamma(b+1)) + log(gamma(b+n))) + (log(fbmh) - log(fb)) - 0.1*(b - bmh)
        
        # Determination of the probability for the Metropolis-Hastings step
        alphamh2 <- min(quot2,0)
        
        unif4 <- runif(1,0,1)
        
        # Acceptance step
        if (log(unif4) <= alphamh2){
          b <- bmh
          arb <- arb + 1
        }
        
      }
      
    }
    
    
    if((iter %% thinning) == 0 & iter >= burnin){
      iter1 <- iter1 + 1
      sig2epssample[iter1,] <- sig2eps
      sig2thesample[iter1] <- sig2the
      rhosample[iter1] <- rho
      asample[iter1] <- a
      bsample[iter1] <- b
      msample[iter1] <- m
      memorygn[iter1,] <- gn
      
      if((level+trend) == 0){
        sig2alphasample[iter1,] <- sig2alpha
      }else if((level+trend) == 2){
        sig2betasample[iter1,] <- sig2beta
      }else{
        sig2alphasample[iter1,] <- sig2alpha
        sig2betasample[iter1,] <- sig2beta
      }
    }
    
    if(indlpml != 0){
      if((iter %% 10) == 0 & iter >= burnin){
        iter2 <- iter2 + 1
        for(i in 1:n){
          if((level+trend) == 0){
            for(j in 1:m){        
              auxlpml[iter2,i] <- auxlpml[iter2,i] + ((nstar[j]-a)/(b+n))*dmvnorm(mydata[,i],(Z %*% alpha[,i] + thetastar[,j]),diag(sig2eps[i],T))
            }
            
            sigmaeps <- diag(sig2eps[i],T)
            invsigmaeps <- diag((1/sig2eps[i]),T)
            
            Q <- sigmaeps + R
            Qinv <- solve(Q)
            
            Winv <- Qinv 
            W <- Q
            
            auxlpml[iter2,i] <- auxlpml[iter2,i] + ((b+(a*m))/(b+n))*dmvnorm(mydata[,i],(Z %*% alpha[,i]),W) 
          }else if((level+trend) == 2){
            for(j in 1:m){        
              auxlpml[iter2,i] <- auxlpml[iter2,i] + ((nstar[j]-a)/(b+n))*dmvnorm(mydata[,i],(X %*% betastar[,j] + thetastar[,j]),diag(sig2eps[i],T))
            }
            
            sigmaeps <- diag(sig2eps[i],T)
            invsigmaeps <- diag((1/sig2eps[i]),T)
            
            Q <- sigmaeps + R
            Qinv <- solve(Q)
            Vinv <- t(X) %*% Qinv %*% X + invsigmabeta
            V <- solve(Vinv)
            
            Winv <- Qinv + (Qinv %*% X %*% V %*% t(X) %*% Qinv)
            W <- solve(Winv)
            
            auxlpml[iter2,i] <- auxlpml[iter2,i] + ((b+(a*m))/(b+n))*dmvnorm(mydata[,i],(matrix(0,T,1)),W) 
          }else{
            for(j in 1:m){        
              auxlpml[iter2,i] <- auxlpml[iter2,i] + ((nstar[j]-a)/(b+n))*dmvnorm(mydata[,i],(Z %*% alpha[,i] + X %*% betastar[,j] + thetastar[,j]),diag(sig2eps[i],T))
            }
            
            sigmaeps <- diag(sig2eps[i],T)
            invsigmaeps <- diag((1/sig2eps[i]),T)
            
            Q <- sigmaeps + R
            Qinv <- solve(Q)
            Vinv <- t(X) %*% Qinv %*% X + invsigmabeta
            V <- solve(Vinv)
            
            Winv <- Qinv + (Qinv %*% X %*% V %*% t(X) %*% Qinv)
            W <- solve(Winv)
            
            auxlpml[iter2,i] <- auxlpml[iter2,i] + ((b+(a*m))/(b+n))*dmvnorm(mydata[,i],(Z %*% alpha[,i]),W)  
          }
        }
      }
    }
    
    iter <- iter + 1
    if(iter %% 50 == 0){
      cat("Iteration Number: ",iter,". Progress: ",(iter/maxiter)*100,"%","\n") 
    }
    
  }
  
  ##### END OF GIBBS SAMPLING #####
  
  # Calculation of acceptance rates and similarities matrix
  arrho <- arrho/iter
  ara <- ara/iter
  arb <- arb/iter
  sim <- sim/iter1
  
  
  dist <- matrix(0,CL,1)
  
  # Calculating the distance between each cluster configuration to the similarities matrix
  for (i in 1:CL){
    aux4 <- memory[(((i-1)*n)+1):(i*n),] - sim
    dist[i] <- norm(aux4,"F")
  }
  
  # Determining which cluster configuration minimizes the distance to the similarities matrix
  mstar <- msample[which.min(dist)]
  gnstar <- memorygn[which.min(dist),]
  
  
  ##### HM MEASURE CALCULATION #####
  
  HM <- 0
  
  for(j in 1:mstar){
    cc <- as.matrix(which(gnstar == j))    
    HM1 <- 0
    
    if(length(cc) > 1){
      for(i1 in 1:length(cc)){
        for(i2 in 1:i1){
          HM1 <- HM1 + sum((mydata[,cc[i2]] - mydata[,cc[i1]])^2)
        }
      }
      HM <- HM + (2/(length(cc)-1))*HM1
    }
  }
  
  
  names <- colnames(mydata)
  cat("Number of groups of the chosen cluster configuration: ",mstar,"\n")
  for(i in 1:mstar){
    cat("Time series in group",i,":",names[which(gnstar == i)],"\n")
  }
  cat("HM Measure: ",HM,"\n")
  
  if(indlpml != 0){    
    auxlpml <- 1/auxlpml
    cpo <- colMeans(auxlpml)
    cpo <- 1/cpo
    lpml <- sum(log(cpo))
  }
  
  if(indlpml !=0){
    if((level+trend) == 0){
      return(list(mstar = mstar,gnstar = gnstar,HM = HM,arrho = arrho,ara = ara,arb = arb,lpml = lpml,
                  sig2epssample = sig2epssample,sig2alphasample = sig2alphasample,
                  sig2thesample = sig2thesample,rhosample = rhosample,asample = asample,
                  bsample = bsample,msample = msample,periods = periods,scale=scale))  
    }else if((level+trend) == 2){
      return(list(mstar = mstar,gnstar = gnstar,HM = HM,arrho = arrho,ara = ara,arb = arb,lpml = lpml,
                  sig2epssample = sig2epssample,sig2betasample = sig2betasample,
                  sig2thesample = sig2thesample,rhosample = rhosample,asample = asample,
                  bsample = bsample,msample = msample,periods = periods,scale=scale))
    }else{
      return(list(mstar = mstar,gnstar = gnstar,HM = HM,arrho = arrho,ara = ara,arb = arb,lpml = lpml,
                  sig2epssample = sig2epssample,sig2alphasample = sig2alphasample,
                  sig2betasample = sig2betasample,sig2thesample = sig2thesample,rhosample = rhosample,
                  asample = asample,bsample = bsample,msample = msample,periods = periods,scale=scale))
    }    
  }else{
    if((level+trend) == 0){
      return(list(mstar = mstar,gnstar = gnstar,HM = HM,arrho = arrho,ara = ara,arb = arb,
                  sig2epssample = sig2epssample,sig2alphasample = sig2alphasample,
                  sig2thesample = sig2thesample,rhosample = rhosample,asample = asample,
                  bsample = bsample,msample = msample,periods = periods,scale=scale))  
    }else if((level+trend) == 2){
      return(list(mstar = mstar,gnstar = gnstar,HM = HM,arrho = arrho,ara = ara,arb = arb,
                  sig2epssample = sig2epssample,sig2betasample = sig2betasample,
                  sig2thesample = sig2thesample,rhosample = rhosample,asample = asample,
                  bsample = bsample,msample = msample,periods = periods,scale=scale))
    }else{
      return(list(mstar = mstar,gnstar = gnstar,HM = HM,arrho = arrho,ara = ara,arb = arb,
                  sig2epssample = sig2epssample,sig2alphasample = sig2alphasample,
                  sig2betasample = sig2betasample,sig2thesample = sig2thesample,rhosample = rhosample,
                  asample = asample,bsample = bsample,msample = msample,periods = periods,scale=scale))
    }   
  }
  
}

Try the BNPTSclust package in your browser

Any scripts or data that you put into this service are public.

BNPTSclust documentation built on Aug. 20, 2019, 1:04 a.m.