BayesFda: Bayesian Functional Data Analysis

#' @export
BayesFda_Gibbs <- function(X,  H = 5, time = NULL,
                           alpha  = 1, sigma = 0,
                           a_lambda = 1, b_lambda = 1, a_sigma=1, b_sigma=1,  inner_knots = min(round(NCOL(X)/4),40), degree=3,   
                           R = 5000, burn_in = 1000, clust_init = 3, verbose = FALSE) {
  
  if(clust_init >= H) stop("The clust_init value is greater than the upper bound H")
  
  # Fixed quantities
  n <- NROW(X)
  TT <- NCOL(X)
  
  if(is.null(time)) {
    time <- 1:TT
    warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
  }
  
  if(length(time) != TT) {
    stop("The length of time and the dimension of X must coincide.")
  }
  
  colnames(X)  <- time
  X            <- as.matrix(X) # If not already done, convert it onto a Matrix
  index_not_NA <- !is.na(X)
  nTT          <- sum(index_not_NA) # Number of observed values
  
  # Smoothing splines settings
  xl    <- min(time); xr <- max(time); dx <- (xr - xl) / (inner_knots-1)
  knots <- seq(xl - degree * dx, xr + degree * dx, by = dx) # Knots placement
  
  B     <- spline.des(knots, time, degree + 1, 0 * time, outer.ok=TRUE)$design

  
  # Penalty term
  D <- diag(NCOL(B))
  DtD <- crossprod(D)
  
  pred   <- matrix(0,n,TT)
  lambda <- a_lambda / b_lambda
  
  # Initialization settings
  if(verbose){ cat("Pre-allocating observations into groups...\n")}
  pre_clust       <- FSplines_clust(X=X,  H=clust_init, time = time, lambda = lambda, a_sigma=a_sigma,
                                    b_sigma=b_sigma, inner_knots=inner_knots, degree=degree, dif = 0,
                                    verbose=FALSE, prediction = TRUE)
  G               <- pre_clust$cluster
  
  # Pre allocating according to the cluster
  for(h in 1:clust_init){  
    idx_h <- G == h
    n_h   <- sum(idx_h)
    if(n_h > 0) pred[idx_h,]<- t(matrix(pre_clust$pred_cluster[h,],TT,n_h))
  }
  
  a_sigma2_tilde <- a_sigma + nTT/2
  b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
  sigma2 <-  1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
  
  beta       <- matrix(0, H, NCOL(B))    
  predH      <- matrix(0,H,TT)

  # Verbose settings and output
  verbose_step <- ceiling(R / 50)
  beta_out     <- array(0,c(R,H,NCOL(B)))
  lambda_out   <- numeric(R)
  sigma2_out   <- numeric(R)
  pred_out     <- matrix(0,n,TT)
  n_cluster_out<- as.numeric(R)
  G_out        <- matrix(0,R,n)
  
  # Starting the Gibbs sampling
  if(verbose){ cat("Starting the Gibbs sampling...\n")}
  for (r in 1:(R + burn_in)) {
    
    # Step 2 - 3: performed within the cluster.
    for (h in 1:H) {
      
      # Subsetting observations
      idx_h <- G == h
      n_h  <- sum(idx_h)
      
      # Quantity of interest
      X_h   <- matrix(X[idx_h, ], n_h, TT)
      n_th  <- colSums(!is.na(X_h))
      
      # If there are no observations, sample from the prior
      if(sum(n_th) == 0) {
        beta[h,]    <- rnorm(NCOL(B),0,sqrt(1/lambda))
      } else {
        
        idx_hNA <- n_th!=0
        X_bar   <- colMeans(X_h,na.rm = TRUE)
        
        B_h   <- matrix(B[idx_hNA,],sum(idx_hNA), NCOL(B))
        BW    <- t(B_h*n_th[idx_hNA]); BWB   <- BW%*%B_h
        BWY   <- BW %*%X_bar[idx_hNA]
        
        eig         <- eigen(BWB/sigma2 + lambda*DtD, symmetric = TRUE)
        Sigma_tilde <- crossprod(t(eig$vectors)/sqrt(eig$values))
        mu_tilde    <- Sigma_tilde%*%(BWY/sigma2)
        
        A1        <- t(eig$vectors)/sqrt(eig$values)
        beta[h,]  <- mu_tilde + c(matrix(rnorm(1 * NCOL(B)), 1, NCOL(B)) %*% A1)
      }
      
      # Predictions (the same for elements in the same cluster)
      predH[h,]               <- as.numeric(B%*%beta[h,])
      if(n_h > 0) pred[idx_h,]<- t(matrix(predH[h,],TT,n_h))
    }

    # Variance
    a_sigma2_tilde <- a_sigma + nTT/2
    b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
    sigma2 <-  1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
    
    # Smoothing parameter
    a_lambda_tilde <- a_lambda + H*ncol(B)/2
    b_lambda_tilde <- b_lambda + sum(beta^2)/2
    lambda         <- rgamma(1, a_lambda_tilde, b_lambda_tilde)
    
    # Step  - Cluster allocation
    nu     <- nu_update(G, alpha, sigma, H)
    p      <- p_update(nu)
    G      <- G_update(X, predH, sigma2, p) 

    # Output
    if (r > burn_in) {
      beta_out[r - burn_in, , ]<- beta 
      sigma2_out[r - burn_in]  <- sigma2
      lambda_out[r - burn_in]  <- lambda
      pred_out                 <- pred/(r- burn_in + 1) + (r- burn_in)/(r- burn_in + 1)*pred_out
      n_cluster_out[r-burn_in] <- length(unique(G))
      G_out[r-burn_in, ]       <- G
    }
    
    if (verbose) {
      if(r%%10==0) cat(paste("Sampling iteration: ", r, "\n",sep=""))
    }
  }
  
  # Output
  out <- list(pred=pred_out, beta=beta_out, lambda = lambda_out, sigma2=sigma2_out, G = G_out, n_cluster=n_cluster_out)
  attr(out,"class") <- "BFDA_Gibbs"
  return(out)
}

#' @export
BayesBFda_Gibbs <- function(X,  H = 5, B = NULL, time = NULL,
                           alpha  = 1, sigma = 0, lambda = 100, a_sigma=1, b_sigma=1, 
                           R = 5000, burn_in = 1000, clust_init = 3, verbose = FALSE) {
  
  if(clust_init >= H) stop("The clust_init value is greater than the upper bound H")
  
  # Fixed quantities
  n <- NROW(X)
  TT <- NCOL(X)
  
  if(is.null(time)) {
    time <- 1:TT
    warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
  }
  
  if(length(time) != TT) {
    stop("The length of time and the dimension of X must coincide.")
  }
  
  if(length(time) != nrow(B)) {
    stop("The length of time and the dimension of B must coincide.")
  }
  
  colnames(X)  <- time
  X            <- as.matrix(X) # If not already done, convert it onto a Matrix
  index_not_NA <- !is.na(X)
  nTT          <- sum(index_not_NA) # Number of observed values
  
  
  # Penalty term
  D   <- diag(NCOL(B))
  DtD <- crossprod(D)
  
  pred   <- matrix(0,n,TT)

  # Initialization settings
  if(verbose){ cat("Pre-allocating observations into groups...\n")}
  pre_clust       <- FB_clust(X=X,  H=clust_init, B=B, time = time, verbose=FALSE, prediction = TRUE)
  G               <- pre_clust$cluster
  
  # Pre allocating according to the cluster
  for(h in 1:clust_init){  
    idx_h <- G == h
    n_h   <- sum(idx_h)
    if(n_h > 0) pred[idx_h,]<- t(matrix(pre_clust$pred_cluster[h,],TT,n_h))
  }
  
  a_sigma2_tilde <- a_sigma + nTT/2
  b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
  sigma2 <-  1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
  
  beta       <- matrix(0, H, NCOL(B))    
  predH      <- matrix(0,H,TT)
  
  # Verbose settings and output
  verbose_step <- ceiling(R / 50)
  beta_out     <- array(0,c(R,H,NCOL(B)))
  sigma2_out   <- numeric(R)
  pred_out     <- matrix(0,n,TT)
  n_cluster_out<- as.numeric(R)
  G_out        <- matrix(0,R,n)
  
  # Starting the Gibbs sampling
  if(verbose){ cat("Starting the Gibbs sampling...\n")}
  for (r in 1:(R + burn_in)) {
    
    # Step 2 - 3: performed within the cluster.
    for (h in 1:H) {
      
      # Subsetting observations
      idx_h <- G == h
      n_h  <- sum(idx_h)
      
      # Quantity of interest
      X_h   <- matrix(X[idx_h, ], n_h, TT)
      n_th  <- colSums(!is.na(X_h))
      
      # If there are no observations, sample from the prior
      if(sum(n_th) == 0) {
        beta[h,]    <- rnorm(NCOL(B),0,sqrt(1/lambda))
      } else {
        
        idx_hNA <- n_th!=0
        X_bar   <- colMeans(X_h,na.rm = TRUE)
        
        B_h   <- matrix(B[idx_hNA,],sum(idx_hNA), NCOL(B))
        BW    <- t(B_h*n_th[idx_hNA]); BWB   <- BW%*%B_h
        BWY   <- BW %*%X_bar[idx_hNA]
        
        eig         <- eigen(BWB/sigma2 + lambda*DtD, symmetric = TRUE)
        Sigma_tilde <- crossprod(t(eig$vectors)/sqrt(eig$values))
        mu_tilde    <- Sigma_tilde%*%(BWY/sigma2)
        
        A1        <- t(eig$vectors)/sqrt(eig$values)
        beta[h,]  <- mu_tilde + c(matrix(rnorm(1 * NCOL(B)), 1, NCOL(B)) %*% A1)
      }
      
      # Predictions (the same for elements in the same cluster)
      predH[h,]               <- as.numeric(B%*%beta[h,])
      if(n_h > 0) pred[idx_h,]<- t(matrix(predH[h,],TT,n_h))
    }
    
    # Variance
    a_sigma2_tilde <- a_sigma + nTT/2
    b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
    sigma2 <-  1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
    
    # Step  - Cluster allocation
    nu     <- nu_update(G, alpha, sigma, H)
    p      <- p_update(nu)
    G      <- G_update(X, predH, sigma2, p) 
    
    # Output
    if (r > burn_in) {
      beta_out[r - burn_in, , ]<- beta 
      sigma2_out[r - burn_in]  <- sigma2
      pred_out                 <- pred/(r- burn_in + 1) + (r- burn_in)/(r- burn_in + 1)*pred_out
      n_cluster_out[r-burn_in] <- length(unique(G))
      G_out[r-burn_in, ]       <- G
    }
    
    if (verbose) {
      if(r%%10==0) cat(paste("Sampling iteration: ", r, "\n",sep=""))
    }
  }
  
  # Output
  out <- list(pred=pred_out, beta=beta_out, sigma2=sigma2_out, G = G_out, n_cluster=n_cluster_out)
  attr(out,"class") <- "BFDA_Gibbs"
  return(out)
}

#' @export
FSplines_clust <- function(X,  H, time = NULL, lambda = 1, a_sigma=1, b_sigma=1, inner_knots=min(round(length(time)/4),40), degree=3, dif = 1, verbose=FALSE, 
                           prediction=TRUE,...) {
  
  # Fixed quantities
  n <- NROW(X)
  TT <- NCOL(X)
  
  if(is.null(time)) {
    time <- 1:TT
    inner_knots = min(round(time/4),40)
    warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
  }
  if(length(time) != TT) {
    stop("The length of time and the dimension of X must coincide.")
  }
  
  colnames(X)  <- time
  beta         <- NULL
  
  xl    <- min(time); xr <- max(time); dx <- (xr - xl) / (inner_knots-1)
  knots <- seq(xl - degree * dx, xr + degree * dx, by = dx)
  beta  <- matrix(0,n,length(knots) - degree - 1)
  
  if(prediction) smoothed_values <- matrix(0,n,TT) 
  
  for (i in 1:n) {
    id       <- !is.na(as.matrix(X[i,]))
    fit      <- Pspline_CM(time[id],X[i,id], lambda = lambda, a_sigma=a_sigma, b_sigma=b_sigma, knots = knots, degree=degree, dif = dif,...)
    beta[i,] <- as.numeric(fit$param$beta)
    if(prediction){
      smoothed_values[i,] <- c(predict(fit,newx=time))
    }
    if(verbose) cat(paste("Smoothing function: ", i, "\n",sep=""))
  }
  

  # Output
  out <- kmeans(beta,H)
  
  if(prediction){
    pred  <- matrix(0,H,TT)
    B     <- spline.des(fit$fit$knots, time, fit$fit$degree + 1, 0 * time, outer.ok=TRUE, sparse=TRUE)$design
    for(h in 1:H){
      pred[h,]  <- as.numeric(B %*% out$centers[h,])
    }
    out$pred_cluster <- pred
    out$prediction   <- smoothed_values
  }
    return(out)
}


#' @export
FB_clust <- function(X, H, B = NULL, time = NULL, verbose=FALSE, prediction=TRUE,...) {
  
  # Fixed quantities
  n  <- NROW(X)
  TT <- NCOL(X)
  
  if(is.null(time)) {
    time <- 1:TT
    warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
  }
  if(length(time) != TT) {
    stop("The length of time and the dimension of X must coincide.")
  }
  
  if(length(time) != nrow(B)) {
    stop("The length of time and the dimension of B must coincide.")
  }
  
  colnames(X)  <- time
  beta         <- NULL
  
  beta  <- matrix(0,n, ncol(B))
  
  if(prediction) smoothed_values <- matrix(0,n,TT)
  
  for (i in 1:n) {
    id       <- !is.na(as.matrix(X[i,]))
    beta[i,] <- solve(crossprod(B[id,]), crossprod(B[id,],X[i,id]))
    if(prediction){
      smoothed_values[i,] <- c(B%*%beta[i,])
    }
    if(verbose) cat(paste("Smoothing function: ", i, "\n",sep=""))
  }
  
  # Output
  out <- kmeans(beta,H)
  
  if(prediction){
    pred  <- matrix(0,H,TT)
    for(h in 1:H){
      pred[h,]  <- as.numeric(B %*% out$centers[h,])
    }
    out$pred_cluster <- pred
    out$prediction   <- smoothed_values
  }
  return(out)
}