#' @export
BayesFda_Gibbs <- function(X, H = 5, time = NULL,
alpha = 1, sigma = 0,
a_lambda = 1, b_lambda = 1, a_sigma=1, b_sigma=1, inner_knots = min(round(NCOL(X)/4),40), degree=3,
R = 5000, burn_in = 1000, clust_init = 3, verbose = FALSE) {
if(clust_init >= H) stop("The clust_init value is greater than the upper bound H")
# Fixed quantities
n <- NROW(X)
TT <- NCOL(X)
if(is.null(time)) {
time <- 1:TT
warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
}
if(length(time) != TT) {
stop("The length of time and the dimension of X must coincide.")
}
colnames(X) <- time
X <- as.matrix(X) # If not already done, convert it onto a Matrix
index_not_NA <- !is.na(X)
nTT <- sum(index_not_NA) # Number of observed values
# Smoothing splines settings
xl <- min(time); xr <- max(time); dx <- (xr - xl) / (inner_knots-1)
knots <- seq(xl - degree * dx, xr + degree * dx, by = dx) # Knots placement
B <- spline.des(knots, time, degree + 1, 0 * time, outer.ok=TRUE)$design
# Penalty term
D <- diag(NCOL(B))
DtD <- crossprod(D)
pred <- matrix(0,n,TT)
lambda <- a_lambda / b_lambda
# Initialization settings
if(verbose){ cat("Pre-allocating observations into groups...\n")}
pre_clust <- FSplines_clust(X=X, H=clust_init, time = time, lambda = lambda, a_sigma=a_sigma,
b_sigma=b_sigma, inner_knots=inner_knots, degree=degree, dif = 0,
verbose=FALSE, prediction = TRUE)
G <- pre_clust$cluster
# Pre allocating according to the cluster
for(h in 1:clust_init){
idx_h <- G == h
n_h <- sum(idx_h)
if(n_h > 0) pred[idx_h,]<- t(matrix(pre_clust$pred_cluster[h,],TT,n_h))
}
a_sigma2_tilde <- a_sigma + nTT/2
b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
sigma2 <- 1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
beta <- matrix(0, H, NCOL(B))
predH <- matrix(0,H,TT)
# Verbose settings and output
verbose_step <- ceiling(R / 50)
beta_out <- array(0,c(R,H,NCOL(B)))
lambda_out <- numeric(R)
sigma2_out <- numeric(R)
pred_out <- matrix(0,n,TT)
n_cluster_out<- as.numeric(R)
G_out <- matrix(0,R,n)
# Starting the Gibbs sampling
if(verbose){ cat("Starting the Gibbs sampling...\n")}
for (r in 1:(R + burn_in)) {
# Step 2 - 3: performed within the cluster.
for (h in 1:H) {
# Subsetting observations
idx_h <- G == h
n_h <- sum(idx_h)
# Quantity of interest
X_h <- matrix(X[idx_h, ], n_h, TT)
n_th <- colSums(!is.na(X_h))
# If there are no observations, sample from the prior
if(sum(n_th) == 0) {
beta[h,] <- rnorm(NCOL(B),0,sqrt(1/lambda))
} else {
idx_hNA <- n_th!=0
X_bar <- colMeans(X_h,na.rm = TRUE)
B_h <- matrix(B[idx_hNA,],sum(idx_hNA), NCOL(B))
BW <- t(B_h*n_th[idx_hNA]); BWB <- BW%*%B_h
BWY <- BW %*%X_bar[idx_hNA]
eig <- eigen(BWB/sigma2 + lambda*DtD, symmetric = TRUE)
Sigma_tilde <- crossprod(t(eig$vectors)/sqrt(eig$values))
mu_tilde <- Sigma_tilde%*%(BWY/sigma2)
A1 <- t(eig$vectors)/sqrt(eig$values)
beta[h,] <- mu_tilde + c(matrix(rnorm(1 * NCOL(B)), 1, NCOL(B)) %*% A1)
}
# Predictions (the same for elements in the same cluster)
predH[h,] <- as.numeric(B%*%beta[h,])
if(n_h > 0) pred[idx_h,]<- t(matrix(predH[h,],TT,n_h))
}
# Variance
a_sigma2_tilde <- a_sigma + nTT/2
b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
sigma2 <- 1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
# Smoothing parameter
a_lambda_tilde <- a_lambda + H*ncol(B)/2
b_lambda_tilde <- b_lambda + sum(beta^2)/2
lambda <- rgamma(1, a_lambda_tilde, b_lambda_tilde)
# Step - Cluster allocation
nu <- nu_update(G, alpha, sigma, H)
p <- p_update(nu)
G <- G_update(X, predH, sigma2, p)
# Output
if (r > burn_in) {
beta_out[r - burn_in, , ]<- beta
sigma2_out[r - burn_in] <- sigma2
lambda_out[r - burn_in] <- lambda
pred_out <- pred/(r- burn_in + 1) + (r- burn_in)/(r- burn_in + 1)*pred_out
n_cluster_out[r-burn_in] <- length(unique(G))
G_out[r-burn_in, ] <- G
}
if (verbose) {
if(r%%10==0) cat(paste("Sampling iteration: ", r, "\n",sep=""))
}
}
# Output
out <- list(pred=pred_out, beta=beta_out, lambda = lambda_out, sigma2=sigma2_out, G = G_out, n_cluster=n_cluster_out)
attr(out,"class") <- "BFDA_Gibbs"
return(out)
}
#' @export
BayesBFda_Gibbs <- function(X, H = 5, B = NULL, time = NULL,
alpha = 1, sigma = 0, lambda = 100, a_sigma=1, b_sigma=1,
R = 5000, burn_in = 1000, clust_init = 3, verbose = FALSE) {
if(clust_init >= H) stop("The clust_init value is greater than the upper bound H")
# Fixed quantities
n <- NROW(X)
TT <- NCOL(X)
if(is.null(time)) {
time <- 1:TT
warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
}
if(length(time) != TT) {
stop("The length of time and the dimension of X must coincide.")
}
if(length(time) != nrow(B)) {
stop("The length of time and the dimension of B must coincide.")
}
colnames(X) <- time
X <- as.matrix(X) # If not already done, convert it onto a Matrix
index_not_NA <- !is.na(X)
nTT <- sum(index_not_NA) # Number of observed values
# Penalty term
D <- diag(NCOL(B))
DtD <- crossprod(D)
pred <- matrix(0,n,TT)
# Initialization settings
if(verbose){ cat("Pre-allocating observations into groups...\n")}
pre_clust <- FB_clust(X=X, H=clust_init, B=B, time = time, verbose=FALSE, prediction = TRUE)
G <- pre_clust$cluster
# Pre allocating according to the cluster
for(h in 1:clust_init){
idx_h <- G == h
n_h <- sum(idx_h)
if(n_h > 0) pred[idx_h,]<- t(matrix(pre_clust$pred_cluster[h,],TT,n_h))
}
a_sigma2_tilde <- a_sigma + nTT/2
b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
sigma2 <- 1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
beta <- matrix(0, H, NCOL(B))
predH <- matrix(0,H,TT)
# Verbose settings and output
verbose_step <- ceiling(R / 50)
beta_out <- array(0,c(R,H,NCOL(B)))
sigma2_out <- numeric(R)
pred_out <- matrix(0,n,TT)
n_cluster_out<- as.numeric(R)
G_out <- matrix(0,R,n)
# Starting the Gibbs sampling
if(verbose){ cat("Starting the Gibbs sampling...\n")}
for (r in 1:(R + burn_in)) {
# Step 2 - 3: performed within the cluster.
for (h in 1:H) {
# Subsetting observations
idx_h <- G == h
n_h <- sum(idx_h)
# Quantity of interest
X_h <- matrix(X[idx_h, ], n_h, TT)
n_th <- colSums(!is.na(X_h))
# If there are no observations, sample from the prior
if(sum(n_th) == 0) {
beta[h,] <- rnorm(NCOL(B),0,sqrt(1/lambda))
} else {
idx_hNA <- n_th!=0
X_bar <- colMeans(X_h,na.rm = TRUE)
B_h <- matrix(B[idx_hNA,],sum(idx_hNA), NCOL(B))
BW <- t(B_h*n_th[idx_hNA]); BWB <- BW%*%B_h
BWY <- BW %*%X_bar[idx_hNA]
eig <- eigen(BWB/sigma2 + lambda*DtD, symmetric = TRUE)
Sigma_tilde <- crossprod(t(eig$vectors)/sqrt(eig$values))
mu_tilde <- Sigma_tilde%*%(BWY/sigma2)
A1 <- t(eig$vectors)/sqrt(eig$values)
beta[h,] <- mu_tilde + c(matrix(rnorm(1 * NCOL(B)), 1, NCOL(B)) %*% A1)
}
# Predictions (the same for elements in the same cluster)
predH[h,] <- as.numeric(B%*%beta[h,])
if(n_h > 0) pred[idx_h,]<- t(matrix(predH[h,],TT,n_h))
}
# Variance
a_sigma2_tilde <- a_sigma + nTT/2
b_sigma2_tilde <- b_sigma + sum((X - pred)^2,na.rm=TRUE)/2
sigma2 <- 1 / rgamma(1, a_sigma2_tilde, b_sigma2_tilde)
# Step - Cluster allocation
nu <- nu_update(G, alpha, sigma, H)
p <- p_update(nu)
G <- G_update(X, predH, sigma2, p)
# Output
if (r > burn_in) {
beta_out[r - burn_in, , ]<- beta
sigma2_out[r - burn_in] <- sigma2
pred_out <- pred/(r- burn_in + 1) + (r- burn_in)/(r- burn_in + 1)*pred_out
n_cluster_out[r-burn_in] <- length(unique(G))
G_out[r-burn_in, ] <- G
}
if (verbose) {
if(r%%10==0) cat(paste("Sampling iteration: ", r, "\n",sep=""))
}
}
# Output
out <- list(pred=pred_out, beta=beta_out, sigma2=sigma2_out, G = G_out, n_cluster=n_cluster_out)
attr(out,"class") <- "BFDA_Gibbs"
return(out)
}
#' @export
FSplines_clust <- function(X, H, time = NULL, lambda = 1, a_sigma=1, b_sigma=1, inner_knots=min(round(length(time)/4),40), degree=3, dif = 1, verbose=FALSE,
prediction=TRUE,...) {
# Fixed quantities
n <- NROW(X)
TT <- NCOL(X)
if(is.null(time)) {
time <- 1:TT
inner_knots = min(round(time/4),40)
warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
}
if(length(time) != TT) {
stop("The length of time and the dimension of X must coincide.")
}
colnames(X) <- time
beta <- NULL
xl <- min(time); xr <- max(time); dx <- (xr - xl) / (inner_knots-1)
knots <- seq(xl - degree * dx, xr + degree * dx, by = dx)
beta <- matrix(0,n,length(knots) - degree - 1)
if(prediction) smoothed_values <- matrix(0,n,TT)
for (i in 1:n) {
id <- !is.na(as.matrix(X[i,]))
fit <- Pspline_CM(time[id],X[i,id], lambda = lambda, a_sigma=a_sigma, b_sigma=b_sigma, knots = knots, degree=degree, dif = dif,...)
beta[i,] <- as.numeric(fit$param$beta)
if(prediction){
smoothed_values[i,] <- c(predict(fit,newx=time))
}
if(verbose) cat(paste("Smoothing function: ", i, "\n",sep=""))
}
# Output
out <- kmeans(beta,H)
if(prediction){
pred <- matrix(0,H,TT)
B <- spline.des(fit$fit$knots, time, fit$fit$degree + 1, 0 * time, outer.ok=TRUE, sparse=TRUE)$design
for(h in 1:H){
pred[h,] <- as.numeric(B %*% out$centers[h,])
}
out$pred_cluster <- pred
out$prediction <- smoothed_values
}
return(out)
}
#' @export
FB_clust <- function(X, H, B = NULL, time = NULL, verbose=FALSE, prediction=TRUE,...) {
# Fixed quantities
n <- NROW(X)
TT <- NCOL(X)
if(is.null(time)) {
time <- 1:TT
warning("The time dimension was not supplied and equally-spaced intervals are assumed.")
}
if(length(time) != TT) {
stop("The length of time and the dimension of X must coincide.")
}
if(length(time) != nrow(B)) {
stop("The length of time and the dimension of B must coincide.")
}
colnames(X) <- time
beta <- NULL
beta <- matrix(0,n, ncol(B))
if(prediction) smoothed_values <- matrix(0,n,TT)
for (i in 1:n) {
id <- !is.na(as.matrix(X[i,]))
beta[i,] <- solve(crossprod(B[id,]), crossprod(B[id,],X[i,id]))
if(prediction){
smoothed_values[i,] <- c(B%*%beta[i,])
}
if(verbose) cat(paste("Smoothing function: ", i, "\n",sep=""))
}
# Output
out <- kmeans(beta,H)
if(prediction){
pred <- matrix(0,H,TT)
for(h in 1:H){
pred[h,] <- as.numeric(B %*% out$centers[h,])
}
out$pred_cluster <- pred
out$prediction <- smoothed_values
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.