#' Simulate Multivariate Crop Yield Data Using a Generalized Copula-BSTS Model Without GEV Covariates
#'
#' This function simulates multivariate crop yield data using a time-varying copula
#' combined with Bayesian Structural Time Series (BSTS) models without GEV covariates,
#' providing a baseline for comparison.
#'
#' @param nsim Integer, number of simulation replications.
#' @param n_train Integer, number of training observations.
#' @param n_test Integer, number of test observations.
#' @param copula Character, specifying the copula type: "Clayton", "Frank",
#' "Gumbel", "Joe", or "Gaussian".
#' @param init_params Numeric vector, initial parameter values for optimization.
#' @param fn Function, log-likelihood function for parameter estimation.
#' @param U_train Numeric matrix (n_train x D), pseudo-observations for the copula.
#' @param Z_train Numeric array (n_train x D x M), observed data for each margin and sub-feature.
#' @param Z_test Numeric array (n_test x D x M), observed data for each margin and sub-feature.
#' @param X_train Numeric matrix (n_train x M), risk factors for the dynamic copula parameter.
#' @param X_test Numeric matrix (n_test x M), risk factors for the dynamic copula parameter.
#' @param Y_test Numeric matrix (n_test x D), true future values for MSE calculation.
#' @param BSTS_list List of length D, each element is a BSTS model for a different margin.
#' @return A list containing:
#' \item{optim_results}{Results from the optimization process.}
#' \item{theta_sim}{Simulated copula parameters across replications.}
#' \item{Y_sim}{Simulated final BSTS-based forecasts.}
#' \item{MSE}{Mean squared error for each simulation run.}
#' @importFrom stats cor optim rnorm dnorm qnorm predict
#' @importFrom copula claytonCopula frankCopula gumbelCopula joeCopula normalCopula rCopula
#' @importFrom utils combn
#' @import bsts
#' @export
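#'
#' @details The example below is a minimal usage sketch with simulated
#' placeholder data. It assumes that a log-likelihood function with signature
#' \code{fn(par, U, Z, X, copula)} (here called \code{loglik.noGEV}) is
#' available, and that each element of \code{BSTS_list} is a fitted \pkg{bsts}
#' model with a regression component on the corresponding margin's covariates.
#' @examples
#' \dontrun{
#' D <- 2; M <- 3; n_train <- 100; n_test <- 12
#' # Placeholder pseudo-observations, covariates, risk factors, and test values
#' U_train <- copula::pobs(matrix(rnorm(n_train * D), n_train, D))
#' Z_train <- array(rnorm(n_train * D * M), dim = c(n_train, D, M))
#' Z_test  <- array(rnorm(n_test * D * M),  dim = c(n_test, D, M))
#' X_train <- matrix(rnorm(n_train * M), n_train, M)
#' X_test  <- matrix(rnorm(n_test * M),  n_test, M)
#' Y_test  <- matrix(rnorm(n_test * D),  n_test, D)
#' # One BSTS model per margin, each with a regression component
#' BSTS_list <- lapply(seq_len(D), function(d) {
#'   y_d  <- rnorm(n_train)
#'   df_d <- data.frame(y = y_d, Z_train[, d, ])
#'   ss   <- bsts::AddLocalLinearTrend(list(), y_d)
#'   bsts::bsts(y ~ ., state.specification = ss, data = df_d, niter = 1000)
#' })
#' res <- simul.fun.noGEV(
#'   nsim = 10, n_train = n_train, n_test = n_test,
#'   copula = "Clayton", init_params = rep(0.1, 2 + M),
#'   fn = loglik.noGEV,
#'   U_train = U_train, Z_train = Z_train, Z_test = Z_test,
#'   X_train = X_train, X_test = X_test,
#'   Y_test = Y_test, BSTS_list = BSTS_list
#' )
#' mean(res$MSE)
#' }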
simul.fun.noGEV <- function(
nsim = 100,
n_train,
n_test,
copula,
init_params,
fn, # log-likelihood-noGEV
U_train, # dimension: (n_train x D)
Z_train, # dimension: (n_train x D x M)
Z_test, # dimension: (n_test x D x M)
X_train, # dimension: (n_train x M)
X_test, # dimension: (n_test x M)
Y_test, # dimension: (n_test x D) matrix of true future values for MSE
BSTS_list # a list of length D, each a BSTS model for margin d
){
#####################################################################
## PART 1: Basic checks & define dimension D, M
#####################################################################
supported_copulas <- c("Clayton","Frank","Gumbel","Joe","Gaussian")
if (!copula %in% supported_copulas) {
stop("Invalid copula provided. Must be one of: ", paste(supported_copulas, collapse=", "))
}
D <- ncol(U_train) # number of margins in the copula
M <- dim(Z_test)[3] # number of covariates in each margin
#####################################################################
## PART 2: Optimize log-likelihood 'fn' to get best-fit parameters
#####################################################################
optim_results <- stats::optim(
par = init_params,
fn = fn,
U = U_train,
Z = Z_train,
X = X_train,
copula = copula)
par_hat <- optim_results$par
#####################################################################
## PART 3: Parse copula parameters (no GEV parameters in this variant)
#####################################################################
# Parameter vector layout: c(omega, alpha, gamma[1..M])
omega <- par_hat[1]
alpha <- par_hat[2]
# gamma holds one coefficient per risk factor (length M)
gamma <- par_hat[3:(2 + M)]
#####################################################################
## PART 4: Allocate arrays for simulation
#####################################################################
# We'll store final BSTS-based forecasts in a 3D array: (nsim x n_test x D).
Y_sim <- array(NA, dim=c(nsim, n_test, D))
# We'll store MSE in a vector of length nsim.
MSE <- numeric(nsim)
# Copula parameter storage:
if (copula == "Gaussian"&& D>2) {
nPairs <- D*(D-1)/2
# We'll store a param for each replicate i, each time t, each pair p:
theta_mat <- array(NA, dim=c(nsim, n_test, nPairs))
} else {
# For archimedean: one param per replicate i, per time t
theta_vec <- matrix(NA, nrow=nsim, ncol=n_test)
}
#####################################################################
## PART 5: Initialize Copula param (time=1)
#####################################################################
# 5B) Copula param for t=1
if (copula == "Clayton") {
theta_vec[,1] <- clayton.theta(mean(cor(U_train, method="kendall")))
} else if (copula == "Frank") {
theta_vec[, 1] <- frank.theta(mean(cor(U_train, method ="kendall")))
} else if (copula == "Gumbel") {
theta_vec[, 1] <- GH.theta(mean(cor(U_train, method= "kendall")))
} else if (copula == "Joe") {
theta_vec[, 1] <- joe.theta(mean(cor(U_train, method= "kendall")))
} else if (copula == "Gaussian"&&D>2) {
pair_idx <- combn(D, 2)
for (p_idx in seq_len(ncol(pair_idx))) {
i_d <- pair_idx[1,p_idx]
j_d <- pair_idx[2,p_idx]
theta_mat[, 1, p_idx] <- cor(U_train[, i_d], U_train[, j_d], method="pearson")
}
}else if (copula == "Gaussian"&&D==2){
theta_vec[, 1] <- cor(U_train[,1],U_train[,2], method="pearson")
}
#####################################################################
## PART 6: Main loop: (i in 1..nsim), (t in 2..n_test)
#####################################################################
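# Copula parameter evolution:
#   theta_t = g( omega + alpha * theta_{t-1} + sum_k gamma_k * x_{k,t-1} ),
# where x_{.,t-1} is X_test[t-1, ] for the Archimedean copulas and the pairwise
# maxima of Z_test[t-1, , ] for the Gaussian copula, and g() maps the raw value
# into the admissible range (|.| for Clayton/Frank, |.| + 1 for Gumbel/Joe,
# tanh for the Gaussian correlations).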
for (i_sim in seq_len(nsim)) {
for (t_idx in 2:n_test) {
# Copula param update
if (copula == "Clayton") {
old_th <- theta_vec[i_sim, t_idx-1]
raw_val <- omega + alpha * old_th + sum( X_test[t_idx-1,] * gamma )
theta_vec[i_sim, t_idx] <- abs(raw_val)
} else if (copula == "Frank") {
old_th <- theta_vec[i_sim, t_idx-1]
raw_val <- omega + alpha * old_th + sum( X_test[t_idx-1,] * gamma )
theta_vec[i_sim, t_idx] <- abs(raw_val)
} else if (copula == "Gumbel") {
old_th <- theta_vec[i_sim, t_idx-1]
raw_val <- abs(omega + alpha * old_th + sum( X_test[t_idx-1,] * gamma )) + 1
theta_vec[i_sim, t_idx] <- raw_val
} else if (copula == "Joe") {
old_th <- theta_vec[i_sim, t_idx-1]
raw_val <- abs(omega + alpha * old_th + sum( X_test[t_idx-1,] * gamma )) + 1
theta_vec[i_sim, t_idx] <- raw_val
} else if (copula == "Gaussian"&& D>2) {
old_theta <- theta_mat[i_sim, t_idx-1, ] # Previous theta values for all correlation pairs
pair_idx <- combn(D, 2) # Generate all unique region pairs (D choose 2)
num_pairs <- ncol(pair_idx) # Number of correlation pairs
# Loop over each correlation pair
for (pair in seq_len(num_pairs)) {
r1 <- pair_idx[1, pair] # First region in the pair
r2 <- pair_idx[2, pair] # Second region in the pair
# Compute the maximum for each risk factor across the two regions
# This gives a vector of length M (one per risk factor)
max_z_vals <- pmax(Z_test[t_idx-1,r1,],
Z_test[t_idx-1,r2,])
# Calculate raw value by summing gamma_k * (max over regions for risk factor k)
raw_val <- omega + alpha * old_theta[pair] + sum( max_z_vals * gamma )
# Update the correlation coefficient via tanh transformation
theta_mat[i_sim, t_idx, pair] <- tanh(raw_val)
}
} else if (copula == "Gaussian" && D==2){
old_th <- theta_vec[i_sim, t_idx-1]
raw_val <- tanh(omega + alpha * old_th + sum( pmax(Z_test[t_idx-1,1,],Z_test[t_idx-1,2,]) * gamma ))
theta_vec[i_sim, t_idx] <- raw_val
}
} # end for t_idx
###################################################################
## PART 7: Now sample from copula -> BSTS -> final forecasts
###################################################################
niter_bsts <- BSTS_list[[1]]$niter
u_sim <- matrix(NA, nrow=niter_bsts, ncol=D*n_test)
# build an index for columns
col_index_base <- seq(1, D*n_test - (D-1), by=D)
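# Columns of u_sim are grouped by forecast period: for period t, columns
# col_index_base[t] .. col_index_base[t] + D - 1 hold margins 1..D.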
# For each t in 1..n_test, sample from copula param => store columns
for (t_idx in seq_len(n_test)) {
# build the copula object for replicate i_sim at time t_idx
if (copula == "Clayton") {
param_now <- theta_vec[i_sim, t_idx]
cop_obj <- copula::claytonCopula(param=param_now, dim=D)
} else if (copula == "Frank") {
param_now <- theta_vec[i_sim, t_idx]
cop_obj <- copula::frankCopula(param=param_now, dim=D)
} else if (copula == "Gumbel") {
param_now <- theta_vec[i_sim, t_idx]
cop_obj <- copula::gumbelCopula(param=param_now, dim=D)
} else if (copula == "Joe") {
param_now <- theta_vec[i_sim, t_idx]
cop_obj <- copula::joeCopula(param=param_now, dim=D)
} else if (copula == "Gaussian" && D>2) {
# build correlation matrix from theta_mat[i_sim, t_idx, ]
R <- diag(D)
pair_idx <- combn(D,2)
for (p_idx in seq_len(nPairs)) {
i_d <- pair_idx[1,p_idx]
j_d <- pair_idx[2,p_idx]
R[i_d, j_d] <- theta_mat[i_sim, t_idx, p_idx]
R[j_d, i_d] <- theta_mat[i_sim, t_idx, p_idx]
}
cop_obj <- copula::normalCopula(param = R[upper.tri(R)], dim=D, dispstr="un")
} else if (copula == "Gaussian" && D==2){
param_now <- theta_vec[i_sim, t_idx]
cop_obj <- copula::normalCopula(param=param_now, dim=D, dispstr = "un")
}
U_block <- copula::rCopula(niter_bsts, cop_obj) # (niter_bsts x D)
# store columns
col_start <- col_index_base[t_idx]
for (d_idx in seq_len(D)) {
u_sim[, col_start + (d_idx-1)] <- U_block[, d_idx]
}
}
# Now we convert each margin's columns to normal eps, call BSTS, combine
# We'll fill Y_sim[i_sim, , d_idx].
for (d_idx in seq_len(D)) {
# Identify the corresponding column index in u_sim
col_here <- col_index_base + (d_idx-1)
# Generate the noise term from the copula-based residuals
eps_draw <- stats::qnorm(u_sim[, col_here], mean=0, sd=BSTS_list[[d_idx]]$sigma.obs)
# Predict with BSTS using the full test-period covariates for margin d
predDist_d <- predict(BSTS_list[[d_idx]], horizon=1, newdata=Z_test[,d_idx,], burn=500)$distribution
# Subtract one draw of observation-level noise from each predictive sample
predictions_d <- apply(predDist_d, 2, function(col, sds) {
col - stats::rnorm(1, mean = 0, sd = sds)
}, sds = BSTS_list[[d_idx]]$sigma.obs)
# Combine one randomly selected posterior draw with one copula-based residual draw
Y_sim[i_sim, , d_idx] <- predictions_d[sample(nrow(predictions_d), 1), ] +
eps_draw[sample(nrow(eps_draw), 1), ]
}
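# Squared forecast errors summed across margins, then averaged over the test horizon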
sqerr <- 0
for (d_idx in seq_len(D)) {
sqerr <- sqerr + (Y_sim[i_sim, , d_idx] - Y_test[,d_idx])^2
}
MSE[i_sim] <- mean(sqerr)
} # end for i_sim
#####################################################################
## PART 8: Return final results
#####################################################################
res_list <- list(
optim_results = optim_results,
# Gaussian copula with D > 2 returns theta_mat; all other cases return theta_vec
theta_sim = if (copula == "Gaussian" && D > 2) theta_mat else theta_vec,
Y_sim = Y_sim,
MSE = MSE
)
return(res_list)
}