##################################################
# Estimation Class
##################################################
DISTAR <- R6Class("DISTAR",
public = list(
count = NULL,
spatial_dep = NULL,
temporal_dep = NULL,
outcome_dep = NULL,
N = NULL,
TT = NULL,
G = NULL,
NGT = NULL,
X = NULL,
X.ls = NULL,
y = NULL,
W_t = NULL,
W.ls = NULL,
TM.ls = NULL,
O.ls = NULL,
n_pairs = NULL,
lambda_map.ls = NULL,
rho_vec = NULL,
gamma_vec = NULL,
lambda_vec = NULL,
sigma2_vec = NULL,
beta.ls = NULL,
ystar_sample = NULL,
kern_obj = NULL,
ldA_fun = NULL,
outcome_index_vec = NULL,
VC_mat = NULL,
initialize = function(X.ls, y.ls, W_t, N, G, TT, count,
spatial_dep = FALSE, temporal_dep = FALSE, outcome_dep = FALSE) {
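## Argument overview (as used below): X.ls is a list of G design matrices,
## each (N*TT) x K_k; y.ls is a list of G outcome vectors of length N*TT,
## stacked by period; W_t is the N x N spatial weights matrix; count = TRUE
## treats the outcomes as counts, FALSE as binary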
## Check inputs
if (TT == 1 && temporal_dep) {
warning("You requested temporal dependence estimation, but the data only cover a single period. Temporal dependence will be set to FALSE.")
temporal_dep <- FALSE
}
if (G == 1 && outcome_dep) {
warning("You requested outcome dependence estimation, but the data only cover a single outcome. Outcome dependence will be set to FALSE.")
outcome_dep <- FALSE
}
## Assign data
self$X <- make_X(X_list = X.ls, N = N, TT = TT)
self$X.ls <- X.ls
self$W_t <- W_t
## Make y vector
y <- c()
for (t in 1:TT) {
start <- (t-1)*N + 1
end <- start + N - 1
for (k in 1:G) {
y <- c(y, y.ls[[k]][start:end])
}
}
self$y <- y
## Assign outcome switch
self$count <- count
## Assign dependence switches
self$spatial_dep <- spatial_dep
self$temporal_dep <- temporal_dep
self$outcome_dep <- outcome_dep
## Assign data dimensions
self$N <- N
self$TT <- TT
self$G <- G
self$NGT <- N*G*TT
## Make W.ls: One NGTxNGT matrix per outcome
self$W.ls <- make_spatial_weights(W_t, G, TT)
## Make TM.ls: One NGTxNGT matrix per outcome
# NOTE: If TT = 1, returns G NGxNG matrices of zeros
self$TM.ls <- make_temporal_weights(N, G, TT)
## Make O.ls: One NGTxNGT matrix per outcome-pair
# NOTE: If G = 1, returns a single NTxNT matrix of zeros
self$O.ls <- make_outcome_weights(N, G, TT)
self$n_pairs <- length(self$O.ls) # set to 1 if G = 1
## Map lambdas/Os onto outcomes
if (self$G > 1) {
pair_mat <- combn(self$G, 2)
self$lambda_map.ls <- vector('list', self$G)
for (i in 1:self$G) {
self$lambda_map.ls[[i]] <- c(which(pair_mat[1,]==i), which(pair_mat[2,]==i))
}
} else {
self$lambda_map.ls <- rep(list(1), self$G)
}
## Initialize interdependence parameters
self$rho_vec <- rep(0, self$G)
self$gamma_vec <- rep(0, self$G)
self$lambda_vec <- rep(0, self$n_pairs)
## Initialize variance
self$sigma2_vec <- rep(1, self$G)
## Initialize beta
beta.ls <- vector('list', self$G)
for (k in 1:self$G) {
beta.ls[[k]] <- rep(0, ncol(X.ls[[k]]))
}
self$beta.ls <- beta.ls
## Initialize ystar
self$ystar_sample <- matrix(0, 1, N*TT*G)
## Make the kernel compute object
self$kern_obj <- Kernel$new(self$W.ls, self$TM.ls, self$O.ls, self$X, self$N*self$G)
## Make the outcome index vector
# Indicates the outcome at each position of y
self$outcome_index_vec <- rep(rep(1:self$G, each = self$N), self$TT)
## Setup of log|A| function
# If G = 1, we can use lookup to eval log|A|
# Here we prepare the lookup function
if (G == 1) {
print('Setting up log-determinants...')
flush.console()
rho_seq <- seq(-0.99, 0.99, length.out = 20)
ldA_seq <- rep(0, length(rho_seq))
for (j in seq_along(rho_seq)) {
ldA_seq[j] <- self$TT * Matrix::determinant(.sparseDiagonal(self$N) - rho_seq[j]*self$W_t)$modulus
}
self$ldA_fun <- splinefun(x = rho_seq, y = ldA_seq)
print('Done.')
flush.console()
}
},
get_A = function() {
# Returns the NGTxNGT inverse dependence multiplier with class params
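# A = I - sum_k (rho_k*W_k + gamma_k*TM_k) - sum_p lambda_p*O_p,
# so the latent reduced form is y* = A^{-1}(X beta + eps)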
A <- .sparseDiagonal(self$NGT)
for (k in 1:self$G) {
A <- A - self$rho_vec[k]*self$W.ls[[k]] - self$gamma_vec[k]*self$TM.ls[[k]]
}
for (k in 1:self$n_pairs) {
A <- A - self$O.ls[[k]]*self$lambda_vec[k]
}
return(A)
},
sample_ystar = function(M, ystar_init) {
if (self$count) {
self$sample_ystar_count(M, ystar_init)
} else {
self$sample_ystar_binary(M, ystar_init)
}
self$kern_obj$update_ystar_sample(self$ystar_sample)
},
sample_ystar_count = function(M, ystar_init) {
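# Samples the latent y* given observed counts: conditional on theta,
# y* ~ N(mu, H^{-1}) with mu solving A mu = X beta and precision
# H = A' Sigma^{-1} A; the count likelihood is handled by
# arscpp::mplnposterior (a Poisson log-normal posterior, per its name)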
# Get parameters
A <- self$get_A()
Xbeta <- self$X%*%unlist(self$beta.ls)
mu <- scipy$sparse$linalg$bicgstab(r_to_py(A), Xbeta)[[1]]
inv_sigma2_long <- (1/self$sigma2_vec)[self$outcome_index_vec]
invSigma <- .sparseDiagonal(self$NGT)*inv_sigma2_long
H <- t(A)%*%invSigma%*%A
# Sample
sampler <- arscpp::mplnposterior(self$y, as.vector(mu), H)
smp <- sampler$sample(M, ystar_init)
# Assign
self$ystar_sample <- smp
},
sample_ystar_binary = function(M, ystar_init) {
# Get parameters
A <- self$get_A()
Xbeta <- self$X%*%unlist(self$beta.ls)
mu <- scipy$sparse$linalg$bicgstab(r_to_py(A), Xbeta)[[1]]
inv_sigma2_long <- (1/self$sigma2_vec)[self$outcome_index_vec]
invSigma <- .sparseDiagonal(self$NGT)*inv_sigma2_long
H <- t(A)%*%invSigma%*%A
# Set limits
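# y == 1 -> y* in [0, Inf); y == 0 -> y* in (-Inf, 0]; missing y -> unbounded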
lower <- rep(0, length(self$y))
lower[is.na(self$y) | self$y == 0] <- -Inf
upper <- rep(0, length(self$y))
upper[is.na(self$y) | self$y == 1] <- Inf
# Sample
smp <- rtmvnorm.sparseMatrix(M, mean = as.vector(mu), H = H,
lower = lower, upper = upper,
start.value = ystar_init)
# Assign
self$ystar_sample <- smp
},
pack_theta = function() {
## Get vector containing all parameters to be estimated
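# Constrained params are mapped to the real line so the optimizer works on
# an unconstrained scale: atanh for the (-1,1)-valued lambda/rho/gamma,
# inverse softplus (asoftplus) for the positive sigma2; unpack_theta()
# applies the inverse transforms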
theta <- c(unlist(self$beta.ls))
if (self$outcome_dep) {
theta <- c(theta, atanh(self$lambda_vec))
}
if (self$count) {
theta <- c(theta, asoftplus(self$sigma2_vec))
}
if (self$spatial_dep) {
theta <- c(theta, atanh(self$rho_vec))
}
if (self$temporal_dep) {
theta <- c(theta, atanh(self$gamma_vec))
}
return(theta)
},
unpack_theta = function(theta) {
## Takes the packed theta vector and returns a list with all parameters as named elements
K <- length(unlist(self$beta.ls))
beta <- theta[1:K]
beta.ls <- vector('list', self$G)
idx <- 1
for (k in 1:self$G) {
K.k <- length(self$beta.ls[[k]])
beta.ls[[k]] <- beta[idx:(idx+K.k-1)]
idx <- idx + K.k
}
idx <- K+1
if (self$outcome_dep) {
idx_end <- idx + self$n_pairs - 1
lambda_vec <- tanh(theta[idx:idx_end])
idx <- idx_end + 1
} else {
lambda_vec <- rep(0, self$n_pairs)
}
if (self$count) {
idx_end <- idx + self$G - 1
sigma2_vec <- softplus(theta[idx:idx_end])
idx <- idx_end + 1
} else {
sigma2_vec <- rep(1, self$G)
}
if (self$spatial_dep) {
idx_end <- idx + self$G - 1
rho_vec <- tanh(theta[idx:idx_end])
idx <- idx_end + 1
} else {
rho_vec <- rep(0, self$G)
}
if (self$temporal_dep) {
idx_end <- idx + self$G - 1
gamma_vec <- tanh(theta[idx:idx_end])
idx <- idx_end + 1
} else {
gamma_vec <- rep(0, self$G)
}
out <- list(beta.ls = beta.ls, sigma2_vec = sigma2_vec,
rho_vec = rho_vec, gamma_vec = gamma_vec,
lambda_vec = lambda_vec)
return(out)
},
transform_theta = function(theta) {
## Takes the packed theta vector and returns a theta vector where all
## transformed params are mapped back to their original support
# e.g. from atanh(rho) to rho
transformed_theta <- rep(NA, length(theta))
K <- length(unlist(self$beta.ls))
transformed_theta[1:K] <- theta[1:K]
idx <- K+1
if (self$outcome_dep) {
idx_end <- idx + self$n_pairs - 1
transformed_theta[idx:idx_end] <- tanh(theta[idx:idx_end])
idx <- idx_end + 1
}
if (self$count) {
idx_end <- idx + self$G - 1
transformed_theta[idx:idx_end] <- softplus(theta[idx:idx_end])
idx <- idx_end + 1
}
if (self$spatial_dep) {
idx_end <- idx + self$G - 1
transformed_theta[idx:idx_end] <- tanh(theta[idx:idx_end])
idx <- idx_end + 1
}
if (self$temporal_dep) {
idx_end <- idx + self$G - 1
transformed_theta[idx:idx_end] <- tanh(theta[idx:idx_end])
}
return(transformed_theta)
},
get_ldA = function(rho_vec, lambda_vec) {
## Get the log determinant of A
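# If G == 1, use the spline lookup prepared in initialize(); otherwise take
# the determinant of the NGxNG per-period block A_t and scale by TT.
# gamma is absent here: the temporal lags link each period only to earlier
# ones, making A block-triangular in time, so they do not affect log|A|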
if (self$G == 1) {
ldA <- self$ldA_fun(rho_vec[1])
} else {
A_t <- self$kern_obj$make_A_t(rho_vec, lambda_vec)
# lud <- lu(A_t)
# ldA <- self$TT*(sum(log(diag(lud@L))) + sum(log(diag(lud@U))))
ldA <- self$TT*determinant(A_t)$modulus
}
return(ldA)
},
E_llik = function(theta) {
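# Expected complete-data log-likelihood of the Gaussian latent model:
# ll = log|A| - 0.5*sum(log sigma2_i) - 0.5*kern, where kern is the
# quadratic form (A y* - X beta)' Sigma^{-1} (A y* - X beta) averaged
# over the stored y* sample; returned negated for the minimizer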
par <- self$unpack_theta(theta)
beta.ls <- par$beta.ls
sigma2_vec <- par$sigma2_vec
rho_vec <- par$rho_vec
gamma_vec <- par$gamma_vec
lambda_vec <- par$lambda_vec
## Safety check
if (any(!is.finite(1/sigma2_vec))) {
return(NaN)
}
## Compute expected kernel
inv_sigma2_long <- (1/sigma2_vec)[self$outcome_index_vec]
kern <- self$kern_obj$get_E_kernel(rho_vec, gamma_vec, lambda_vec,
inv_sigma2_long, unlist(beta.ls))
## Compute log determinant
ldA <- self$get_ldA(rho_vec, lambda_vec)
## Expected likelihood
sigma2_long <- (sigma2_vec)[self$outcome_index_vec]
ll <- ldA - sum(log(sqrt(sigma2_long))) - 0.5*kern
return(-1*ll)
},
check_constraint = function(theta) {
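# For each outcome k: rho_k plus the lambdas of all pairs involving k;
# update_theta() bounds each of these sums to (-1, 1)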
par <- self$unpack_theta(theta)
constraint_vec <- rep(NA, self$G)
for (k in 1:self$G) {
lambda_k <- sum(par$lambda_vec[self$lambda_map.ls[[k]]])
constraint_vec[k] <- par$rho_vec[k] + lambda_k # + par$gamma_vec[k]
}
return(constraint_vec)
},
update_theta = function() {
theta_init <- self$pack_theta()
ineqLB <- rep(-1, self$G)
ineqUB <- rep(1, self$G)
fit <- solnp(pars = theta_init, fun = self$E_llik,
ineqfun = self$check_constraint,
ineqLB = ineqLB, ineqUB = ineqUB,
control = list(trace = 0))
par <- self$unpack_theta(fit$pars)
self$beta.ls <- par$beta.ls
self$sigma2_vec <- par$sigma2_vec
self$rho_vec <- par$rho_vec
self$gamma_vec <- par$gamma_vec
self$lambda_vec <- par$lambda_vec
},
train = function(maxiter = 20, M = 50, abs_tol = 1e-3, soft_init = TRUE, burnin = 50, thinning = 1, verbose = FALSE) {
# Learn the parameters
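# Monte Carlo EM by coordinate ascent: alternate between sampling the
# latent y* given the current params (E-step) and maximizing the expected
# likelihood via update_theta() (M-step), stopping once the largest
# absolute parameter change drops below abs_tol or maxiter is reached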
iter <- 0
change_vec <- c()
max_change <- Inf
while (iter < maxiter && max_change > abs_tol) {
## Record params from last iter
theta_old <- self$pack_theta()
## In first iteration: initialize using only spatial dependence
# Otherwise coordinate ascent tends to get stuck in a local minimum
if (iter == 0 && soft_init) {
# Sample
Ms <- burnin + M*thinning
self$sample_ystar(M = Ms, ystar_init = colMeans(self$ystar_sample))
Mseq <- seq(burnin+1, Ms, by = thinning)
self$ystar_sample <- self$ystar_sample[Mseq,]
# Shut off temporal and outcome dep
od <- self$outcome_dep
self$outcome_dep <- FALSE
td <- self$temporal_dep
self$temporal_dep <- FALSE
# Update params
self$update_theta()
# Set temporal and outcome flags back to original state
self$outcome_dep <- od
self$temporal_dep <- td
}
## Train regularly and record parameter changes
Ms <- burnin + M*thinning
self$sample_ystar(M = Ms, ystar_init = colMeans(self$ystar_sample))
Mseq <- seq(burnin+1, Ms, by = thinning)
self$ystar_sample <- self$ystar_sample[Mseq,]
self$update_theta()
max_change <- max(abs(self$pack_theta() - theta_old))
change_vec <- c(change_vec, max_change)
iter <- iter + 1
if (verbose) {
print(paste0("Iteration ", iter, "; Max. parameter change = ", round(max_change, 3)))
flush.console()
}
}
return(change_vec)
},
llik_i = function(theta, i) {
par <- self$unpack_theta(theta)
beta.ls <- par$beta.ls
sigma2_vec <- par$sigma2_vec
rho_vec <- par$rho_vec
gamma_vec <- par$gamma_vec
lambda_vec <- par$lambda_vec
## Safety check
if (any(!is.finite(1/sigma2_vec))) {
return(NaN)
}
## Compute expected kernel
inv_sigma2_long <- (1/sigma2_vec)[self$outcome_index_vec]
kern <- self$kern_obj$get_kernel_i(rho_vec, gamma_vec, lambda_vec,
inv_sigma2_long, unlist(beta.ls), i-1)
## Compute log determinant
ldA <- self$get_ldA(rho_vec, lambda_vec)
## Expected likelihood
sigma2_long <- (sigma2_vec)[self$outcome_index_vec]
ll <- ldA - sum(log(sqrt(sigma2_long))) - 0.5*kern
return(ll)
},
get_gradient = function(theta, i) {
# Get gradient at a single sample
gr <- grad(func = self$llik_i, x = theta, method = 'simple',
i = i)
return(gr)
},
compute_vcov = function(M, thinning = 2, verbose = FALSE) {
## See Louis (1982): "Finding the observed information matrix when using the EM algorithm"
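# Observed information = E[-Hessian] - Var(score), with both terms
# approximated over M thinned draws of the latent y*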
self$sample_ystar(M*thinning, ystar_init = colMeans(self$ystar_sample))
Mseq <- seq(1, M*thinning, by = thinning)
self$ystar_sample <- self$ystar_sample[Mseq,]
theta <- self$pack_theta()
grad.mat <- matrix(NA, M, length(theta))
hess.ls <- vector('list', M)
for (i in 1:M) {
gr <- self$get_gradient(theta, i)
hs <- nl.jacobian(theta, self$get_gradient, i = i)
grad.mat[i,] <- gr
hess.ls[[i]] <- hs
if (verbose) {
print(i)
flush.console()
}
}
# Get observed information matrix
IM <- -1*Reduce('+', hess.ls)/M - var(grad.mat)
# Get variance estimate for parameters
VC <- solve(Matrix(IM)) # VC is inverse of observed Fisher Information
VC_pd <- as.matrix(nearPD(VC)$mat) # Force positive definite
self$VC_mat <- VC_pd
},
get_theta_names = function() {
theta_names <- c()
for (k in 1:self$G) {
if (is.null(colnames(self$X.ls[[k]]))) {
beta_names <- paste0('beta_', k, '_', 0:(ncol(self$X.ls[[k]]) - 1))
} else {
beta_names <- colnames(self$X.ls[[k]])
}
theta_names <- c(theta_names, beta_names)
}
if (self$outcome_dep) {
theta_names <- c(theta_names, paste0("lambda", 1:self$n_pairs))
}
if (self$count) {
theta_names <- c(theta_names, paste0("sigma2_", 1:self$G))
}
if (self$spatial_dep) {
theta_names <- c(theta_names, paste0("rho_", 1:self$G))
}
if (self$temporal_dep) {
theta_names <- c(theta_names, paste0("gamma_", 1:self$G))
}
return(theta_names)
},
coeftest = function(S = 1000, print_out = TRUE) {
## Does two things:
# 1. Uses MC simulation to get VC matrix for transformed params
# 2. Prints pretty regression table (or returns list with coefs & SEs)
## MC sim
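# Draw S parameter vectors from N(theta_hat, VC_mat), back-transform each
# draw, and use the empirical covariance of the transformed draws
# (a simulation-based delta method)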
theta <- self$pack_theta()
theta_trans <- self$transform_theta(theta)
theta_sim <- mvtnorm::rmvnorm(n = S, mean = theta, sigma = self$VC_mat)
theta_trans_sim <- t(apply(theta_sim, MARGIN = 1, FUN = self$transform_theta))
theta_trans_vc <- var(theta_trans_sim)
## Pack in dummy class and print coef test
names(theta_trans) <- self$get_theta_names()
dc <- make_dummyclass(theta_trans)
if (print_out) {
print(lmtest::coeftest(x = dc, vcov. = theta_trans_vc))
} else {
out <- list(coef = theta_trans, se = sqrt(diag(theta_trans_vc)))
return(out)
}
}
))
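##################################################
# Example usage (illustrative sketch)
##################################################
# A minimal, hypothetical sketch of how the class might be driven; it is not
# part of the original source. It assumes the package's helpers (make_X,
# make_spatial_weights, Kernel, the reticulate 'scipy' handle, etc.) are
# loaded, and uses simulated toy data for a two-outcome binary model.
# library(Matrix)
# set.seed(1)
# N <- 50; G <- 2; TT <- 3
# # Toy sparse weights: each unit linked to its two path neighbors
# W_t <- sparseMatrix(i = c(1:(N-1), 2:N), j = c(2:N, 1:(N-1)),
#                     x = 0.5, dims = c(N, N))
# X.ls <- replicate(G, cbind(1, rnorm(N*TT)), simplify = FALSE)
# y.ls <- replicate(G, rbinom(N*TT, 1, 0.5), simplify = FALSE)
# mod <- DISTAR$new(X.ls = X.ls, y.ls = y.ls, W_t = W_t, N = N, G = G,
#                   TT = TT, count = FALSE, spatial_dep = TRUE,
#                   outcome_dep = TRUE)
# change_vec <- mod$train(maxiter = 10, M = 50, verbose = TRUE)
# mod$compute_vcov(M = 100)
# mod$coeftest()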