Nothing
# source.R common functions among three
# distributions
gammas_init <- function(dat, num_Z = 0, Z_names = NULL) {
if (sum(dat$Mobs == 0) == 0) {
out <- c(-Inf, rep(0, num_Z + 1))
} else {
dat$ind_Del <- (dat$Mobs == 0) * 1
fm <- as.formula(paste0("ind_Del~", paste0(c("X",
Z_names), collapse = "+"), collapse = ""))
logitmodel <- summary(glm(fm, family = "binomial",
data = dat))
out <- logitmodel$coefficients[, "Estimate"]
}
return(out)
}
trans <- function(dat, theta, K, xval, ...) {
UseMethod("trans")
}
ComputeInit <- function(dat, K, ...) {
UseMethod("ComputeInit")
}
# negative expectation of log-likelihood
# function with respect to conditional
# distribution of 1(Ci = k) given data and
# current estimates for group 1: -Q1
negQ_G1 <- function(dat, theta, K, ...) {
UseMethod("negQ_G1")
}
G1_init <- function(dat, init, K, num_Z, Z_names,
XMint, initials_for_full = F) {
gammas <- gammas_init(dat, num_Z, Z_names)
# from G1 initials to
if (initials_for_full) {
# to set initials for parameters in G2
# for full analysis
initials <- c(init[1:2], 0, init[3], rep(0,
XMint[1] * 1), init[4:length(init)],
gammas, 0.01)
} else {
# to set parameters not estimating in
# G1 at fixed values
initials <- c(init[1:2], 0, init[3], rep(0,
ifelse(XMint[2], 1, 2)), init[4:length(init)],
gammas, 0.01)
}
# zilonm, with beta5 initials <-
# c(init[1:2], 0, init[3], 0, init[4],
# init[4 + 1:(num_Z + (2 + num_Z) * K + 2 +
# K - 1)], gammas, 0.01)
# zinbm, no beta5 initials <- c(init[1:2],
# 0, init[3], 0, init[3 + 1:(num_Z + (2 +
# num_Z) * K + 2 + K - 1)], gammas, 0.01)
# zipm, no beta5 initials <- c(init[1:2],
# 0, init[3], 0, init[3 + 1:(num_Z + (2 +
# num_Z) * K + 1 + K - 1)], gammas, 0.01)
return(initials)
}
# fix parameter not estimating in group 1
# analysis
negQ2_G1 <- function(dat, init, K, num_Z, Z_names,
XMint, B = NULL, tauG1 = NULL, calculate_tau = F,
calculate_ll = F) {
initials <- G1_init(dat, init, K, num_Z, Z_names,
XMint)
out <- negQ_G1(dat, theta = initials, K, num_Z,
Z_names, B, tauG1, calculate_tau, calculate_ll)
return(out)
}
compare_mu <- function(dat, init2, group, K, num_Z,
Z_names, XMint, B = NULL) {
if (group == 1) {
initials <- G1_init(dat, init2, K, num_Z,
Z_names, XMint)
s <- 3 + XMint[2] + num_Z
dat2 <- dat[which(dat$Mobs > 0), ]
} else {
initials <- init2
s <- 4 + XMint[1] + XMint[2] + num_Z
dat2 <- dat
}
if (num_Z == 0) {
zval <- NULL
} else {
zval <- colMeans(dat2[, Z_names, drop = F])
}
theta_trans <- trans(dat, initials, K, xval = mean(dat2$X),
num_Z, zval)
if ("zipm" %in% class(dat)) {
mean_name <- "lambda_ik"
n_M2nd_param <- 0
} else {
mean_name <- "mu_ik"
n_M2nd_param <- 1
}
ord <- order(theta_trans[[mean_name]])
if (sum(ord != 1:K) > 0) {
init2[s + 1 + 1:((2 + num_Z) * K)] <- c(matrix(theta_trans[["alpha_k"]],
ncol = K)[, ord])
init2[s + 1 + (2 + num_Z) * K + n_M2nd_param +
1:(K - 1)] <- theta_trans[["psi_k"]][ord][-K]
}
if (group == 1) {
tau2 <- negQ2_G1(dat, init2, K, num_Z, Z_names,
XMint, calculate_tau = T)
} else {
tau2 <- negQ(dat, init2, K, num_Z, Z_names,
XMint, B, calculate_tau = T)
}
return(list(init2 = init2, tau2 = tau2))
}
bounds <- function(dat, K, group, num_Z, XMint) {
UseMethod("bounds")
}
ComputeInit2 <- function(dat, K, B, ...) {
UseMethod("ComputeInit2")
}
# negative expectation of log-likelihood
# function with respect to conditional
# distribution of 1(Ci = k) given data and
# current estimates for group 2: -Q2
negQ_G2 <- function(dat, theta, K, ...) {
UseMethod("negQ_G2")
}
# fix parameter not estimating in full analysis
# (group12)
G12_init <- function(theta, XMint) {
theta <- c(theta[1:4], rep(0, !XMint[1]), theta[5:length(theta)])
theta <- c(theta[1:5], rep(0, !XMint[2]), theta[6:length(theta)])
return(theta)
}
# Q for all
negQ <- function(dat, theta, K, num_Z, Z_names, XMint,
B, tau = NULL, calculate_tau = F, calculate_ll = F) {
if (sum(dat$Mobs == 0) > 0) {
# fix parameter not estimating in full
# analysis (group12)
theta <- G12_init(theta, XMint)
if (!calculate_tau) {
if (!calculate_ll) {
out <- negQ_G1(dat, theta, K, num_Z,
Z_names, B, tauG1 = tau$tauG1,
calculate_tau, calculate_ll) +
negQ_G2(dat, theta, K, num_Z, Z_names,
B, tauG2 = tau$tauG2, calculate_tau,
calculate_ll)
} else {
out <- negQ_G1(dat, theta, K, num_Z,
Z_names, B, tauG1 = NULL, calculate_tau,
calculate_ll) + negQ_G2(dat, theta,
K, num_Z, Z_names, B, tauG2 = NULL,
calculate_tau, calculate_ll)
}
# print(theta);print(out)
} else {
out <- list(tauG1 = negQ_G1(dat, theta,
K, num_Z, Z_names, B = NULL, tauG1 = NULL,
calculate_tau, calculate_ll), tauG2 = negQ_G2(dat,
theta, K, num_Z, Z_names, B, tauG2 = NULL,
calculate_tau, calculate_ll))
}
# if (is.nan(out)) {
# print(theta);print(out) }
} else {
if (!calculate_tau) {
if (!calculate_ll) {
out <- negQ2_G1(dat, theta, K, num_Z,
Z_names, XMint, B, tauG1 = tau$tauG1,
calculate_tau, calculate_ll)
} else {
out <- negQ2_G1(dat, theta, K, num_Z,
Z_names, XMint, B, tauG1 = NULL,
calculate_tau, calculate_ll)
}
# print(theta);print(out)
} else {
out <- list(tauG1 = negQ2_G1(dat, theta,
K, num_Z, Z_names, XMint, B = NULL,
tauG1 = NULL, calculate_tau, calculate_ll))
}
}
return(out)
}
# for calculating observed I (EM approximation)
g <- function(dat, x, y, K, num_Z, Z_names, XMint,
B) {
tau_f <- negQ(dat, y, K, num_Z, Z_names, XMint,
B, calculate_tau = T)
out <- negQ(dat, x, K, num_Z, Z_names, XMint,
B, tau_f)
return(out)
}
effects <- function(dat, theta, x1, x2, K, ...) {
UseMethod("effects")
}
parameter_names <- function(dat, K, num_Z, Z_names,
XMint) {
variation <- if ("zipm" %in% class(dat))
NULL else ifelse("zilonm" %in% class(dat), "xi0",
"r")
out <- c(paste0("beta", c(0:5, Z_names)), "delta",
paste0("alpha", rep(1:K, each = 2 + num_Z),
rep(c(0:1, Z_names), K)), variation,
paste0("psi", 1:K)[-K], paste0("gamma", c(0:1,
Z_names)), "eta")
if (sum(XMint) != 2) {
out <- out[-c(5, 6)[!XMint]]
}
return(out)
}
G1_index <- function(dat, K, num_Z, XMint) {
c(1:2, 4, (4 + XMint[1] + XMint[2]) * XMint[2],
4 + XMint[1] + XMint[2] + 1:(num_Z + 1 +
(2 + num_Z) * K + ifelse("zipm" %in%
class(dat), 0, 1) + (K - 1)))
}
analysis <- function(dat, K, num_Z, Z_names, XMint,
B, limits, seed) {
cinit <- init2 <- ComputeInit2(dat, K, num_Z,
Z_names, XMint, B, limits, explicit = T)
# cinit <- init2 <- true2
if (sum(dat$Mobs == 0) == 0) {
index <- G1_index(dat, K, num_Z, XMint)
cinit <- init2 <- init2[index]
tau2 <- negQ(dat, init2, K, num_Z, Z_names,
XMint, B, calculate_tau = T)
p <- length(init2)
countEM <- NA
} else {
tau2 <- negQ(dat, init2, K, num_Z, Z_names,
XMint, B, calculate_tau = T)
p <- length(init2)
init <- rep(0, p)
countEM <- 0
bd <- bounds(dat, K, group = 2, num_Z, XMint)
# M-step
while (euclidean_dist(init, init2) > limits) {
init <- init2
tau <- tau2
# m2 <-
# nlminb(init,function(x)negQ(dat,x,K,num_Z,Z_names,
# XMint,B,tau), lower =
# c(rep(-1000,6 + 2*K),rep(0,
# K-1),-1000,-1000,1e-6,1e-6),
# upper = c(rep(1000,6 +
# 2*K),rep(1, K-1),rep(1000,4)),
# control =
# list(eval.max=5000,iter.max=5000))
m <- constrOptim(init, function(x) negQ(dat,
x, K, num_Z, Z_names, XMint, B, tau),
grad = NULL, ui = bd$ui, ci = bd$ci,
outer.iterations = 500, control = list(maxit = 50000))
# negQ(dat,true,K,num_Z,Z_names,
# XMint,B,negQ(dat,true,K,num_Z,Z_names,
# XMint,B,calculate_tau=T))
if (m$convergence != 0) {
print(paste0("seed=", seed, ", countEM=",
countEM, ", convergence=", m$convergence))
print(m)
init2 <- NA
break
} else {
init2 <- m$par
}
switch <- compare_mu(dat, init2, group = 2,
K, num_Z, Z_names, XMint, B)
init2 <- switch$init2
tau2 <- switch$tau2
countEM <- countEM + 1
# print(countEM)
}
}
est <- init2
n <- dim(dat)[1]
negll <- negQ(dat, est, K, num_Z, Z_names, XMint,
B, calculate_ll = T)
AIC <- 2 * negll + 2 * p
BIC <- 2 * negll + p * log(n)
out <- list(init = cinit, est = est, countEM = countEM,
AIC = AIC, BIC = BIC, tau2 = tau2)
return(out)
}
analysis2 <- function(dat, K, B, est, tau2, x1, x2,
num_Z, Z_names, XMint, zval, mval) {
# method 1: hessian from log-likelihood
# function, parameter estimation cannot do
# this
hess <- pracma::hessian(function(x) negQ(dat,
x, K, num_Z, Z_names, XMint, B, calculate_ll = T),
x = est)
vcovar <- solve(hess)
var <- diag(vcovar)
# if (sum(var < 0) > 0) { hess <-
# numDeriv::hessian(function(x) negQ(dat,
# x, K, num_Z, Z_names, XMint, B,
# calculate_ll = T), x = est) vcovar <-
# solve(hess) var <- diag(vcovar)
if (sum(var < 0) > 0) {
# method 2: hessian matrix from EM
# approximation
h.1 <- pracma::hessian(function(x) negQ(dat,
x, K, num_Z, Z_names, XMint, B, tau2),
x = est)
# grad() function in numDeriv
g2 <- function(y) {
numDeriv::grad(function(x) g(dat, x,
y, K, num_Z, Z_names, XMint, B),
x = est)
}
# jacobian() function in numDeriv
h.2 <- numDeriv::jacobian(g2, est)
hess <- h.1 + h.2
vcovar <- solve(hess)
var <- diag(vcovar)
if (sum(var < 0) > 0) {
print("All methods var < 0.")
}
}
# }
se <- sqrt(var)
# mediation effects
Group1 <- sum(dat$Mobs == 0) == 0
MedZIM <- effects(dat, theta = est, x1, x2, K,
num_Z, zval, mval, XMint, calculate_se = T,
vcovar, Group1)
out <- list(se = se, MedZIM = MedZIM)
return(out)
}
realanalysis <- function(dat, distM_sequence, K_sequence,
selection, XMint, x1, x2, zval, num_Z, Z_names,
mval, limits, B, seed, ncore) {
set.seed(seed)
iter <- data.frame(distM = rep(distM_sequence,
each = length(K_sequence)), K = rep(K_sequence,
length(distM_sequence)))
niter <- nrow(iter)
# t0 <- Sys.time()
cl <- parallel::makeCluster(ncore, outfile = "")
fun <- c("analysis", "bounds", "compare_mu",
"ComputeInit", "ComputeInit2", "gammas_init",
"euclidean_dist", "expit", "k_to_ik", "loghicpp_all",
"logbinom", "loghik_zinbm", "loghik_zipm",
"negQ", "negQ_G1", "negQ_G2", "negQ2_G1",
"trans")
parallel::clusterExport(cl = cl, varlist = c("nodes",
"weights", fun), envir = parent.env(environment()))
doParallel::registerDoParallel(cl)
parallel::clusterSetRNGStream(cl = cl, seed)
i <- numeric()
models <- foreach(i = seq_len(niter), .multicombine = T,
.packages = c("stats", "flexmix", "MASS"),
.combine = "list", .errorhandling = "pass") %dopar%
{
dat2 <- dat
class(dat2) <- c(iter$distM[i], class(dat2))
model_i <- tryCatch({
analysis(dat2, K = iter$K[i], num_Z,
Z_names, XMint, B, limits, seed)
}, error = function(e) {
# print(e)
list(init = NA, est = NA, countEM = NA,
AIC = Inf, BIC = Inf, tau2 = NA,
e = e)
})
# model_i <- analysis(dat2, K =
# iter$K[i], num_Z, Z_names, XMint,
# B, limits, seed)
print(paste0("distM = ", iter$distM[i],
", K = ", iter$K[i], " completed."))
return(model_i)
}
parallel::stopCluster(cl)
if (niter == 1) {
models <- list(models)
}
names(models) <- apply(iter, 1, function(t) paste0(t,
collapse = "_K"))
# print(models)
AICs <- sapply(models, function(x) x[["AIC"]])
BICs <- sapply(models, function(x) x[["BIC"]])
selection_criteria <- sapply(models, function(x) x[[selection]])
selected_model <- models[[which.min(selection_criteria)]]
selected_model_name <- names(models)[[which.min(selection_criteria)]]
selected_dist <- strsplit(selected_model_name,
"_K")[[1]][1]
selected_K <- as.numeric(strsplit(selected_model_name,
"_K")[[1]][2])
KBIC <- K_sequence[which.min(BICs)]
KAIC <- K_sequence[which.min(AICs)]
class(dat) <- c(selected_dist, class(dat))
# t1 <- Sys.time()
analysis2_out <- tryCatch({
analysis2(dat, K = selected_K, B, est = selected_model$est,
tau2 = selected_model$tau2, x1, x2, num_Z,
Z_names, XMint, zval, mval)
}, error = function(e) {
print(e)
list(se = NA, MedZIM = data.frame(eff = rep(NA,
3), eff_se = rep(NA, 3)), e = e)
})
# t2 <- Sys.time() print(paste0('EM = ',
# format(difftime(t1, t0)), ', hessian = ',
# format(difftime(t2, t1))))
results_parameters <- results(est = selected_model$est,
se = analysis2_out$se, init = selected_model$init)
paranames <- parameter_names(dat, K = selected_K,
num_Z, Z_names, XMint)
if (sum(dat$Mobs == 0) == 0) {
index <- G1_index(dat, K = selected_K, num_Z,
XMint)
paranames <- paranames[index]
}
rownames(results_parameters) <- paranames
# mediation and direct effects
out_MedZIM <- analysis2_out$MedZIM
results_effects <- results(out_MedZIM$eff, out_MedZIM$eff_se)
rownames(results_effects) <- c("NIE1", "NIE2",
"NIE", "NDE", "CDE")
out <- list(results_effects = results_effects,
results_parameters = results_parameters,
selected_model_name = selected_model_name,
BIC = BICs[which.min(selection_criteria)],
AIC = AICs[which.min(selection_criteria)],
models = models, analysis2_out = analysis2_out)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.