Nothing
# the weighted bivariate ordinal/linear model
# YR 08 March 2020 (replacing the old lav_polyserial.R routines)
#
# - polyserial (and biserial) correlations
# - bivariate ordinal/linear regression
# - using sampling weights wt
# polyserial correlation
#
# Y1 = linear
# Y2 = ordinal
lav_bvmix_cor_twostep_fit <- function(Y1, Y2, eXo = NULL, wt = NULL,
fit.y1 = NULL, fit.y2 = NULL,
Y1.name = NULL, Y2.name = NULL,
optim.method = "nlminb1", # 0.6-7
optim.scale = 1.0,
init.theta = NULL,
control = list()) {
if (is.null(fit.y1)) {
fit.y1 <- lav_uvreg_fit(y = Y1, X = eXo, wt = wt)
}
if (is.null(fit.y2)) {
fit.y2 <- lav_uvord_fit(y = Y2, X = eXo, wt = wt)
}
# create cache environment
cache <- lav_bvmix_init_cache(fit.y1 = fit.y1, fit.y2 = fit.y2, wt = wt)
# optim.method
minObjective <- lav_bvmix_min_objective
minGradient <- lav_bvmix_min_gradient
minHessian <- lav_bvmix_min_hessian
if (optim.method == "nlminb" || optim.method == "nlminb2") {
# nothing to do
} else if (optim.method == "nlminb0") {
minGradient <- minHessian <- NULL
} else if (optim.method == "nlminb1") {
minHessian <- NULL
}
# optimize
if (is.null(control$trace)) {
control$trace <- ifelse(lav_verbose(), 1, 0)
}
# init theta?
if (!is.null(init.theta)) {
start.x <- init.theta
} else {
start.x <- cache$theta
}
# try 1
optim <- nlminb(
start = start.x, objective = minObjective,
gradient = minGradient, hessian = minHessian,
control = control,
scale = optim.scale, lower = -0.995, upper = +0.995,
cache = cache
)
# try 2
if (optim$convergence != 0L) {
optim <- nlminb(
start = start.x, objective = minObjective,
gradient = NULL, hessian = NULL,
control = control,
scale = optim.scale, lower = -0.995, upper = +0.995,
cache = cache
)
}
# try 3
if (optim$convergence != 0L) {
optim <- nlminb(
start = 0, objective = minObjective,
gradient = NULL, hessian = NULL,
control = control,
scale = 10, lower = -0.995, upper = +0.995,
cache = cache
)
}
# try 4 -- new in 0.6-8
if (optim$convergence != 0L) {
optim <- optimize(
f = minObjective, interval = c(-0.995, +0.995),
cache = cache, tol = .Machine$double.eps
)
if (is.finite(optim$minimum)) {
optim$convergence <- 0L
optim$par <- optim$minimum
}
}
# check convergence
if (optim$convergence != 0L) {
if (!is.null(Y1.name) && !is.null(Y2.name)) {
lav_msg_warn(gettextf(
"estimation polyserial correlation did not converge
for variables %1$s and %2$s", Y1.name, Y2.name))
} else {
lav_msg_warn(gettext(
"estimation polyserial correlation(s) did not always converge"))
}
rho <- cache$theta # starting value
} else {
rho <- optim$par
}
rho
}
# Y1 = linear
# Y2 = ordinal
lav_bvmix_init_cache <- function(fit.y1 = NULL,
fit.y2 = NULL,
wt = NULL,
scores = FALSE,
parent = parent.frame()) {
# data
Y1 <- fit.y1$y
Y2 <- fit.y2$y
eXo <- fit.y1$X
# extract parameters
# Y1
y1.VAR <- fit.y1$theta[fit.y1$var.idx]
y1.SD <- sqrt(y1.VAR)
y1.ETA <- fit.y1$yhat
Z <- (Y1 - y1.ETA) / y1.SD
# Y2
th.y2 <- fit.y2$theta[fit.y2$th.idx]
# exo?
if (is.null(eXo)) {
nexo <- 0L
} else {
nexo <- ncol(eXo)
}
# nobs
if (is.null(wt)) {
N <- length(Y1)
} else {
N <- sum(wt)
}
# starting value -- Olsson 1982 eq 38
if (nexo > 0L) {
# exo
if (is.null(wt)) {
COR <- cor(Z, Y2, use = "pairwise.complete.obs")
SD <- sd(Y2, na.rm = TRUE) * sqrt((N - 1) / N)
} else {
tmp <- na.omit(cbind(Z, Y2, wt))
COR <- cov.wt(x = tmp[, 1:2], wt = tmp[, 3], cor = TRUE)$cor[2, 1]
SD <- sqrt(lav_matrix_var_wt(tmp[, 2], wt = tmp[, 3]))
}
rho.init <- (COR * SD / sum(dnorm(th.y2)))
} else {
# no exo
if (is.null(wt)) {
COR <- cor(Y1, Y2, use = "pairwise.complete.obs")
SD <- sd(Y2, na.rm = TRUE) * sqrt((N - 1) / N)
} else {
tmp <- na.omit(cbind(Y1, Y2, wt))
COR <- cov.wt(x = tmp[, 1:2], wt = tmp[, 3], cor = TRUE)$cor[2, 1]
SD <- sqrt(lav_matrix_var_wt(tmp[, 2], wt = tmp[, 3]))
}
rho.init <- (COR * SD / sum(dnorm(th.y2)))
}
# sanity check
if (is.na(rho.init)) {
rho.init <- 0.0
} else if (abs(rho.init) > 0.9) {
rho.init <- rho.init / 2
}
# parameter vector
theta <- rho.init # only
# different cache if scores or not
if (scores) {
out <- list2env(
list(
nexo = nexo, theta = theta, N = N,
y1.VAR = y1.VAR, eXo = eXo,
y2.Y1 = fit.y2$Y1, y2.Y2 = fit.y2$Y2,
Y1 = Y1, y1.SD = y1.SD, y1.ETA = y1.ETA, Z = Z,
fit.y2.z1 = fit.y2$z1, fit.y2.z2 = fit.y2$z2
),
parent = parent
)
} else {
out <- list2env(
list(
nexo = nexo, theta = theta, N = N,
Y1 = Y1, y1.SD = y1.SD, y1.ETA = y1.ETA, Z = Z,
fit.y2.z1 = fit.y2$z1, fit.y2.z2 = fit.y2$z2
),
parent = parent
)
}
out
}
# casewise likelihoods, unweighted!
lav_bvmix_lik_cache <- function(cache = NULL) {
with(cache, {
rho <- theta[1L]
R <- sqrt(1 - rho * rho)
# p(Y2|Y1)
tauj.star <- (fit.y2.z1 - rho * Z) / R
tauj1.star <- (fit.y2.z2 - rho * Z) / R
py2y1 <- pnorm(tauj.star) - pnorm(tauj1.star)
# TODO, check when to use 1 - pnorm()
py2y1[py2y1 < .Machine$double.eps] <- .Machine$double.eps
# p(Y1)
py1 <- dnorm(Y1, mean = y1.ETA, sd = y1.SD)
# lik
lik <- py1 * py2y1
# catch very small values
lik.toosmall.idx <- which(lik < sqrt(.Machine$double.eps))
lik[lik.toosmall.idx] <- as.numeric(NA)
return(lik)
})
}
lav_bvmix_logl_cache <- function(cache = NULL) {
with(cache, {
lik <- lav_bvmix_lik_cache(cache) # unweighted!
if (!is.null(wt)) {
logl <- sum(wt * log(lik), na.rm = TRUE)
} else {
logl <- sum(log(lik), na.rm = TRUE)
}
return(logl)
})
}
lav_bvmix_gradient_cache <- function(cache = NULL) {
with(cache, {
rho <- theta[1L]
y.Z1 <- dnorm(tauj.star)
y.Z2 <- dnorm(tauj1.star)
pyx.inv.R3 <- 1 / (py2y1 * R * R * R)
# rho
d1 <- fit.y2.z1 * rho - Z
d2 <- fit.y2.z2 * rho - Z
dx <- pyx.inv.R3 * (y.Z1 * d1 - y.Z2 * d2)
# to be consistent with (log)lik_cache
if (length(lik.toosmall.idx) > 0L) {
dx[lik.toosmall.idx] <- as.numeric(NA)
}
if (is.null(wt)) {
dx.rho <- sum(dx, na.rm = TRUE)
} else {
dx.rho <- sum(wt * dx, na.rm = TRUE)
}
return(dx.rho)
})
}
# YR 29 March 2020
# obtained by using 'Deriv' (from package Deriv) on the
# gradient function, and cleaning up
# correct, but not good enough
lav_bvmix_hessian_cache <- function(cache = NULL) {
with(cache, {
rho <- theta[1L]
R2 <- R * R
t1 <- Z - rho * tauj.star / R
t2 <- Z - rho * tauj1.star / R
tmp <- (y.Z1 * (d1 * ((3 * rho / R2) + tauj.star * t1 / R)
+ fit.y2.z1 + dx * R2 * t1)
- y.Z2 * (d2 * ((3 * rho / R2) + tauj1.star * t2 / R)
+ fit.y2.z2 + dx * R2 * t2)
)
# to be consistent with (log)lik_cache
if (length(lik.toosmall.idx) > 0L) {
tmp[lik.toosmall.idx] <- as.numeric(NA)
}
if (is.null(wt)) {
H <- sum(tmp * pyx.inv.R3, na.rm = TRUE)
} else {
H <- sum(wt * (tmp * pyx.inv.R3), na.rm = TRUE)
}
dim(H) <- c(1L, 1L) # for nlminb
return(H)
})
}
# compute total (log)likelihood, for specific 'x' (nlminb)
lav_bvmix_min_objective <- function(x, cache = NULL) {
cache$theta <- x
-1 * lav_bvmix_logl_cache(cache = cache) / cache$N
}
# compute gradient, for specific 'x' (nlminb)
lav_bvmix_min_gradient <- function(x, cache = NULL) {
# check if x has changed
if (!all(x == cache$theta)) {
cache$theta <- x
tmp <- lav_bvmix_logl_cache(cache = cache)
}
-1 * lav_bvmix_gradient_cache(cache = cache) / cache$N
}
# compute hessian, for specific 'x' (nlminb)
lav_bvmix_min_hessian <- function(x, cache = NULL) {
# check if x has changed
if (!all(x == cache$theta)) {
tmp <- lav_bvmix_logl_cache(cache = cache)
tmp <- lav_bvmix_gradient_cache(cache = cache)
}
-1 * lav_bvmix_hessian_cache(cache = cache) / cache$N
}
lav_bvmix_cor_scores_cache <- function(cache = NULL,
sigma.correction = FALSE,
na.zero = FALSE) {
with(cache, {
rho <- theta[1L]
R <- sqrt(1 - rho * rho)
tauj.star <- (fit.y2.z1 - rho * Z) / R
tauj1.star <- (fit.y2.z2 - rho * Z) / R
y.Z1 <- dnorm(tauj.star)
y.Z2 <- dnorm(tauj1.star)
# p(Y2|Y1)
py2y1 <- pnorm(tauj.star) - pnorm(tauj1.star)
py2y1[py2y1 < .Machine$double.eps] <- .Machine$double.eps
pyx.inv <- 1 / py2y1
# mu.y1
y.Z1.y.Z2 <- y.Z1 - y.Z2
dx.mu.y1 <- 1 / y1.SD * (Z + (pyx.inv * (rho / R) * y.Z1.y.Z2))
if (!is.null(wt)) {
dx.mu.y1 <- wt * dx.mu.y1
}
# var.y1
dx.var.y1 <- 1 / (2 * y1.VAR) * (((Z * Z) - 1) +
(pyx.inv * rho * Z / R) * y.Z1.y.Z2)
if (!is.null(wt)) {
dx.var.y1 <- wt * dx.var.y1
}
# th.y2
dx.th.y2 <- (y2.Y1 * y.Z1 - y2.Y2 * y.Z2) * 1 / R * pyx.inv
if (!is.null(wt)) {
dx.th.y2 <- wt * dx.th.y2
}
# sl.y1
dx.sl.y1 <- NULL
if (nexo > 0L) {
dx.sl.y1 <- dx.mu.y1 * eXo
# if(!is.null(wt)) {
# dx.mu.y1 had already been weighted
# }
}
# sl.y2
dx.sl.y2 <- NULL
if (nexo > 0L) {
dx.sl.y2 <- (y.Z2 - y.Z1) * eXo * 1 / R * pyx.inv
if (!is.null(wt)) {
dx.sl.y2 <- wt * dx.sl.y2
}
}
# rho
TAUj <- y.Z1 * (fit.y2.z1 * rho - Z)
TAUj1 <- y.Z2 * (fit.y2.z2 * rho - Z)
dx.rho <- pyx.inv * 1 / (R * R * R) * (TAUj - TAUj1)
if (!is.null(wt)) {
dx.rho <- wt * dx.rho
}
# FIXME: only tested for non_exo!
# used by pml_deriv1()
if (sigma.correction) {
dx.rho.orig <- dx.rho
dx.var.y1.orig <- dx.var.y1
# sigma
dx.rho <- dx.rho.orig / y1.SD
# var
COV <- rho * y1.SD
dx.var.y1 <- (dx.var.y1.orig -
1 / 2 * COV / y1.VAR * 1 / y1.SD * dx.rho.orig)
}
out <- list(
dx.mu.y1 = dx.mu.y1, dx.var.y1 = dx.var.y1,
dx.th.y2 = dx.th.y2,
dx.sl.y1 = dx.sl.y1, dx.sl.y2 = dx.sl.y2,
dx.rho = dx.rho
)
return(out)
})
}
# casewise scores
#
# Y1 = linear
# Y2 = ordinal
lav_bvmix_cor_scores <- function(Y1, Y2, eXo = NULL, wt = NULL,
rho = NULL,
fit.y1 = NULL, fit.y2 = NULL,
evar.y1 = NULL, beta.y1 = NULL,
th.y2 = NULL, sl.y2 = NULL,
sigma.correction = FALSE,
na.zero = FALSE) {
if (is.null(fit.y1)) {
fit.y1 <- lav_uvreg_fit(y = Y1, X = eXo, wt = wt)
}
if (is.null(fit.y2)) {
fit.y2 <- lav_uvord_fit(y = Y2, X = eXo, wt = wt)
}
# update z1/z2 if needed (used in pml_deriv1() in lav_model_gradient_pml.R)
fit.y1 <- lav_uvreg_update_fit(
fit.y = fit.y1, evar.new = evar.y1,
beta.new = beta.y1
)
fit.y2 <- lav_uvord_update_fit(
fit.y = fit.y2, th.new = th.y2,
sl.new = sl.y2
)
# create cache environment
cache <- lav_bvmix_init_cache(
fit.y1 = fit.y1, fit.y2 = fit.y2, wt = wt,
scores = TRUE
)
cache$theta <- rho
SC <- lav_bvmix_cor_scores_cache(
cache = cache,
sigma.correction = sigma.correction,
na.zero = na.zero
)
SC
}
# logl - no cache
lav_bvmix_logl <- function(Y1, Y2, eXo = NULL, wt = NULL,
rho = NULL,
fit.y1 = NULL, fit.y2 = NULL,
evar.y1 = NULL, beta.y1 = NULL,
th.y2 = NULL, sl.y2 = NULL) {
if (is.null(fit.y1)) {
fit.y1 <- lav_uvreg_fit(y = Y1, X = eXo, wt = wt)
}
if (is.null(fit.y2)) {
fit.y2 <- lav_uvord_fit(y = Y2, X = eXo, wt = wt)
}
# update z1/z2 if needed (used in pml_deriv1() in lav_model_gradient_pml.R)
fit.y1 <- lav_uvreg_update_fit(
fit.y = fit.y1, evar.new = evar.y1,
beta.new = beta.y1
)
fit.y2 <- lav_uvord_update_fit(
fit.y = fit.y2, th.new = th.y2,
sl.new = sl.y2
)
# create cache environment
cache <- lav_bvmix_init_cache(
fit.y1 = fit.y1, fit.y2 = fit.y2, wt = wt,
scores = TRUE
)
cache$theta <- rho
lav_bvmix_logl_cache(cache = cache)
}
# lik - no cache
lav_bvmix_lik <- function(Y1, Y2, eXo = NULL, wt = NULL,
rho = NULL,
fit.y1 = NULL, fit.y2 = NULL,
evar.y1 = NULL, beta.y1 = NULL,
th.y2 = NULL, sl.y2 = NULL,
.log = FALSE) {
if (is.null(fit.y1)) {
fit.y1 <- lav_uvreg_fit(y = Y1, X = eXo, wt = wt)
}
if (is.null(fit.y2)) {
fit.y2 <- lav_uvord_fit(y = Y2, X = eXo, wt = wt)
}
# update z1/z2 if needed (used in pml_deriv1() in lav_model_gradient_pml.R)
fit.y1 <- lav_uvreg_update_fit(
fit.y = fit.y1, evar.new = evar.y1,
beta.new = beta.y1
)
fit.y2 <- lav_uvord_update_fit(
fit.y = fit.y2, th.new = th.y2,
sl.new = sl.y2
)
# create cache environment
cache <- lav_bvmix_init_cache(
fit.y1 = fit.y1, fit.y2 = fit.y2, wt = wt,
scores = TRUE
)
cache$theta <- rho
lik <- lav_bvmix_lik_cache(cache = cache) # unweighted
if (.log) {
lik <- log(lik)
}
if (!is.null(wt)) {
if (.log) {
lik <- wt * lik
} else {
tmp <- wt * log(lik)
lik <- exp(tmp)
}
}
lik
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.