#' Fit monotone-constrained polynomial LMM with an EM algorithm
#'
#' EXPIREMENTAL: Fit polynomial constrained mixed effect model with monotonic mean and (potentially) monotonic random effects (\eqn{r <= 2} for monotone random effects).
#' See \code{\link{constrained_lmm_mcem}} for (\eqn{r > 2}).
#' For use with block-random effects (non-crossed), e.g. subject-specific random effects.
#'
#' The interface and default actions of this function are under development and may change without warning. See the mixed effects vignette for usage details.
#'
#' @param model model specification made by \code{\link{make_em_model_specs}}.
#' @param maxit maximum number of iterations.
#' @param tol relative tolerance for convergence of parameter values.
#' @param start start values for parameters. Defaults to NULL.
#' @param verbose output current iteration and quasi-log-likelihood value.
#' @param save_steps should each em step be saved and return in a list?
#' @param check_up should the algorithm check if the quasi-log-likelihood is improving each iteration?
#' @keywords internal
#' @return list including estimated mixed effects model.
constrained_lmm_em <- function(model, maxit = 200, tol, start = NULL, verbose = F, save_steps = F, check_up = F){
if(missing(model)) stop("model argument must be specified. Use gcreg:::make_em_model_specs().")
# Algorithm constants, functions
# create orthonormal polynomial basis
pl_base <- make_disc_orthonormal_basis(x = model$dat$x, deg = model$specs$p - 1)
# gradient of basis
Dpl_base <- deriv(pl_base)
# converter matrices between bases for mean and random effects
mean_pl_cv <- gen_poly_basis_converters(pl_base)
grp_pl_cv <- gen_poly_basis_converters(pl_base[1:model$specs$r])
# oracle function
monotone_oracle <- function(b) is_monotone(p = mean_pl_cv$to_mono(b), region = model$mcontr_region)
# Orthonormal X matrix from basis and x - mean curve
Xo <- poly_basis_design_matrix(poly_basis = pl_base, x = model$dat$x)
# observation matrix
Y <- model$dat$y
# Orthonormal Z matrices - group curve
Zo_i <- list()
for(gr in 1:model$specs$g){
Zo_i[[gr]] <- Xo[model$dat$grp == model$group_ids[gr], 1:model$specs$r, drop=F]
}
Zo <- bdiag(Zo_i)
# Model calculated constants
Xo_t_Y <- crossprod(Xo, Y) # unconstrained solution for fixed effect regression
Xo_t_Zo <- crossprod(Xo, Zo)
Zo_t_Xo <- t(Xo_t_Zo)
Zo_t_Y <- crossprod(Zo, Y)
Zo_t_Zo <- crossprod(Zo)
#### Functions ####
#### E-step functions ####
E_step <- function(em_list){
.em <- em_list
## last iterates' variance matrices
R <- bdiag(rep(list(.em$sig2),times = model$specs$N))
H <- make_L_mat(.em$omeg) %>% tcrossprod()
G <- bdiag(rep(list(H),times = model$specs$g))
## unconstrained mean/variance matrices of random effects
M_U_uc <- mean_re_unconstrained(gam = .em$gam, G = G, R = R)
V_U_uc <- var_re_unconstrained(G = G, R = R)
if(with(model$specs, r == 1 | !constr_r)){
.em$re_mean <- M_U_uc
.em$re_var <- V_U_uc
} else if(model$specs$r == 2){ # constrained and r==2
## constrained mean/variance matrices of random effects
# Also refered to as -c(beta). Calculates linear distance from monotonic boundary for r = 2 to be used in truncation of R.E.
random_slope_trunc <- - linear_dist_to_monotone_boundary_em(gam = .em$gam)$dist
# truncation vector for random intercept and slope
lower_trunc <- rep(c(-Inf, random_slope_trunc), times = model$specs$g)
# mean and variance from mtmvnorm (wrapper function)
trunc_MV <- mean_var_re_constrained2(mean_uncon = as.vector(M_U_uc), var_uncon = V_U_uc, lower_trunc = lower_trunc, em_list = .em)
# constrained mean and variance matrices
.em$re_mean <- trunc_MV$tmean
.em$re_var <- trunc_MV$tvar
} else if(model$specs$r == 3){
stop("r = 3 and constrained random effects not yet implemented")
} else {
stop("r > 3 and constrained random effects not implemented")
}
return(.em)
}
# helper functions - E step
# make lower diagonal matrix from omega coefficients of lower-diagonal log-Cholesky decomposition
make_L_mat <- function(omeg){
L <- matrix(nrow = model$specs$r, ncol = model$specs$r)
diag(L) <- exp(omeg[1:model$specs$r])
L[upper.tri(L)] <- omeg[(model$specs$r+1):length(omeg)] # populates along row...
L[lower.tri(L)] <- 0
return(L)
}
mean_re_unconstrained <- function(gam, G, R){
G_Zo_t <- tcrossprod(G, Zo)
G_Zo_t %*% solve(Zo %*% G_Zo_t + R, Y - Xo %*% gam)
}
var_re_unconstrained <- function(G, R){
G_Zo_t <- tcrossprod(G, Zo)
G - G_Zo_t %*% solve(Zo %*% G_Zo_t + R, t(G_Zo_t))
}
mean_var_re_constrained2 <- function(mean_uncon, var_uncon, lower_trunc, em_list){
# store components of bdaig matrix
trunc_moments_list <- list()
# find any truncated moments that calculated badly.
bad_r <- NULL
for(i in 1:model$specs$g){
elems <- (2 * i - 1):(2 * i) # elements to access
trunc_moments <- mtmvnorm(mean = mean_uncon[elems],
sigma = var_uncon[elems,elems],
lower = lower_trunc[elems],
doComputeVariance = T,
pmvnorm.algorithm = TVPACK() # can try different algorithms
)
# check if returned NaN, use previous mean/var instead
if(any(is.nan(trunc_moments$tmean)) | any(is.nan(trunc_moments$tvar))){
if(is.null(em_list$re_mean) & !is.null(em_list$re_var)){
trunc_moments_list[[i]] <- with(em_list, list(tmean = re_mean[elems], tvar = re_var[elems, elems]))
} else {
trunc_moments_list[[i]] <- list(tmean = mean_uncon[elems], tvar = var_uncon[elems, elems])
}
bad_r <- c(bad_r,i)
} else {
trunc_moments_list[[i]] <- trunc_moments
}
}
if(!is.null(bad_r)){
warning("moments of random effects for subject",paste(bad_r, collapse = ", "),
" unable to be truncated, using previous moments this iteration instead",
immediate. = T)
}
return(
list(tmean = lapply(trunc_moments_list, function(x){x$tmean}) %>% c(recursive = T) %>% matrix(ncol = 1),
tvar = lapply(trunc_moments_list, function(x){x$tvar}) %>% bdiag()
)
)
}
# returns in terms of orthonormal basis
linear_dist_to_monotone_boundary_em <- function(gam, EPS = 1e-06){
# polynomial in monomial basis
bet <- polynomial(mean_pl_cv$to_mono(gam))
#check if monotone
is_mono <- is_monotone(bet, region = model$mcontr_region)
if(!is_mono & any(is.infinite(model$mcontr_region))) stop("linear_dist_to_monotone_boundary_em() doesn't deal with infinite support and non-monotone polynomials")
# This effects the optimisation for aim_gamma
#calculate first, second derivative + roots
Dpl <- deriv(bet)
DDpl <-deriv(Dpl)
DDpl_roots <- polyroot(DDpl)
# getting real roots only
re_roots <- Re(DDpl_roots)
im_roots <- Im(DDpl_roots)
DDpl_re_roots <- re_roots[abs(im_roots) < EPS]
DDpl_relevant_roots <- DDpl_re_roots[min(model$mcontr_region) <= DDpl_re_roots & DDpl_re_roots <= max(model$mcontr_region)]
crit_v <- c(model$mcontr_region[is.finite(model$mcontr_region)], DDpl_relevant_roots)
# find minimum
DDpl_crit_vals <- predict(Dpl, newdata = crit_v)
which_min_Dpl <- which.min(DDpl_crit_vals)
location_x <- crit_v[which_min_Dpl]
# in monomial basis
linear_dist <- DDpl_crit_vals[which_min_Dpl]
# # in desired basis
# linear_dist_o <- grp_pl_cv$to_ortho(c(0,linear_dist))[2]
return(list(dist = linear_dist, loc = location_x))
}
####
#### M-step functions ####
M_step <- function(em_list, step_size = 0.1){ #could remove step size
.em <- em_list
par_st <- gamma_init
aim_gamma <- optim(par = em_list$gam,
fn = ql_hood_gamma_wrapper,
gr = calc_Dql_gamma_wrapper,
em_list = em_list,
method = "BFGS",
control = list(maxit = 10)
)$par
.em$gam <- optim_cols(par = em_list$gam,
Y = Y - Zo %*% em_list$re_mean,
X = Xo,
oracle_fun = monotone_oracle,
aim_gamma = aim_gamma
)
# avoid getting stuck on boundary
.em <- bounce_monotone_em(em_list = .em, aim_gam = aim_gamma, maxit_bounces = 4, maxit_bumps = 10, tol = 1e-02)
# errors
new_resid <- Y - Xo %*% .em$gam - Zo %*% .em$re_mean
# residual sum of squares (accounting for mean of random effects)
new_RSS <- (crossprod(new_resid))[1,1]
# Optimise sigma R
.em$sig2 <- (trc(Zo_t_Zo %*% .em$re_var) + new_RSS) / model$specs$N
# Optimise omegas
if(model$specs$r == 1){
.em$omeg <- (trc(.em$re_var) + crossprod(.em$re_mean))[1,1] / model$specs$g
} else {
opt_omegas <- optim(par = .em$omeg,
fn = ql_hood_omega_wrapper,
gr = calc_Dql_omega_wrapper,
em_list = .em,
method = "L-BFGS-B",
lower = with(model$specs, c(rep(-10, times = r), rep(-20, times = (r - 1) * r / 2))),
upper = with(model$specs, c(rep( 10, times = r), rep( 20, times = (r - 1) * r / 2)))
# lower/upper bounds stop numerical difficulty with solve(L). Should generically decide on these based on problem size.
)
.em$omeg <- opt_omegas$par
}
return(.em)
}
# helper functions - M step
# quasi-likelihood derivative wrt gamma
calc_Dql_gamma <- function(em_list){
Drss <- with(em_list, - 2 * (Xo_t_Y - gam - Xo_t_Zo %*% re_mean) / sig2)
if(with(model$specs, r == 2 & constr_r)){
c_beta <- linear_dist_to_monotone_boundary_em(gam = em_list$gam) # boundary for slope RE
Dc_beta <- calc_Dc_beta(gam = em_list$gam, min_x = c_beta$loc)
var_random_slope <- exp(em_list$omeg[2])
z_scr <- - c_beta$dist[1] / var_random_slope
Dpenalty <- 2 * model$specs$g * dnorm(z_scr) * Dc_beta / (var_random_slope * (1 - pnorm(z_scr)))
} else {
c_beta <- 0
Dc_beta <- 0
Dpenalty <- 0
if(with(model$specs, r > 2 & constr_r)) warning("Derivative of quasi-likelihood not defined for constrained random effects and r > 2 ")
}
return(list(drv = matrix(Drss + Dpenalty),
Drss = matrix(Drss),
Dpenalty = matrix(Dpenalty),
c_beta = c_beta,
Dc_beta = Dc_beta
)
)
}
calc_Dc_beta <- function(gam, min_x){ # using evelope theorem
sapply(Dpl_base, predict, newdata = min_x)
}
calc_Dql_gamma_wrapper <- function(gam, em_list){
.em <- em_list
.em$gam <- gam
calc_Dql_gamma(em_list = .em)$drv
}
trc <- function(x){
sum(diag(x))
}
ql_hood <- function(em_list){
# residual sums of squares for gamma, after taking expectation
RSS <- with(em_list, (crossprod(Y - Xo %*% gam - Zo %*% re_mean)[1,1] + trc(Zo %*% re_var %*% t(Zo))) / sig2)
log_det_R <- model$specs$N * log(em_list$sig2)
L <- make_L_mat(em_list$omeg)
if(class(try(solve(L),silent=T)) != "matrix") return(1e08)
L_inv <- solve(L)
G_L_inv <- bdiag(rep(list(L_inv), times = model$specs$g))
G_L_inv_re_mean <- G_L_inv %*% em_list$re_mean
# sum of squares for random effects, after taking expectation
SSRE <- (crossprod(G_L_inv_re_mean) + trc(crossprod(G_L_inv) %*% em_list$re_var))[1,1]
log_det_L <- model$specs$g * log(det(L))
# only works for r = 2
if(with(model$specs, r == 2 & constr_r)){
c_beta <- linear_dist_to_monotone_boundary_em(gam = em_list$gam) #boundary for re slope
penalty <- model$specs$g * log(1 - pnorm(- c_beta$dist / exp(em_list$omeg[2]))) # penalty term # lower.tail = FALSE if monotonically decreasing
} else {
penalty <- 0
}
RSS + SSRE + log_det_R + 2 * (log_det_L + penalty) # deviance scale
}
ql_hood_omega_wrapper <- function(omeg, em_list){
.em <- em_list
.em$omeg <- omeg
ql_hood(em_list = .em)
}
ql_hood_gamma_wrapper <- function(gam, em_list){
.em <- em_list
.em$gam <- gam
ql_hood(em_list = .em)
}
calc_Dql_omega <- function(em_list){
L <- make_L_mat(em_list$omeg)
if(class(try(solve(L),silent=T)) != "matrix") return(rep(1e08, times = length(em_list$omeg)))
L_inv <- solve(L)
G_L_inv <- bdiag(rep(list(L_inv), times = model$specs$g))
H_inv <- crossprod(L_inv)
G_inv <- bdiag(rep(list(H_inv), times = model$specs$g))
DL_omega_diag_elements <- list()
for(i in 1:model$specs$r){
DL_omega_diag_elements[[i]] <- matrix(0, ncol = model$specs$r, nrow = model$specs$r)
DL_omega_diag_elements[[i]][i,i] <- L[i,i]
}
DL_omega_upper_elements <- list()
for(i in 1:(model$specs$r-1)){
for(j in (i+1):model$specs$r){
dL <- matrix(0, nrow = model$specs$r, ncol = model$specs$r)
dL[i,j] <- 1
DL_omega_upper_elements[[length(DL_omega_upper_elements)+1]] <- dL
}
}
DG_L_omega <- c(DL_omega_diag_elements, DL_omega_upper_elements) %>% lapply(FUN = function(dL){
bdiag(rep(list(dL),times = model$specs$g))})
Dql_omega_d <- sapply(DG_L_omega, FUN = function(dGL){
var_temp <- G_inv %*% dGL %*% G_L_inv
dlomega <- 2 * (trc(G_L_inv %*% dGL) - trc(var_temp %*% em_list$re_var) - t(em_list$re_mean) %*% var_temp %*% em_list$re_mean)
dlomega[1,1]
})
# penalty term derivative
if(with(model$specs, r == 2 & constr_r)){
c_beta <- linear_dist_to_monotone_boundary_em(gam = em_list$gam) # boundary for slope RE
var_random_slope <- exp(em_list$omeg[2])
z_scr <- - c_beta$dist[1] / var_random_slope
Dpenalty <- - 2 * model$specs$g * dnorm(z_scr) * c_beta$dist[1] / ((var_random_slope) * (1 - pnorm(z_scr)))
Dql_omega_d[2] <- Dql_omega_d[2] + Dpenalty
}
return(matrix(Dql_omega_d, ncol = 1))
}
calc_Dql_omega_wrapper <- function(omeg, em_list){
.em <- em_list
.em$omeg <- omeg
calc_Dql_omega(em_list = .em)
}
bounce_monotone_em <- function(em_list, aim_gam, maxit_bounces = 2, maxit_bumps = 5, tol = 1e-02){
Dpl_base <- deriv(pl_base)
old_ql_hood <- -Inf
new_ql_hood <- 0
cur_em <- em_list
bb <- 1
#while(abs(new_ql_hood - old_ql_hood) > tol & bb <= maxit_bounces){
while(new_ql_hood > old_ql_hood & bb <= maxit_bounces){
old_ql_hood <- new_ql_hood <- ql_hood(cur_em)
loc <- linear_dist_to_monotone_boundary_em(gam = cur_em$gam)$loc
bump_dir <- sapply(Dpl_base, predict, newdata = loc)
bump_norm <- bump_dir / sqrt(sum(bump_dir^2))
max_bump <- bump_norm
new_start <- cur_em$gam + max_bump
while(!monotone_oracle(new_start)){
max_bump <- max_bump / 2
new_start <- cur_em$gam + max_bump
new_start_neg <- cur_em$gam - max_bump
if(monotone_oracle(new_start_neg) & !monotone_oracle(new_start)){
new_start <- new_start_neg
max_bump <- - max_bump
}
}
bump <- max_bump / 2
for(kk in 1:maxit_bumps){
temp_em <- cur_em
temp_em$gam <- optim_cols(par = new_start, aim_gamma = aim_gam, Y = Y, X = Xo, oracle_fun = monotone_oracle)
temp_ql_hood <- ql_hood(temp_em)
if(temp_ql_hood < old_ql_hood) {
cur_em <- temp_em
new_ql_hood <- temp_ql_hood
break
}
bump <- bump / (2^(kk))
new_start <- cur_em$gam + bump
}
bb <- bb + 1
}
#cat(cur_em$gam,"\n")
return(cur_em)
}
####
# Algorithm storage, initialisation
if(save_steps) em <- list()
# Some choice for initial par in monomial basis
if(is.null(start$beta)){
fe_fit <- cpm(formula = y ~ x, data = model$dat, degree = model$specs$p - 1, constraint = "monotone", c_region = model$mcontr_region)
beta_init <- coef(fe_fit)
} else {
beta_init <- start$beta
}
if(!is_monotone(beta_init, region = model$mcontr_region)) stop("Initial parameter chosen is not monotone over required region")
gamma_init <- mean_pl_cv$to_ortho(beta_init)
# gam = estimate mean polynomial in orthonormal basis
# sig2 = variance of the error
# omeg = paramters for log-Cholesky decomposition of the random effects var-covar matrix
if(is.null(start$sig2) | length(start$sig2) != 1){
sig2_init <- 0.5
} else {
sig2_init <- start$sig2
}
omeg_length <- with(model$specs, r*(r+1)/2)
if(is.null(start$omeg) | length(start$omeg) != omeg_length){
omeg_init <- rep(0.5, times = omeg_length)
} else {
omeg_init <- start$omeg
}
prev_em <- list(gam = gamma_init,
sig2 = sig2_init,
omeg = omeg_init
)
if(save_steps) {
em[[1]] <- prev_em
} else {
em <- F
}
# EM Algorithm Components
param_convg_check <- function(em1, em2, tol){
nms <- c("gam","sig2","omeg")
for(n in nms){
if(!all(abs((em2[[n]] - em1[[n]])/(em1[[n]] + 0.01)) < tol)) return(F)
}
return(T)
}
# Run EM Algorithm
# start vals
convg <- F
convg_status <- paste("did not converge in",maxit, "iterations")
which_min <- 0
i <- 1
ql_hood_vals <- numeric(length = maxit)
ql_hood_vals[1] <- prev_em %>% E_step() %>% ql_hood()
if(verbose) cat("it:", 1, "ql:", ql_hood_vals[1], "\n")
while(!convg & i < maxit){
new_em <- prev_em %>% E_step() %>% M_step()
ql_hood_vals[i+1] <- new_em %>% ql_hood()
# check monotonicity (keeping for testing)
if(!monotone_oracle(new_em$gam)) stop("Gamma not monotone: it = ", i + 1)
# check convergence
if(param_convg_check(prev_em, new_em, tol = tol)){
convg <- T
convg_status <- paste("Parameters converged with tol:", tol)
ql_hood_vals <- ql_hood_vals[1:(i + 1)]
which_min <- i + 1
res <- new_em
} else if(check_up & i > 1){
#ql_hood_check <- prev_em %>% E_step() %>% ql_hood()
ql_hood_check <- ql_hood_vals[i]
if(ql_hood_vals[i+1] > ql_hood_check){ # on divergence scale so should be decreasing
#if(verbose) cat("quasi-log-likelihood did not decrease \n")
convg <- T
convg_status <- paste("quasi-log-likelihood did not decrease")
ql_hood_vals <- ql_hood_vals[1:(i+1)]
best_models <- list(prev2_em, prev_em, new_em) %>%
lapply(function(e){e %>% E_step()})
which_min <- best_models %>% sapply(ql_hood) %>% which.min()
res <- best_models[[which_min]]
}
}
if(save_steps) em[[i + 1]] <- new_em
i <- i + 1
prev_em <- new_em
prev2_em <- prev_em
if(verbose) cat("it:", i, "ql:", ql_hood_vals[i], "\n")
}
if(maxit <= i){
res <- new_em %>% E_step()
}
beta_mean <- mean_pl_cv$to_mono(res$gam)
beta_grp <- lapply(seq(0, nrow(res$re_mean)-1, by = model$specs$r), FUN =
function(i){
beta_mean +
c(grp_pl_cv$to_mono(res$re_mean[i + 1:model$specs$r,]),
rep(0, times = model$specs$p - model$specs$r)
)
}
)
names(beta_grp) <- model$group_ids
return(list(beta_mean = beta_mean,
beta_grp = beta_grp,
H_mat = tcrossprod(make_L_mat(res$omeg)),
sig2 = res$sig2,
res = res,
em_list = em,
ql_vals = ql_hood_vals,
which_min = which_min,
converged = convg,
status = convg_status,
funcs = list(
ql_hood = ql_hood,
mean_pl_cv = mean_pl_cv,
grp_pl_cv = grp_pl_cv,
monotone_oracle = monotone_oracle,
E_step = E_step,
M_step = M_step
),
model = model
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.