#' @export
MErf <- function(X, ...) UseMethod("MErf")
#' @title Mixed Effect random forest
#' @description
#' Trains a Mixed Effect random forest
#' for longitudinal continuous and binary
#' data. A rule based version or these methods
#' using the \code{inTree} package is also implemented(see [1])
#' @param form formula
#' @param dat data.frame with predictors
#' @param groups character name of the column containing the group identifier
#' @param family a GLM family: continuous data set family = "gaussian", binary data set
#' family = "binomial" see \code{glm} and \code{family}
#' @param rand.vars random effect variables
#' @param para named list of gbm training parameters
#' @param glmer.Control \code{glmer} or \code{lmer} control
#' @param tol convergence tolerance
#' @param max.iter maximum number of iterations
#' @param include.RE (logical) to include random effect Zb as predictor in gbm?
#' @param verbose verbose for lme4
#' @param likelihoodCheck (logical) to use log likelihood of glmer to check for convergence?
#' @param type of predictions of gbm to pass to lme4 as population estimates (these will be used as offset)
#' @param \dots Further arguments passed to or from other methods.
#' @return An object of class MEgbm; a list with items
#' \item{rf.fit}{fitted random forest model}
#' \item{glmer.fit}{fitted mixed effect logistic regression model}
#' \item{logLik}{log likelihood of mixed effect logistic regression}
#' \item{random.effects}{random effect parameter estimates}
#' \item{glmer.form}{lmer4 formula}
#' \item{glmer.CI}{estimates of mixed effect logistic regression with
#' approximate confidence intervals on the logit scale. More accurate values
#' can be obtained by bootstrap}
#' \item{threshold}{classification cut-off}
#' \item{predRules}{fitted rules}
#' \item{Y.star}{fitted "transform" outcome. This is the same as the predicted outcome for binary data}
#
#' @author Che Ngufor <Ngufor.Che@@mayo.edu>
#
#' @references
#' Che Ngufor, Holly Van Houten, Brian S. Caffo , Nilay D. Shah, Rozalina G. McCoy
#' Mixed Effect Machine Learning: a framework for predicting longitudinal change in hemoglobin A1c,
#' in Journal of Biomedical Informatics, 2018
#' @import lme4 caret partykit inTrees
#' @export
MErf <- function(form,
dat,
groups = NULL,
family,
rand.vars="1",
para = NULL,
tol= 1e-5,
max.iter =100,
include.RE =FALSE,
verbose = FALSE, maxdepth=5,
glmer.Control=glmerControl(optimizer = "bobyqa"),
nAGQ=0,
likelihoodCheck = TRUE,
K=3,
decay = 0.05, ...){
if(is.null(groups)) stop("please provide grouping variable")
rhs.vars <- rhs.form(form)
resp.vars <- lhs.form(form)
dat$Y <- Y <- dat[, resp.vars]
old.lik <- -Inf
if(likelihoodCheck == FALSE){
n.re <- sum(rand.vars != 1)+1
b.old <-rep(0, n.re*nlevels(factor(dat[, groups]))) ## initial random effect parameters
}
## family
if(is.character(family))
family <- get(family, mode = "function", envir = parent.frame())
if(is.function(family)) family <- family()
if(is.null(family$family)) {
print(family)
stop("'family' not recognized")
}
### initial random effect component: fit a LME model with no fixed effects, ie
### a priori known mean of 0
form.glmer <- as.formula(paste0("Y ~ ", paste0(c(paste0("1", collapse = "+"), "+", "(",
paste0(c(rand.vars), collapse = "+"), "|", groups, ")"), collapse = "")))
glmer.fit <- myglmer(form = form.glmer, dat = dat, family = family, control = glmer.Control, nAGQ=0,
verbose = as.numeric(verbose))
pp = predict(glmer.fit, newdata = dat, type = "response")
if (family$family == "binomial"){
pp <- ifelse(abs(pp -1) <= 1e-16, pp-1e-16, ifelse(pp <= 1e-16, pp+1e-16, pp))
w = pp*(1-pp)
Y.star <- qlogis(pp) + (Y - pp)/w
}
else if(family$family == "gaussian") {
Y.star = Y
w = rep(1, length(Y))
} else {
print(family)
stop("'family' is not yet implemented")
}
### get the random effect component
Zt <- getME(glmer.fit, name = "Zt")
b <- getME(glmer.fit, name = "b")
Zb <- as.numeric(cprod(x = Zt, y = b))
### adjusted target
Target <- Y.star - Zb
dat[, "Target"] <- Target
dat[, "Zb"] <- Zb
if(include.RE) rhs.vars <- unique(c(rhs.vars, "Zb"))
rf.form <- as.formula(paste0("Target ~", paste0(rhs.vars, collapse = "+")))
partvars <- c("TreeConditions")
### glmer formula
glmer.form <- as.formula(paste0("Y ~ ", paste0(c(paste0(partvars, collapse="+"), "+",
"(", paste0(c(rand.vars), collapse = "+"), "|", groups, ")"), collapse = "")))
for(ii in 1:max.iter){
#### fit boosted regression trees
if(para$opt.para) {
fitControl <- trainControl(method = para$method, number = para$number)
mod <- train(form=rf.form, data = dat, method = "RRF", tuneLength = para$tuneLength, trControl = fitControl, metric = "RMSE",
ntree = para$ntree, importance = TRUE, keep.inbag = TRUE, replace = FALSE, proximity=FALSE)
para$mtry <- mod$bestTune$mtry
rf.fit <- RRF(dat[, rhs.form(rf.form)], dat[, lhs.form(rf.form)], ntree=para$ntree, mtry = para$mtry,
importance = FALSE, proximity=FALSE, keep.inbag=TRUE, replace=FALSE)
} else {
rf.fit <- RRF(dat[, rhs.form(rf.form)], dat[, lhs.form(rf.form)], ntree=para$ntree, mtry = max(floor(length(rhs.form(rf.form))/3), 1),
importance = FALSE, proximity=FALSE, keep.inbag=TRUE, replace=FALSE)
}
treeList <- RF2List(rf.fit)
ruleExec = myextractRules(treeList, dat[, rhs.form(rf.form)], ntree=floor(0.5*para$ntree), maxdepth = maxdepth, random=FALSE)
ruleExec <- unique(ruleExec)
# splt <- SplitVector(Target, K = K)
# target <- splt$newV
ruleMetric <- getRuleMetric(ruleExec,dat[,rhs.vars],Target)
ruleMetric <- pruneRule(ruleMetric,dat[,rhs.vars], Target, decay)
ruleMetric <- unique(ruleMetric)
learner <- buildLearner(ruleMetric,dat[,rhs.vars], Target)
pred <- myapplyLearner(learner, dat[,rhs.vars])
dat[, "TreeConditions"] <- factor(pred$predCond)
levels(dat[, "TreeConditions"]) <- paste0("Rule.", 1:nlevels(dat[, "TreeConditions"]))
if(length(unique(dat[, "TreeConditions"])) < 2) dat[, "TreeConditions"] <- 1
glmer.fit <- myglmer(form = glmer.form, dat= dat, family = family, control = glmer.Control,
nAGQ= nAGQ, verbose = as.numeric(verbose))
# get predicted probabilities and compute transformed response
pp <- predict(glmer.fit, newdata = dat, type = "response")
if (family$family == "binomial"){
pp <- ifelse(abs(pp -1) <= 1e-16, pp-1e-16, ifelse(pp <= 1e-16, pp+1e-16, pp))
w = pp*(1-pp)
Y.star <- qlogis(pp) + (Y - pp)/w
}
else if(family$family == "gaussian") {
Y.star = Y
w = rep(1, length(Y))
} else {
print(family)
stop("'family' is not yet implemented")
}
Z <- getME(glmer.fit, name = "Z")
b <- getME(glmer.fit, name = "b")
Zb <- as.numeric(Z%*%b)
Target <- Y.star - Zb
# target <- round(w*(Target+ Zb - qlogis(pp) ) + pp)
dat[, "Target"] <- Target
dat[, "Zb"] <- Zb
### test for convergence
if(likelihoodCheck){
new.lik <- as.numeric(logLik(glmer.fit))
r <- as.numeric(sqrt(t(new.lik - old.lik)%*%(new.lik-old.lik)))
old.lik <- new.lik
} else {
r <- as.numeric(sqrt(t(b - b.old)%*%(b-b.old)))
b.old <- b
}
# if(verbose)
# print(paste("Error: ", r))
if( r < tol) break
} ## for loop
if(r > tol) warning("EM algorithm did not converge")
# fitted = predict(rf.fit, newdata = dat, type = "raw") + Zb
zz <- predict(rf.fit, newdata = dat, type = "response")
fitted = zz + Zb
threshold = NULL
if(family$family == "binomial"){
pp <- sigmoid(fitted)
pp <- ifelse(abs(pp -1) <= 1e-16, pp-1e-16, ifelse(pp <= 1e-16, pp+1e-16, pp))
perf <- Performance.measures(pp, Y)
threshold <- perf$threshold
}
rule.levels <- levels(dat[, "TreeConditions"])
### get confidence intervals for mixed effect logistic regresion: rough estimates using the SEs
se <- sqrt(diag(as.matrix(vcov(glmer.fit))))
tab <- cbind(Est = fixef(glmer.fit), LL = fixef(glmer.fit) - 1.96 * se,
UL = fixef(glmer.fit) + 1.96 *se)
# splitV <- splt$splitV
# newV <- rep(0, nrow(learner))
# labs <- learner[, "pred"]
# xx <- unique(labs)
# ix <- 1:length(xx)
# is <- which(xx == "L1")
# newV[which(labs == "L1")] <- splt$splitV[1]
# ll <- xx[xx != "L1"]
# names(splitV) <- ll
#
# if (length(splt$splitV) >= 2) {
# for (jj in 1:(length(ll)-1)) {
# newV[which(labs == ll[jj])] <- 0.5*(splt$splitV[jj] + splt$splitV[jj+1])
# }
# }
# newV[which(labs == ll[length(ll)])] <- splt$splitV[length(ll)] + 0.5
ruleMetric <- getRuleMetric(ruleExec,dat[,rhs.vars],Y)
ruleMetric <- pruneRule(ruleMetric,dat[,rhs.vars], Y)
ruleMetric <- unique(ruleMetric)
learner <- buildLearner(ruleMetric,dat[,rhs.vars], Y)
readableLearner <- data.frame(presentRules(learner, rhs.vars))
res <- list(rf.fit = rf.fit,
glmer.fit = glmer.fit,
groups = groups, para=para,
rand.vars=rand.vars,
rhs.vars = rhs.vars,
logLik=as.numeric(logLik(glmer.fit)),
random.effects =ranef(glmer.fit),
glmer.form = glmer.form,
glmer.CI =tab,
Y.star = fitted,
rf.form = rf.form,
threshold = threshold,
include.RE=include.RE,
rfRules = learner,
rfReadableRules = readableLearner,
predRules = pred,
rule.levels = rule.levels)
class(res) <- "MErf"
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.