#' @title Fitting generalized linear mixed effect models - serverside function
#' @description glmerSLMADS.assign is the same as glmerSLMADS2 which fits a generalized linear
#' mixed effects model (glme) per study and saves the outcomes in each study
#' @details glmerSLMADS.assign is a serverside function called by ds.glmerSLMA on the clientside.
#' The analytic work engine is the glmer function in R which sits in the lme4 package.
#' glmerSLMADS.assign fits a generalized linear mixed effects model (glme) - e.g. a logistic or
#' Poisson regression model including both fixed and random effects - on data
#' from each single data source and saves the regression outcomes on the serveside.
#' @param formula see help for ds.glmerSLMA
#' @param offset see help for ds.glmerSLMA
#' @param weights see help for ds.glmerSLMA
#' @param dataName see help for ds.glmerSLMA
#' @param family see help for ds.glmerSLMA
#' @param control_type see help for ds.glmerSLMA
#' @param control_value.transmit see help for argument <control_value> for
#' function ds.glmerSLMA
#' @param nAGQ integer scalar, defaulting to 1L. IN PRACTICE, IT MAY BE NECESSARY
#' TO SET nAGQ TO 0L when the model appears to converge perfectly well (e.g. verbose=2
#' demonstrates good initial convergence of both the log-likelihood and
#' regression coefficients) but formal convergence does not get declared -
#' so no output is produced - despite running the model for many iterations.
#' The nAGQ argument is set by the nAGQ argument for ds.glmerSLMA and further
#' details can be found in help(ds.glmerSLMA) and in the native R help for glmer()
#' @param verbose see help for ds.glmerSLMA
#' @param theta see help for argument <start_theta> for
#' function ds.glmerSLMA
#' @param fixef see help for argument <start_fixef> for
#' function ds.glmerSLMA
#' @return writes glmerMod object summarising the fitted model to the serverside.
#' For more detailed information see help for ds.glmerSLMA.
#' @author Demetris Avraam for DataSHIELD Development Team
#' @export
glmerSLMADS.assign <- function(formula, offset, weights, dataName, family,
control_type=NULL, control_value.transmit=NULL, nAGQ=1L, verbose = 0, theta = NULL, fixef = NULL){
errorMessage <- "No errors"
#############################################################
# MODULE 1: CAPTURE THE nfilter SETTINGS
thr <- dsBase::listDisclosureSettingsDS()
nfilter.tab <- as.numeric(thr$nfilter.tab)
nfilter.glm <- as.numeric(thr$nfilter.glm)
#############################################################
# Get the value of the 'data' parameter provided as character on the client side
# Same is done for offset and weights lower down function
if(!is.null(dataName)){
dataDF <- eval(parse(text=dataName), envir = parent.frame())
}else{
dataDF <- NULL
}
# Put pipes back into formula
#formula = as.formula(paste(formula,collapse="|"))
formula <- Reduce(paste, deparse(formula))
formula <- gsub("xxx", "|", formula, fixed = TRUE)
formula <- gsub("yyy", "(", formula, fixed = TRUE)
formula <- gsub("zzz", ")", formula, fixed = TRUE)
formula <- gsub("ppp", "/", formula, fixed = TRUE)
formula <- gsub("qqq", ":", formula, fixed = TRUE)
formula <- stats::as.formula(formula)
# Rewrite formula extracting variables nested in strutures like data frame or list
# (e.g. D$A~D$B will be re-written A~B)
# Note final product is a list of the variables in the model (yvector and covariates)
# it is NOT a list of model terms - these are derived later
# Convert formula into an editable character string
formulatext <- Reduce(paste, deparse(formula))
# First save original model formala
originalFormula <- formulatext
# Convert formula string into separate variable names split by |
formulatext <- gsub(" ", "", formulatext, fixed=TRUE)
formulatext <- gsub("(", "", formulatext, fixed=TRUE)
formulatext <- gsub("(1", "", formulatext, fixed=TRUE)
formulatext <- gsub("(0", "", formulatext, fixed=TRUE)
formulatext <- gsub(")", "", formulatext, fixed=TRUE)
formulatext <- gsub("~", "|", formulatext, fixed=TRUE)
formulatext <- gsub("+", "|", formulatext, fixed=TRUE)
formulatext <- gsub("*", "|", formulatext, fixed=TRUE)
formulatext <- gsub("/", "|", formulatext, fixed=TRUE)
formulatext <- gsub(":", "|", formulatext, fixed=TRUE)
formulatext <- gsub("||", "|", formulatext, fixed=TRUE)
# Remember model.variables and then varnames INCLUDE BOTH yvect AND linear predictor components
model.variables <- unlist(strsplit(formulatext, split="|", fixed=TRUE))
varnames <- c()
for(i in 1:length(model.variables)){
elt <- unlist(strsplit(model.variables[i], split="$", fixed=TRUE))
if(length(elt) > 1){
assign(elt[length(elt)], eval(parse(text=model.variables[i]), envir = parent.frame()), envir = parent.frame())
originalFormula.modified <- gsub(model.variables[i], elt[length(elt)], originalFormula, fixed=TRUE)
varnames <- append(varnames, elt[length(elt)])
}else{
varnames <- append(varnames, elt)
}
}
varnames <- unique(varnames)
if(!is.null(dataName)){
for(v in 1:length(varnames)){
varnames[v]<-paste0(dataName,"$",varnames[v])
test.string.0 <- paste0(dataName,"$","0")
test.string.1 <- paste0(dataName,"$","1")
if(varnames[v]==test.string.0) varnames[v] <- "0"
if(varnames[v]==test.string.1) varnames[v] <- "1"
}
cbindraw.text <- paste0("cbind(", paste(varnames, collapse=","), ")")
}else{
cbindraw.text <- paste0("cbind(", paste(varnames, collapse=","), ")")
}
# Identify and use variable names to count missings
all.data <- eval(parse(text=cbindraw.text), envir = parent.frame())
Ntotal <- dim(all.data)[1]
nomiss.any <- stats::complete.cases(all.data)
nomiss.any.data <- all.data[nomiss.any,]
N.nomiss.any <- dim(nomiss.any.data)[1]
Nvalid <- N.nomiss.any
Nmissing <- Ntotal-Nvalid
formula2use <- stats::as.formula(paste0(Reduce(paste, deparse(formula))), env = parent.frame()) # here we need the formula as a 'call' object
##################################################################
#sort out offset and weights
varname.offset <- paste0(offset)
if(!(is.null(offset))){
cbindtext.offset <- paste0("cbind(", offset,")")
offset <- eval(parse(text=cbindtext.offset), envir = parent.frame())
}
else{
assign(x = 'offset', value = NULL, envir = parent.frame())
}
varname.weights<-paste0(weights)
if(!(is.null(weights))){
cbindtext.weights <- paste0("cbind(", weights,")")
weights <- eval(parse(text=cbindtext.weights), envir = parent.frame())
}
else{
assign(x = 'weights', value = NULL, envir = parent.frame())
}
#### BEFORE going further we use the glm1 checks
# This command creates a formula object without the grouping factors for running the standard glm checks
formula2use.glm <- lme4::nobars(formula2use)
mod.glm.ds <- stats::glm(formula2use.glm, family="gaussian", x=TRUE, offset=offset, weights=weights, data=dataDF)
y.vect<-mod.glm.ds$y
X.mat<-mod.glm.ds$x
pw.vect<-mod.glm.ds$prior.weights
offset.vect<-mod.glm.ds$offset
##############################
#TEST FOR OVERSATURATED MODEL#
##############################
dimX<-dim((X.mat))
glm.saturation.invalid<-0
num.p<-as.numeric(dimX[2])
num.N<-as.numeric(dimX[1])
if(num.p>(nfilter.glm*num.N)){
glm.saturation.invalid<-1
errorMessage.gos<-paste0("ERROR: Model is oversaturated (too many model parameters relative to sample size)",
"leading to a possible risk of disclosure - please simplify model. With ",
num.p," parameters and nfilter.glm = ",round(nfilter.glm,4)," you need ",
round((num.p/nfilter.glm),0)," observations")
}
#########################
#CHECK Y VECTOR VALIDITY#
#########################
y.invalid<-0
#Count number of unique non-missing values - disclosure risk only arises with two levels
unique.values.noNA.y<-unique(y.vect[stats::complete.cases(y.vect)])
#If two levels, check whether either level 0 < n < nfilter.tab
if(length(unique.values.noNA.y)==2){
tabvar<-table(y.vect)[table(y.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
y.invalid<-1
errorMessage.y<-"ERROR: y (response) vector is binary with one category less than filter threshold for table cell size"
}
}
#Check x matrix validity
#Check no dichotomous x vectors with between 1 and filter.threshold
#observations at either level
Xpar.invalid<-rep(0,num.p)
x.invalid<-0 #Any x parameter invalid
for(pj in 1:num.p){
unique.values.noNA<-unique((X.mat[,pj])[stats::complete.cases(X.mat[,pj])])
if(length(unique.values.noNA)==2){
tabvar<-table(X.mat[,pj])[table(X.mat[,pj])>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
Xpar.invalid[pj]<-1
x.invalid<-1
errorMessage.x<-"ERROR: at least one column in X matrix is binary with one category less than filter threshold for table cell size"
}
}
}
#Check w vector validity
w.invalid<-0
if(!is.null(pw.vect))
{
w.vect<-pw.vect
unique.values.noNA.w<-unique(w.vect[stats::complete.cases(w.vect)])
if(length(unique.values.noNA.w)==2){
tabvar<-table(w.vect)[table(w.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
w.invalid<-1
errorMessage.w<-"ERROR: weights vector is binary with one category less than filter threshold for table cell size"
}
}
}
#Check o vector validity
o.invalid<-0
if(!is.null(offset.vect))
{
#Keep vector name consistent
o.vect<-offset.vect
unique.values.noNA.o<-unique(o.vect[stats::complete.cases(o.vect)])
if(length(unique.values.noNA.o)==2){
tabvar<-table(o.vect)[table(o.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
o.invalid<-1
errorMessage.o<-"ERROR: offset vector is binary with one category less than filter threshold for table cell size"
}
}
}
###############################################
#FIT MODEL AFTER CONFIRMING NO DISCLOSURE RISK#
###############################################
disclosure.risk<-0
if(y.invalid>0||w.invalid>0||o.invalid>0||sum(Xpar.invalid)>0||glm.saturation.invalid>0){
disclosure.risk<-1
}
if(disclosure.risk==0)
{
#########################
# set up control object #
#########################
#Temporary code to ensure control.obj is non-null and
#defaults to standard default glmerControl values
control.obj<-lme4::glmerControl()
#Construct control object
control.text<-"lme4::glmerControl("
#Sort control_value
if(!is.null(control_type)){
if(!is.null(control_value.transmit))
{
if(!is.numeric(control_value.transmit))
{
control_value<-as.numeric(control_value.transmit)
}
if(is.numeric(control_value.transmit))
{
control_value<-control_value.transmit
}
}
}
if(is.null(control_type)){
control_value<-NULL
}
#Sort control_type
if(!is.null(control_type)){
#Code for check.conv.grad
if(control_type=="check.conv.grad")
{
default.value<-1.0e-3 #default for glmerControl
if(is.null(control_value)){
control_value<-default.value
}
full.control.text<-paste0(control.text,"check.conv.grad = lme4::.makeCC('warning', tol=",control_value,", relTol=NULL))")
control.obj<-eval(parse(text=full.control.text))
}
}
####################################################################################
#THIS NEXT BLOCK OF CODE REMOVED BECAUSE AT PRESENT ANY NON-DEFAULT OPTIMIZER WILL
#ALMOST CERTAINLY REQUIRE CODE MODIFICATION AND THE DEFAULT CONSISTS OF THE
#TWO DIFFERENT OPTIMIZERS FOR STAGE 1 AND STAGE 2 WHICH IS SO SPECIFIC
#IT IS BETTER TO AVOID ALLOWING THE OPTIMIZER TO BE SELECTED IN THE CLIENTSIDE
#if(!is.null(optimizer)&&optimizer!="bobyqa"&&optimizer!="Nelder_Mead")
# {
# control.obj$optimizer<-"Nelder_Mead" #default for glmerControl
# }
#
#if(is.null(optimizer))
# {
# control.obj$optimizer<-"Nelder_Mead" #default for glmerControl
# }
#############################################################################
# sort out start values
start = NULL
if(!is.null(fixef)){
fixef <- as.numeric(unlist(strsplit(fixef, split=",")))
}
if(!is.null(theta)){
theta <- as.numeric(unlist(strsplit(theta, split=",")))
if(is.null(fixef)){
start = theta
}else{
start = list(theta=theta, fixef=fixef)
}
}
#mg <- lme4::lmer(formula2use, offset=offset, weights=weights, data=dataDF, REML = REML, verbose = verbose, control = control.obj)
mg <- lme4::glmer(formula2use, offset=offset, weights=weights, data=dataDF,
family = family, nAGQ=nAGQ,verbose = verbose, control=control.obj, start = start)
outlist <- mg
}
return(outlist)
}
# ASSIGN FUNCTION
# glmerSLMADS.assign
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.