Nothing
#' @title Fit a Generalized Linear Model (GLM) with pooling via Study Level Meta-Analysis (SLMA)
#' @description Fits a generalized linear model (GLM) on data from single or multiple sources
#' with pooled co-analysis across studies being based on SLMA (Study Level Meta Analysis).
#' @details glmSLMADS.assign is an aggregate function called by clientside function ds.glmSLMA.
#' ds.glmSLMA also calls another aggregate function glmSLMADS2
#' and an assign function glmSLMADS.assign
#' For more detailed information see help for ds.glmSLMA.
#' @param formula a glm formula, specified in call to ds.glmSLMA
#' @param family a glm family, specified in call to ds.glmSLMA
#' @param offset a character string specifying a variable to be used as an offset.
#' Specified in call to ds.glmSLMA.
#' @param weights a character string specifying a variable to be used as regression weights.
#' Specified in call to ds.glmSLMA. Specified in call to ds.glmSLMA.
#' @param newobj a character string specifying the name of the glm object
#' written to the serverside by glmSLMADS.assign. This is either the name
#' specified by the newobj argument in ds.glmSLMA or if newobj was unspecified
#' or NULL it is called new.glm.obj.
#' @param dataName a character string specifying the name of a data.frame
#' holding the data for the model. Specified in call to ds.glmSLMA.
#' @return All quantitative, Boolean, and character objects required to
#' enable the SLMA pooling of the separate glm models fitted to each study -
#' in particular including the study-specific regression coefficients and their corresponding
#' standard errors.
#' @author Paul Burton for DataSHIELD Development Team (14/7/20)
#' @export
glmSLMADS2 <- function(formula, family, offset, weights, newobj, dataName){
#############################################################
#MODULE 1: CAPTURE THE nfilter SETTINGS #
thr <- dsBase::listDisclosureSettingsDS() #
nfilter.tab <- as.numeric(thr$nfilter.tab) #
nfilter.glm <- as.numeric(thr$nfilter.glm) #
#nfilter.subset<-as.numeric(thr$nfilter.subset) #
#nfilter.string<-as.numeric(thr$nfilter.string) #
#############################################################
########################################
############
#Convert transmitable text for special link variance combinations back to full representation
if(family=="quasigamma.link_log")
{family<-"quasi(link=log,variance=mu^2)"}
if(family=="Gamma.link_log")
{family<-"Gamma(link=log)"}
#############
final.family.object<-eval(parse(text=family))
#########################################
errorMessage2<-"No errors"
# Get the value of the 'data' parameter provided as character on the client side
# Same is done for offset and weights lower down function
if(!is.null(dataName)){
dataDF <- eval(parse(text=dataName), envir = parent.frame())
}else{
dataDF<-NULL
}
#formula2use <- stats::as.formula(paste0(Reduce(paste, deparse(originalFormula))), env = parent.frame()) # here we need the formula as a 'call' object
formula2use <- formula
##################################################################
#sort out offset and weights
if(is.null(offset))
{
varname.offset<-NULL
#offset.to.use <- NULL
cbindtext.offset <- paste0("offset.to.use <- NULL")
eval(parse(text=cbindtext.offset), envir = parent.frame())
}else{
varname.offset <- paste0(offset)
}
if(!(is.null(offset)))
{
cbindtext.offset <- paste0("offset.to.use <- cbind(", offset,")")
eval(parse(text=cbindtext.offset), envir = parent.frame())
}
if(is.null(weights))
{
varname.weights<-NULL
cbindtext.weights <- paste0("weights.to.use <- NULL")
eval(parse(text=cbindtext.weights), envir = parent.frame())
#weights.to.use <- NULL
}else{
varname.weights <- paste0(weights)
}
if(!(is.null(weights)))
{
cbindtext.weights <- paste0("weights.to.use <- cbind(", weights,")")
eval(parse(text=cbindtext.weights), envir = parent.frame())
#cbindtext.weights <- paste0("cbind(", weights,")")
#weights.to.use <- eval(parse(text=cbindtext.weights), envir = parent.frame())
}
#This function call now undertaken in glmDS.assign function and is here replaced by
#bringing back in the mg output saved from that previous call
# mg <- stats::glm(formula2use, family=final.family.object, x=TRUE, offset=offset.to.use, weights=weights.to.use, data=dataDF)
activate.text<- paste0("mg<-",newobj)
eval(parse(text=activate.text))
y.vect<-mg$y
X.mat<-mg$x
pw.vect<-mg$prior.weights
offset.vect<-mg$offset
##############################
#TEST FOR OVERSATURATED MODEL#
##############################
dimX<-dim((X.mat))
glm.saturation.invalid<-0
num.p<-as.numeric(dimX[2])
num.N<-as.numeric(dimX[1])
if(num.p>(nfilter.glm*num.N)){
glm.saturation.invalid<-1
errorMessage.gos<-paste0("ERROR: Model is oversaturated (too many model parameters relative to sample size)",
"leading to a possible risk of disclosure - please simplify model. With ",
num.p," parameters and nfilter.glm = ",round(nfilter.glm,4)," you need ",
round((num.p/nfilter.glm),0)," observations")
}
#########################
#CHECK Y VECTOR VALIDITY#
#########################
y.invalid<-0
#Count number of unique non-missing values - disclosure risk only arises with two levels
unique.values.noNA.y<-unique(y.vect[stats::complete.cases(y.vect)])
#If two levels, check whether either level 0 < n < nfilter.tab
if(length(unique.values.noNA.y)==2){
tabvar<-table(y.vect)[table(y.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
y.invalid<-1
errorMessage.y<-"ERROR: y (response) vector is binary with one category less than filter threshold for table cell size"
}
}
#Check x matrix validity
#Check no dichotomous x vectors with between 1 and filter.threshold
#observations at either level
Xpar.invalid<-rep(0,num.p)
x.invalid<-0 #Any x parameter invalud
for(pj in 1:num.p){
unique.values.noNA<-unique((X.mat[,pj])[stats::complete.cases(X.mat[,pj])])
if(length(unique.values.noNA)==2){
tabvar<-table(X.mat[,pj])[table(X.mat[,pj])>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
Xpar.invalid[pj]<-1
x.invalid<-1
errorMessage.x<-"ERROR: at least one column in X matrix is binary with one category less than filter threshold for table cell size"
}
}
}
#Check w vector validity
w.invalid<-0
if(!is.null(pw.vect))
{
w.vect<-pw.vect
unique.values.noNA.w<-unique(w.vect[stats::complete.cases(w.vect)])
if(length(unique.values.noNA.w)==2){
tabvar<-table(w.vect)[table(w.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
w.invalid<-1
errorMessage.w<-"ERROR: weights vector is binary with one category less than filter threshold for table cell size"
}
}
}
#Check o vector validity
o.invalid<-0
if(!is.null(offset.vect))
{
#Keep vector name consistent
o.vect<-offset.vect
unique.values.noNA.o<-unique(o.vect[stats::complete.cases(o.vect)])
if(length(unique.values.noNA.o)==2){
tabvar<-table(o.vect)[table(o.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
o.invalid<-1
errorMessage.o<-"ERROR: offset vector is binary with one category less than filter threshold for table cell size"
}
}
}
###############################################
#FIT MODEL AFTER CONFIRMING NO DISCLOSURE RISK#
###############################################
disclosure.risk<-0
if(y.invalid>0||w.invalid>0||o.invalid>0||sum(Xpar.invalid)>0||glm.saturation.invalid>0){
disclosure.risk<-1
}
if(disclosure.risk==0)
{
# mg <- stats::glm(formula2use, family=final.family.object, offset=offset.to.use, weights=weights.to.use, data=dataDF)
Nvalid <- length(mg$residuals)
Nmissing <- length(mg$na.action)
Ntotal <- Nvalid+Nmissing
outlist<-list(rank=mg$rank, aic=mg$aic,
iter=mg$iter, converged=mg$converged,
boundary=mg$boundary, na.action=options("na.action"), call=summary(mg)$call, terms=summary(mg)$terms,
contrasts=summary(mg)$contrasts, aliased=summary(mg)$aliased, dispersion=summary(mg)$dispersion,
data=dataName, df=summary(mg)$df, Ntotal=Ntotal, Nvalid=Nvalid, Nmissing=Nmissing,
cov.unscaled=summary(mg)$cov.unscaled, cov.scaled=summary(mg)$cov.scaled,
offset=varname.offset, weights=varname.weights,VarCovMatrix=NA,CorrMatrix=NA,
deviance.null=mg$null.deviance, df.null=mg$df.null, deviance.resid=mg$deviance, df.resid=mg$df.residual,
formula=mg$formula, family=mg$family,coefficients=summary(mg)$coefficients)
}else{
errorMessage.d1<-"ERROR: Model failed in this source because of an enhanced risk of disclosure"
errorMessage.d2<-"The following message(s) identify the cause of this enhanced risk"
outlist.1<-list(errorMessage.1=errorMessage.d1)
outlist.2<-list(errorMessage.2=errorMessage.d2)
outlist.gos<-NULL
if(glm.saturation.invalid==1){
outlist.gos<-list(errorMessage.gos=errorMessage.gos)
}
outlist.y<-NULL
if(y.invalid==1){
outlist.y<-list(errorMessage.y=errorMessage.y)
}
outlist.x<-NULL
if(x.invalid==1){
outlist.x<-list(errorMessage.x=errorMessage.x)
}
outlist.w<-NULL
if(w.invalid==1){
outlist.w<-list(errorMessage.w=errorMessage.w)
}
outlist.o<-NULL
if(o.invalid==1){
outlist.o<-list(errorMessage.o=errorMessage.o)
}
outlist<-list(outlist.1,outlist.2,outlist.gos,outlist.y,outlist.x,outlist.w,outlist.o)
}
#tidy up in parent.frame()
eval(quote(rm(offset.to.use)), envir = parent.frame())
eval(quote(rm(weights.to.use)), envir = parent.frame())
return(outlist)
}
# AGGREGATE FUNCTION
# glmSLMADS2
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.