Nothing
#' @title Fit a Generalized Linear Model (GLM) with pooling via Study Level Meta-Analysis (SLMA)
#' @description Fits a generalized linear model (GLM) on data from single or multiple sources
#' with pooled co-analysis across studies being based on SLMA (Study Level Meta Analysis).
#' @details glmSLMADS.assign is an aggregate function called by clientside function ds.glmSLMA.
#' ds.glmSLMA also calls another aggregate function glmSLMADS2
#' and an assign function glmSLMADS.assign
#' For more detailed information see help for ds.glmSLMA.
#' @param formula a glm formula, specified in call to ds.glmSLMA
#' @param family a glm family, specified in call to ds.glmSLMA
#' @param offset a character string specifying a variable to be used as an offset.
#' Specified in call to ds.glmSLMA.
#' @param weights a character string specifying a variable to be used as regression weights.
#' Specified in call to ds.glmSLMA. Specified in call to ds.glmSLMA.
#' @param data a character string specifying the name of a data.frame
#' holding the data for the model. Specified as dataName in call to ds.glmSLMA.
#' @return assesses and returns information about failure to pass disclosure traps
#' such as test of model complexity (saturation).
#' For more detailed information see help for ds.glmSLMA.
#' @author Paul Burton for DataSHIELD Development Team (14/7/20)
#' @export
glmSLMADS1<- function(formula, family, weights, offset, data){
errorMessage="No errors"
#############################################################
#MODULE 1: CAPTURE THE nfilter SETTINGS #
thr <- dsBase::listDisclosureSettingsDS() #
nfilter.tab<-as.numeric(thr$nfilter.tab) #
nfilter.glm<-as.numeric(thr$nfilter.glm) #
#nfilter.subset<-as.numeric(thr$nfilter.subset) #
#nfilter.string<-as.numeric(thr$nfilter.string) #
#############################################################
########################################
############
#Convert transmitable text for special link variance combinations back to full representation
if(family=="quasigamma.link_log")
{family<-"quasi(link=log,variance=mu^2)"}
if(family=="Gamma.link_log")
{family<-"Gamma(link=log,variance=mu^2)"}
#############
final.family.object<-eval(parse(text=family))
#########################################
# get the value of the 'data' and 'weights' parameters provided as character on the client side
if(is.null(data)){
dataTable <- NULL
}else{
dataTable <- eval(parse(text=data), envir = parent.frame())
}
formula2use <- formula
mod.glm.ds <- stats::glm(formula2use, family=final.family.object, x=TRUE, control=stats::glm.control(maxit=1), contrasts=NULL, data=dataTable)
X.mat <- as.matrix(mod.glm.ds$x)
dimX<-dim((X.mat))
y.vect<-as.vector(mod.glm.ds$y)
##############################################################
#FIRST TYPE OF DISCLOSURE TRAP - TEST FOR OVERSATURATED MODEL#
#TEST AGAINST nfilter.glm #
##############################################################
glm.saturation.invalid<-0
num.p<-dimX[2]
num.N<-dimX[1]
if(num.p>nfilter.glm*num.N){
glm.saturation.invalid<-1
errorMessage<-"ERROR: Model has too many parameters, there is a possible risk of disclosure - please simplify model"
}
coef.names<-names(mod.glm.ds$coefficients)
if(is.null(weights)){
w.vect <- rep(1,length(y.vect))
}else{
ftext <- paste0("cbind(",weights,")")
w.vect <- eval(parse(text=ftext), envir = parent.frame())
}
if(is.null(offset)){
offsetvar <- rep(1,length(y.vect))
}else{
active.text <- paste0("cbind(",offset,")")
offsetvar <- eval(parse(text=active.text), envir = parent.frame())
}
################################
#SECOND TYPE OF DISCLOSURE TRAP#
################################
#If y, X or w data are invalid but user has modified clientside
#function (ds.glm) to circumvent trap, model will get to this point without
#giving a controlled shut down with a warning about invalid data.
#So as a safety measure, we will now use the same test that is used to
#trigger a controlled trap in the clientside function to destroy the
#score.vector and information.matrix in the study with the problem.
#CHECK Y VECTOR VALIDITY
y.invalid<-0
#COUNT NUMBER OF UNIQUE NON-MISSING VALUES - DISCLOSURE RISK ONLY ARISES WITH TWO LEVELS
unique.values.noNA.y<-unique(y.vect[stats::complete.cases(y.vect)])
#IF TWO LEVELS, CHECK WHETHER EITHER LEVEL 0 < n < nfilter.tab
if(length(unique.values.noNA.y)==2){
tabvar<-table(y.vect)[table(y.vect)>=1] #tabvar COUNTS N IN ALL CATEGORIES WITH AT LEAST ONE OBSERVATION
min.category<-min(tabvar)
if(min.category<nfilter.tab){
y.invalid<-1
errorMessage<-"ERROR: y vector is binary with one category less than filter threshold for table cell size"
stop(errorMessage, call. = FALSE)
}
}
#CHECK X MATRIX VALIDITY
#Check no dichotomous X vectors with between 1 and filter.threshold
#observations at either level
dimX<-dim((X.mat))
num.Xpar<-dimX[2]
Xpar.invalid<-rep(0,num.Xpar)
for(pj in 1:num.Xpar){
unique.values.noNA<-unique((X.mat[,pj])[stats::complete.cases(X.mat[,pj])])
if(length(unique.values.noNA)==2){
tabvar<-table(X.mat[,pj])[table(X.mat[,pj])>=1] #tabvar COUNTS N IN ALL CATEGORIES WITH AT LEAST ONE OBSERVATION
min.category<-min(tabvar)
if(min.category<nfilter.tab){
Xpar.invalid[pj]<-1
errorMessage<-"ERROR: at least one column in X matrix is binary with one category less than filter threshold for table cell size"
stop(errorMessage, call. = FALSE)
}
}
}
#CHECK W VECTOR VALIDITY
w.invalid<-0
if(!is.null(w.vect))
{
unique.values.noNA.w<-unique(w.vect[stats::complete.cases(w.vect)])
if(length(unique.values.noNA.w)==2){
tabvar<-table(w.vect)[table(w.vect)>=1] #tabvar COUNTS N IN ALL CATEGORIES WITH AT LEAST ONE OBSERVATION
min.category<-min(tabvar)
if(min.category<nfilter.tab){
w.invalid<-1
errorMessage<-"ERROR: w vector is binary with one category less than filter threshold for table cell size"
stop(errorMessage, call. = FALSE)
}
}
}
#Check o vector validity
o.invalid<-0
if(!is.null(offsetvar))
{
#Keep vector name consistent
o.vect<-offsetvar
unique.values.noNA.o<-unique(o.vect[stats::complete.cases(o.vect)])
if(length(unique.values.noNA.o)==2){
tabvar<-table(o.vect)[table(o.vect)>=1] #tabvar counts n in all categories with at least one observation
min.category<-min(tabvar)
if(min.category<nfilter.tab){
o.invalid<-1
errorMessage<-"ERROR: offset vector is binary with one category less than filter threshold for table cell size"
stop(errorMessage, call. = FALSE)
}
}
}
#If y, X, weights or offset data are invalid, or the model is overparameterized, this will be detected by glmDS1
#and passed to ds.glm resulting in a warning and a controlled shut down of the function.
#But in case someone modifies the client side function to circumvent the trap, so the
#error is only apparent once the main main iterations have started via glmDS2
#the equivalent tests in glmDS2 will destroy the info.matrix and score.vector in the affected study so
#the model fitting will simply terminate.
if(!(y.invalid>0||w.invalid>0||o.invalid>0||sum(Xpar.invalid)>0||glm.saturation.invalid>0)){
errorMessage<-"No errors"
}else
{
errorMessage<-"STUDY DATA OR APPLIED MODEL INVALID FOR THIS SOURCE"
}
return(list(dimX=dimX,coef.names=coef.names,y.invalid=y.invalid,Xpar.invalid=Xpar.invalid,
w.invalid=w.invalid,o.invalid=o.invalid,
glm.saturation.invalid=glm.saturation.invalid,errorMessage=errorMessage))
}
#AGGREGATE FUNCTION
# glmSLMADS1
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.