Infomatic <- R6::R6Class(
"Infomatic",
class=TRUE, ## this and the next
cloneable=FALSE, ## should improve performance https://r6.r-lib.org/articles/Performance.html ###
public=list(
operator=NULL,
caller=NULL,
model_type=NULL,
model=NULL,
formula=NULL,
distribution=NULL,
family=NULL,
link=NULL,
direction=NULL,
rcall=NULL,
call=NULL,
emmeans = NULL,
predict = "response",
fit = NULL,
deptype = NULL,
depnlevels = NULL,
calloptions = NULL,
r2 = NULL,
optimized = FALSE,
df = NULL,
comparison = "Difference",
posthoc_adjust = c("bonferroni","holm","sidak","tukey","scheffe"),
omnibus_test= NULL,
extra_info = NULL,
initialize=function(options,datamatic,formulas) {
self$model_type <- options$model_type
if (options$model_type=="lmer" && options$res_struct!="id")
self$model_type<-"lme"
if (utils::hasName(options,"input_method")) {
if (options$model_type=="logistic" && options$input_method=="success")
self$model_type<-"logistic_success"
if (options$model_type=="logistic" && options$input_method=="total")
self$model_type<-"logistic_total"
if (options$model_type=="probit" && options$input_method=="success")
self$model_type<-"probit_success"
if (options$model_type=="probit" && options$input_method=="total")
self$model_type<-"probit_total"
}
self$formula <- formulas$formula64()
self$caller <- options$.caller
dep <- options$dep
dlevs <- datamatic$variables[[tob64(dep)]]$levels_labels
if (utils::hasName(options,"omnibus"))
self$omnibus_test<-toupper(options$omnibus)
if (self$caller=="glm") {
self$fit<-c("lik" , "aic", "bic", "dev", "dfr", "over")
self$r2 <-list(list(model=""))
}
if (self$caller=="glmer") {
self$fit<-c("lik" , "aic", "bic", "dev", "dfr", "over")
self$r2<-list(list(type="Marginal"),list(type="Conditional"))
}
if (self$model_type=="lm") {
self$model <- c("Linear Model","OLS Model for continuous y")
self$distribution <- "gaussian"
self$call <- "lm"
self$rcall <- "stats::lm"
self$deptype <- c("numeric","integer")
self$r2 <-list(list(model=""))
}
if (self$model_type=="linear") {
self$model <- c("Linear Model","ML Model for continuous y")
self$distribution <- "gaussian"
self$call <- CALLS[[self$caller]]
self$rcall <- FUNCS[[self$caller]]
self$link <- "identity"
self$direction <- c("y","Dependent variable scores")
self$deptype <- c("numeric","integer")
}
if (self$model_type=="logistic") {
self$model <- c("Logistic Model","Model for binary y")
self$distribution <- "binomial"
self$family <- "binomial"
self$call <- CALLS[[self$caller]]
self$rcall <- FUNCS[[self$caller]]
if (self$caller=="glmer")
self$calloptions<-list(control=lme4::glmerControl(optimizer = "bobyqa"))
self$link <- "logit"
self$emmeans <- "probabilities"
self$comparison <- "OR"
self$deptype <- "factor"
self$depnlevels <- 2
### compute directions ###
dlevs <- datamatic$variables[[tob64(options$dep)]]$levels
actual <- paste("P(",dep,"=",dlevs[2],") / P(",dep,"=",dlevs[1],")")
theory <- "P(y=1)/P(y=0)"
self$direction <- c(theory,actual)
}
if (self$model_type=="logistic_success") {
dep1 <- options$dep
dep2 <- options$dep2
self$model <- c("Logistic Model","Model for success over failure")
self$distribution <- "binomial"
self$family <- "binomial"
self$call <- "acall"
self$rcall <- "stats::glm"
form <- paste("cbind(",tob64(dep1),",",tob64(dep2),") ",formulas$rhsfixed_formula64())
self$calloptions <- list(formula=form,family=binomial())
self$link <- "logit"
self$emmeans <- "probabilities"
self$comparison <- "OR"
self$deptype <- "integer"
### compute directions ###
actual <- paste("P(",dep1,") / P(",dep2,")")
theory <- "P(y=1)/P(y=0)"
self$direction <- c(theory,actual)
}
if (self$model_type=="logistic_total") {
dep1 <- options$dep
dep2 <- options$dep2
self$model <- c("Logistic Model","Model for success over totals")
self$distribution <- "binomial"
self$family <- "binomial"
self$call <- "acall"
self$rcall <- "stats::glm"
form <- paste(tob64(dep1),"/",tob64(dep2),formulas$rhsfixed_formula64())
self$calloptions <- list(formula=form,family=binomial(),weights = str2lang(tob64(dep2)))
self$link <- "logit"
self$emmeans <- "probabilities"
self$comparison <- "OR"
self$deptype <- "integer"
### compute directions ###
actual <- paste(dep1," / ",dep2)
theory <- "P(y=1)/P(y=0)"
self$direction <- c(theory,actual)
}
if (self$model_type=="probit") {
self$model <- c("Probit Model","Model for binary y")
self$distribution <- "binomial"
self$family <- "binomial(link='probit')"
self$call <- CALLS[[self$caller]]
self$rcall <- FUNCS[[self$caller]]
self$link <- "probit"
self$emmeans <- "probabilities"
self$deptype <- "factor"
self$depnlevels <- 2
self$comparison <- "OR"
### compute direction ###
dlevs <- datamatic$variables[[tob64(options$dep)]]$levels
theory <- "P(y=1)"
actual <- paste("P(",dep,"=",dlevs[2],")")
self$direction <- c(theory,actual)
}
if (self$model_type=="poisson") {
self$model <- c("Poisson Model","Model for count y")
self$distribution <- "poisson"
self$family <- "stats::poisson()"
self$call <- CALLS[[self$caller]]
self$rcall <- FUNCS[[self$caller]]
self$link <- "log"
self$emmeans <- "expected counts"
self$direction <- c("y","Dependent variable counts")
self$deptype <- "integer"
self$comparison <- "Ratio"
}
if (self$model_type=="probit_success") {
dep1 <- options$dep
dep2 <- options$dep2
self$model <- c("Logistic Model","Model for success over failure")
self$distribution <- "binomial"
self$family <- "binomial(link='probit')"
self$call <- "acall"
self$rcall <- "stats::glm"
form <- paste("cbind(",tob64(dep1),",",tob64(dep2),") ",formulas$rhsfixed_formula64())
self$calloptions <- list(formula=form,family=binomial(link='probit'))
self$link <- "logit"
self$emmeans <- "probabilities"
self$comparison <- "OR"
self$deptype <- "integer"
### compute directions ###
actual <- paste("P(",dep1,") / P(",dep2,")")
theory <- "P(y=1)/P(y=0)"
self$direction <- c(theory,actual)
}
if (self$model_type=="probit_total") {
dep1 <- options$dep
dep2 <- options$dep2
self$model <- c("Logistic Model","Model for success over totals")
self$distribution <- "binomial"
self$family <- "binomial"
self$call <- "acall"
self$rcall <- "stats::glm"
form <- paste(tob64(dep1),"/",tob64(dep2),formulas$rhsfixed_formula64())
self$calloptions <- list(formula=form,family=binomial(link="probit"),weights = str2lang(tob64(dep2)))
self$link <- "logit"
self$emmeans <- "probabilities"
self$comparison <- "OR"
self$deptype <- "integer"
### compute directions ###
actual <- paste(dep1," / ",dep2)
theory <- "P(y=1)/P(y=0)"
self$direction <- c(theory,actual)
}
if (self$model_type=="poiover") {
self$model <- c("Poisson Model","Model for overdispersed count y")
self$distribution <- "quasi-poisson"
self$family <- "stats::quasipoisson()"
self$call <- CALLS[[self$caller]]
self$rcall <- FUNCS[[self$caller]]
self$link <- "log"
self$emmeans <- "expected counts"
self$deptype <- "integer"
self$comparison <- "Ratio"
### compute direction ###
self$direction <- c("y","Dependent variable counts")
self$deptype <- "integer"
}
if (self$model_type=="nb") {
self$model <- c("Negative Binomial Model","Model for overdispersed count y")
self$call <- "glm(er).nb"
self$distribution <- "nb"
self$rcall <- NB[[self$caller]]
self$link <- "log"
self$emmeans <- "expected counts"
self$direction <- c("y","Dependent variable counts")
self$deptype <- "integer"
self$comparison <- "Ratio"
}
if (self$model_type=="beta") {
self$model <- c("Beta regression","Model for proportions")
self$call <- "betareg"
self$distribution <- "beta"
self$rcall <- "betareg::betareg"
self$link <- "logit"
self$emmeans <- "prop."
self$direction <- c("y","Dependent variable proportion")
self$deptype <- "numeric"
if (options$preds_phi)
self$formula <- paste(formulas$fixed_formula64(),strsplit(formulas$fixed_formula64(),"~")[[1]][[2]],sep = "|")
}
if (self$model_type=="custom") {
self$model <- c("User Model","Generalized model")
self$family <- paste0(options$custom_family,"('",options$custom_link,"')")
self$distribution <- options$custom_family
self$call <- CALLS[[self$caller]]
self$rcall <- FUNCS[[self$caller]]
self$link <- options$custom_link
self$emmeans <- "the response metric"
self$direction <- c("y","Dependent variable scores")
self$deptype <- c("numeric","integer","factor")
}
if (self$model_type=="ordinal") {
self$model <- c("Cumlative Link Model","Proportional odds logistic")
self$distribution <- "logistic"
self$call <- ORDINAL[[self$caller]]
self$rcall <- ORDINAL[[self$caller]]
self$calloptions <- list(model=TRUE, Hess=TRUE)
self$link <- "logit"
self$emmeans <- "expected class"
self$comparison <- "Ratio"
self$predict <- "class"
### compute direction ###
theory <- paste('P(Y',greek_vector['leq'],'j)/P(Y', greek_vector['gt'],' j)')
actual <- paste("j=",paste(dlevs,collapse = " | "))
self$direction <- c(theory, actual)
#########################
self$deptype <- "factor"
self$fit <- c("lik" , "aic", "bic", "dev")
}
if (self$model_type=="multinomial") {
self$model <- c("Multinomial Model","Model for categorical y")
self$distribution <- "multinomial"
if (self$caller=="glm") {
self$call <- "nnet::multinom"
self$rcall <- "nnet::multinom"
self$calloptions <- list(model=TRUE)
}
if (self$caller=="glmer") {
self$call <- "mclogit::mblogit"
self$rcall <- "mclogit::mblogit"
self$formula <- NULL
self$calloptions <- list(formula=as.formula(formulas$fixed_formula64()),
random=formulas$listify_random_formulas64(),
model=TRUE,
maxit=50,
estimator="REML",
trace=FALSE,
trace.inner=FALSE)
self$r2 <-list(list(type="McFadden"))
}
self$link <- "logit"
self$emmeans <- "probabilities"
self$predict <- "probs"
### compute direction ###
theory <- "P(Y=j)/P(Y=0)"
actual <- paste(paste0("P(",dep,"=",dlevs[-1],")"),paste0("P(",dep,"=",dlevs[1],")"),sep="/",collapse = " , ")
self$direction <- c(theory, actual)
#########################
self$deptype <- "factor"
self$depnlevels <- -3
self$fit <- c("lik" , "aic", "bic", "dev")
if (self$caller=="glmer") self$fit <- c("aic", "bic", "dev")
}
if (self$model_type=="lmer") {
self$model <- c("Mixed Model","Linear Mixed model for continuous y")
self$distribution <- "gaussian"
self$call <- "lmer"
self$rcall <- "estimate_lmer"
self$calloptions <- list(reml=options$reml,optimizers=c("bobyqa","Nelder_Mead","nloptwrap"))
self$optimized <- TRUE
self$direction <- c("y","Dependend variable scores")
self$deptype <- c("numeric","integer")
self$fit <- c("lik" , "aic", "bic")
self$r2 <- list(list(type="Marginal"),list(type="Conditional"))
self$df <- options$df_method
}
if (self$model_type=="lme") {
self$formula <- NULL
self$model <- c("Mixed Model","Linear Mixed model for continuous y")
self$distribution <- "gaussian"
self$call <- "lme"
self$rcall <- "estimate_lme"
form <- paste("~1|",tob64(options$cluster[1]))
if (options$res_struct=="cs") cor<-nlme::corCompSymm
if (options$res_struct=="un") cor<-nlme::corSymm
if (options$res_struct=="ar1") cor<-nlme::corAR1
if (options$reml) method="REML" else method="ML"
self$calloptions <- list(fixed=as.formula(formulas$fixed_formula64()),
random=formulas$listify_random_formulas64(),
cor=cor,
form=form,
method=method
)
self$optimized <- FALSE
self$direction <- c("y","Dependend variable scores")
self$deptype <- c("numeric","integer")
self$fit <- c("lik" , "aic", "bic")
self$r2 <- list(list(type="Marginal"),list(type="Conditional"))
if (options$res_struct=="un") res<-"Unstructured" else res<-"AutoRegressive 1"
self$extra_info <- list(info="Residuals",value=res,specs=paste("within cluster",options$cluster[1]))
}
},
info_table=function() {
alist<-list()
alist[["model"]] <- private$.model()
alist[["call"]] <- private$.call()
alist[["dist"]] <- private$.dist()
if (is.something(self$link))
alist[["link"]] <- private$.link()
if (is.something(self$direction))
alist[["dir"]] <- private$.dir()
if (self$optimized)
alist[["optim"]]<-list(info="Optimizer",value="",specs="")
if (is.something(self$omnibus_test))
alist[["omnibus"]]<-list(info="Omnibus Tests",value=self$omnibus_test,specs="")
if (is.something(self$df))
alist[["df"]]<-list(info="DF method",value=self$df,specs="")
if (is.something(self$extra_info))
alist[["extra_info"]]<-self$extra_info
alist[["sample"]] <- list(info="Sample size",value=".",specs="")
alist[["conv"]] <- list(info="Converged",value="no yet", specs="")
return(alist)
},
info_fit=function() {
if (is.null(self$fit))
return()
alist<-list()
for (f in self$fit) {
alist[[f]]<-FIT[[f]]
}
return(alist)
}
), # end of public
private = list(
.model=function() {
return(list(info="Model Type", value=self$model[[1]], specs=self$model[[2]]))
},
.call=function() {
list(info="Model",value=self$call,specs="")
},
.dist=function() {
list(info="Distribution",value=DINFO[[self$distribution]][[1]],specs=DINFO[[self$distribution]][[2]])
},
.link=function() {
list(info="Link function",value=LINFO[[self$link]][[1]],specs=LINFO[[self$link]][[2]])
},
.dir=function() {
list(info="Direction",value=self$direction[1],specs=self$direction[2])
}
) # end of private
)
###### info definition for models ##########
DINFO<-list()
DINFO[["gaussian"]]<-c("Gaussian","Normal distribution of residuals")
DINFO[["poisson"]]<-c("Poisson","Model for count data")
DINFO[["binomial"]]<-c("Binomial","Dichotomous event distribution of y")
DINFO[["multinomial"]]<-c("Multinomial","Multi-event distribution of y")
DINFO[["nb"]]<-c("Negative binomial","Rare event with overdispersion")
DINFO[["quasi-poisson"]]<-c("Quasi-Poisson","Rare event with overdispersion")
DINFO[["Gamma"]]<-c("Gamma","Skewed continuous distribution")
DINFO[["logistic"]]<-c("Logistic","")
LINFO<-list()
LINFO[["identity"]] <- c("Identity","Coefficients in the same scale of y")
LINFO[["log"]] <- c("log","Coefficients are in the log(y) scale")
LINFO[["logit"]] <- c("Logit","Log of the odd of y")
LINFO[["slogit"]] <- c("Logit","Log of the odd of each level of y over y=0")
LINFO[["probit"]] <- c("Probit","Inverse of normal CDF for P(y=1)")
LINFO[["1/mu^2"]] <- c("1/mu^2","Inverse of y squared")
LINFO[["inverse"]] <- c("1/mu","Inverse of y")
LINFO[["sqrt"]] <- c("Square root","Square root of y")
CALLS<-list()
CALLS[["glm"]]<-"stats::glm"
CALLS[["glmer"]]<-"lme4::glmer"
FUNCS<-list()
FUNCS[["glm"]]<-"stats::glm"
FUNCS[["glmer"]]<-"lme4::glmer"
NB<-list()
NB[["glm"]]<-"MASS::glm.nb"
NB[["glmer"]]<-"lme4::glmer.nb"
ORDINAL<-list()
ORDINAL[["glm"]]<-"ordinal::clm"
ORDINAL[["glmer"]]<-"ordinal::clmm"
FIT<-list()
FIT[["lik"]] <- list(info="LogLikelihood", specs="")
FIT[["aic"]] <- list(info="AIC", specs="Less is better")
FIT[["bic"]] <- list(info="BIC", specs="Less is better")
FIT[["dev"]] <- list(info="Deviance", specs="Less is better")
FIT[["dfr"]] <- list(info="Residual DF", specs="")
FIT[["over"]] <- list(info="Chi-squared/DF", specs="Overdispersion indicator")
.explainPrediction=function() {
if (!utils::hasName(self$tab_info,"dir"))
return()
dlevs<-self$datamatic$variables[[tob64(self$options$dep)]]$levels
dep<-self$options$dep
if (self$options$model_type %in% c("logistic")) {
self$tab_info[["dir"]]$value<-paste("P(",dep,"=",dlevs[2],") / P(",dep,"=",dlevs[1],")")
}
if (self$options$model_type %in% c("probit")) {
self$tab_info[["dir"]]$value<-paste("P(",dep,"=",dlevs[2],")")
}
if (self$options$model_type %in% c("multinomial")) {
self$tab_info[["dir"]]$value<-paste(paste0("P(",dep,"=",dlevs[-1],")"),paste0("P(",dep,"=",dlevs[1],")"),sep="/",collapse = " , ")
}
if (self$tab_info[["dir"]]$value=="")
self$tab_info[["dir"]]$value<-paste(dep,"scores")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.