Nothing
.get_gof_stats <- function(projectors, rawstatempdist,
gdimfn=.Infusion.data$options$gof_nstats_fn,
importance_fn=importance
) {
stats <- unique(unlist(lapply(projectors, attr, which="stats"))) ## all stats used by all projectors
rawstatempdist <- rawstatempdist[,stats, drop=FALSE] # some simulated raw stats might not have been used in the projections
## We need to check for linear redundancies in the raw stats
rawstatempdist <- check_raw_stats(rawstatempdist,statNames = stats,remove = TRUE, verbose=0.5) # silent if not problem detected, but still verbose otherwise
stats <- colnames(rawstatempdist)
##
projnames <- ls(projectors)
#allranger <- all(sapply(projnames, function(v) inherits( projectors[[v]],"ranger")))
imp_methods <- unlist(lapply(projnames, function(v) projectors[[v]]$"importance"))
if (length(imp_methods)==length(projnames)) {
if (length(u_imp <- unique(imp_methods))==1L) {
if ( ! u_imp %in% c("impurity_corrected","permutation")) {
warning("Importance method not 'permutation' (preferred) nor 'impurity_corrected': user is responsible to make sure that GoF test will be meaningful.",
immediate.=TRUE)
} # In particular, the importance measure must indeed increase with importance...
} else stop("Different importance methods used for different projectors, cannot be used together for GoF test.")
} else stop("Variable importance method not found for all projectors.")
# rel_imps are importances for each projector, standardized by the RMSE of oob prediction for each projector
rel_imps <- try(lapply(projnames, function(v) importance_fn(projectors[[v]])/projectors[[v]]$prediction.error), silent=TRUE)
if ( ! inherits(rel_imps,"try-error")) {
for (it in seq_along(rel_imps)) rel_imps[[it]] <- rel_imps[[it]][intersect(names(rel_imps[[it]]),stats)]
rel_imps <- do.call(unlist,list(rel_imps)) # list() important here...
nr <- nrow(rawstatempdist)
gdim <- gdimfn(nr=nr, nstats=length(stats))
# find the minimum importance of each raw stat over projectors, and sort raw stats by increasing importance:
stats <- names(head(sort(sapply(unique(names(rel_imps)),
function(v) {min(rel_imps[grep(v, names(rel_imps), fixed=TRUE)] )})),
n = gdim))
}
stats
}
goftest <- function(object, nsim=99L, method="", stats=NULL, plot.=TRUE,
nb_cores=NULL, Simulate=attr(object$logLs,"Simulate"),
packages=attr(object$logLs,"packages"), env=attr(object$logLs,"env"),
verbose=interactive(),
cl_seed=.update_seed(object),
get_gof_stats=.get_gof_stats
) {
feasible <- FALSE
stat_obs <- t(attr(object$logLs,"stat.obs"))
if (method=="mixture") { # Result won't be a valid test of goodness of fit !
statdens <- .conditional_Rmixmod(object$jointdens,#fittedPars=object$colTypes$statNames,
given=object$MSL$MSLE, expansion=1) # stat dens|ML parameter estimates
plotmain <- "Obs. vs. mixture model"
statempdist <- .simulate.MixmodResults(statdens, nsim=1L, size=nsim, drop=TRUE) # directly in projected space
plotframe <- as.data.frame(rbind(statempdist,stat_obs))
} else { # actual method: requires resimulation of processus
if (is.null(Simulate)) {
stop("'Simulate' function no available. Hint: its default value in goftest() call\n assumes it was made available through the original add_reftable() call.")
}
MSLErep <- t(object$MSL$MSLE)
MSLErep <- as.data.frame(MSLErep[rep(1,nsim),])
MSLErep <- cbind(MSLErep,object$colTypes$fixedPars) ## add fixedPars for simulation
MSLErep <- MSLErep[,object$colTypes$allPars,drop=FALSE] ## column reordering and remove polluting things (cumul_iter in fixedPars!: _F I X M E_)
if (inherits(object,"SLik_j")) {
rawstatempdist <- add_reftable(Simulate=Simulate, par.grid=MSLErep, verbose=verbose,
control.Simulate=attr(object$logLs,"control.Simulate"),
nb_cores=nb_cores, packages=packages$add_simulation, env=env,
cl_seed=cl_seed)
} else {
rawstatempdist <- add_simulation(Simulate=Simulate, par.grid=MSLErep,verbose=verbose,
control.Simulate=attr(object$logLs,"control.Simulate"),
nb_cores=nb_cores, packages=packages$add_simulation, env=env,
cl_seed=cl_seed)
}
rawstatempdist <- na.omit(rawstatempdist)
if ( ! is.null(projectors <- object$projectors)) {
plotmain <- "Obs. vs. process: summ.stat. residuals."
feasible <- TRUE
statempdist <- project(rawstatempdist,projectors=projectors) # if composite param were fitted, the projectors are predictors of composite params
statempdist <- as.matrix(statempdist[,object$colTypes$statNames,drop=FALSE])
# A good GoF stats should be defined by consideration of some alternative model, but this concept is not available here
if (is.null(stats)) stats <- .get_gof_stats(projectors, rawstatempdist=rawstatempdist) # trying to get variable with least importance
if (verbose) cat(paste("The following raw summary statistics were retained for the GoF test:\n",
paste(stats,collapse=", "),"\n"))
#
rawstatempdist <- rawstatempdist[,stats,drop=FALSE]
statsFitToProj <- qr.solve(statempdist,rawstatempdist) # regression to projected stats (potentially predictors of composite params)
colnames(statsFitToProj) <- stats
gofStats <- rawstatempdist - statempdist %*% statsFitToProj # residuals from fit to projected stats
gofObsStat <- attr(stat_obs,"raw_data")[stats] - stat_obs %*% statsFitToProj # residuals idem
plotframe <- as.data.frame(rbind(gofStats,gofObsStat))
colnames(plotframe) <- paste0("Res(",colnames(plotframe),")")
} else {
plotmain <- "Obs. vs. process"
statempdist <- rawstatempdist[,plot.,drop=FALSE]
plotframe <- as.data.frame(rbind(statempdist,stat_obs))
}
}
# interpret plot. as character strings belonging to colnames(plotframe):
plotnames <- colnames(plotframe)
if (is.logical(plot.)) {
if (plot.) {
plot. <- plotnames[seq(min(8L,length(plotnames)))]
} else plot. <- c()
} else if (is.character(plot.)) {
plot. <- intersect(plot., plotnames)
} else if (is.numeric(plot.)) { # simplest way to force more than 8:
plot. <- plotnames[plot.]
}
if (length(plot.)) {
if (length(plot.)<length(plotnames)) message(paste0("Plotting ",length(plot.)," GoF statistics out of ", length(plotnames),":"))
plot(plotframe,
main=plotmain,
col=c(rep("black",nsim),"orange"),
pch=c(rep(20,nsim),21),
bg="orange")
}
if (feasible) {
safe_nbCluster <- get_nbCluster_range(projdata=gofStats) # this checks that one cluster (or more) can be fitted (.get_gof_stats() presumably handles that, but it provides only a default value.)
using <- Infusion.getOption("mixturing")
gofStats <- as.data.frame(gofStats)
if (using=="mclust") {
if ( ! "package:mclust" %in% search()) stop("'mclust' should be loaded first.")
models <- vector("list",length(safe_nbCluster))
for (it in seq_along(safe_nbCluster)) {
models[[it]] <- .do_call_wrap("densityMclust",
list(data=gofStats,modelNames=Infusion.getOption("mclustModel"),
G=safe_nbCluster[it], verbose=FALSE),
pack="mclust")
}
gofdens <- .get_best_mclust_by_IC(models)
} else {
gofdens <- .do_call_wrap("mixmodCluster",list(data=gofStats,
nbCluster=safe_nbCluster,
models=.do_call_wrap("mixmodGaussianModel",list(listModels=Infusion.getOption("mixmodGaussianModel"))),
seed=123,
strategy=eval(Infusion.getOption("strategy"))))
gofdens <- .get_best_mixmod_by_IC(gofdens)
}
solve_t_chol_sigma <- lapply(gofdens@parameters["variance"], function(mat) solve(t(chol(mat))))
emp_logLs <- predict(gofdens,newdata=gofStats,solve_t_chol_sigma=solve_t_chol_sigma,log=TRUE) ## pas de binFactor!
obs_logL <- predict(gofdens,newdata =gofObsStat,solve_t_chol_sigma=solve_t_chol_sigma,log=TRUE)
pval <- (sum(obs_logL>emp_logLs)+1L)/(nsim+1L)
return(list(pval=pval))
} else {
message("No feasible test")
return(list(pval=NULL))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.