R/jointDens.R

Defines functions plot.MixmodResults .RMSEwrapper.SLik_j .boot.SLik_j .boo_SLik_joint profile.SLik_j .bad_rparam_from_SLik_j .rparam_from_SLik_j_post .rparam_from_SLik_j focal_refine .rparam_dMixmod_around_focal print.SLik_j refine.SLik_j confint.SLik_j .predict_SLik_j_mclust .get_dens_from_GMM .get_dens_from_GMM.Rmixmod predict.MixmodResults infer_SLik_joint get_nbCluster_range seq_nbCluster .solve_t_cholfn .get_best_mixmod_by_IC .marginalize_mclust .conditional_mclust .conditional_Rmixmod .marginalize_Rmixmod .check_data_post_boundaries

Documented in confint.SLik_j focal_refine get_nbCluster_range infer_SLik_joint plot.MixmodResults predict.MixmodResults print.SLik_j profile.SLik_j refine.SLik_j seq_nbCluster

.check_data_post_boundaries <- function(data) {
  data <- na.omit(data)
  if (nNAlines <- length(attr(data,"na.action"))) {
    message(paste0(nNAlines," lines of reference table still contained NA's after handling 'boundaries', and were removed. see ?handling_NAs for details."))
  }
  checkDegenerate <- cov(data)
  #if ( ! anyNA(checkDegenerate)) { # cf  use="na.or.complete" => NA if no complete cases
  eigvals <- eigen(checkDegenerate, only.values = TRUE)$values
  if (any(abs(eigvals<1e-14))) warning(paste(
    c("The covariance matrix of the (parameters,statistics) table seems singular,\n",
      "implying linear relationships between the variables. Problems will likely happen.\n",
      "Redundant variables should be eliminated."
    )),immediate. = TRUE)
  #}
  data
}


## fast version of dmvnorm that uses precomputed version of chol(sigma) and minimal checking
.fast_dmvnorm <- function (x, mean, solve_t_chol_sigma, log = FALSE) {
  if ( ! is.matrix(mean)) { # originally written for single mean vector and possibly multiple x vectors
    if ( ! is.matrix(x)) dim(x) <- c(1L,length(x)) # versus is.vector ->  typically not the stat.obs vector which typically has attributes in add. to names => is.vector() is FALSE!
    if (nrow(x)==1L) { # single x vector
      tmp <- tcrossprod(solve_t_chol_sigma, x-mean)
      rss <- sum(tmp^2)
    } else { # multiple x vectors
      tmp <- solve_t_chol_sigma %*% (t(x)-mean) # backsolve(chol_sigma, t(x) - mean, transpose = TRUE)
      rss <- .colSums(tmp^2,m=nrow(tmp), n=ncol(tmp))
    }
  } else { # multiple mean vectors
    if ( ! is.matrix(x)) { # single x vector
      tmp <- solve_t_chol_sigma %*% (x-mean) # substract x from each col of 'mean'
    } else { # multiple x vectors or 1-row matrix
      if (nrow(x)==1L) x <- x[1,]
      tmp <- solve_t_chol_sigma %*% (x-mean) # backsolve(chol_sigma, t(x) - mean, transpose = TRUE)
    }
    rss <- .colSums(tmp^2,m=nrow(tmp), n=ncol(tmp))
  }
  logdet <- attr(solve_t_chol_sigma,"logdet")
  if (is.null(logdet)) logdet <- sum(log(diag(solve_t_chol_sigma))) # in (2017) method, precomputation is not interesting 
  logretval <- logdet - 0.5 * ncol(solve_t_chol_sigma) * log(2 * pi) - 0.5 * rss
  names(logretval) <- rownames(x)
  if (log) { logretval } else exp(logretval)
}

.marginalize_Rmixmod <- function(jointdens, colNames, For, over) {
  nbCluster <- jointdens@nbCluster
  margdens <- jointdens
  MEAN <- margdens@parameters@mean
  colnames(MEAN) <- colNames
  margdens@parameters@mean <- MEAN[,For,drop=FALSE]
  for (clu_it in seq_len(nbCluster)) {
    COV <- margdens@parameters@variance[[clu_it]]
    colnames(COV) <- rownames(COV) <- colNames
    margdens@parameters@variance[[clu_it]] <- COV[For,For,drop=FALSE]
  }
  margdens@varNames <- For
  margdens@simuls_activeBoundaries <- NULL
  #margdens_fP@criterionValue <- margdens_fP@likelihood <- "removed for safety"  
  return(margdens)
}

.conditional_Rmixmod <- function(jointdens, #fittedPars, 
                                 given, expansion=Infusion.getOption("expansion")) {  # expansion=1 to get the conditional distribution
  nbCluster <- jointdens@nbCluster
  conddens <- jointdens
  MEAN <- conddens@parameters@mean
  givenNames <- names(given)
  colnames(MEAN) <- colNames <- jointdens@varNames
  For <- setdiff(colNames,givenNames) 
  conddens@parameters@mean <- MEAN[,For,drop=FALSE] # resizes, but will be modified
  condprop <- conddens@parameters@proportions
  for (clu_it in seq_len(nbCluster)) {
    COV <- conddens@parameters@variance[[clu_it]]
    colnames(COV) <- rownames(COV) <- colNames
    sig22 <-  COV[givenNames,givenNames,drop=FALSE]
    sig12 <-  COV[For,givenNames,drop=FALSE]
    mean2 <- MEAN[clu_it,givenNames]
    conddens@parameters@mean[clu_it,] <- MEAN[clu_it,For] + sig12 %*% solve(sig22,given-mean2)
    sig11 <- COV[For,For,drop=FALSE]
    conddens@parameters@variance[[clu_it]] <- expansion* (sig11 - sig12 %*% solve(sig22,t(sig12))) 
    condprop[clu_it] <- log(condprop[clu_it])+dmvnorm(t(given), # dmvnorm() tests is.vector(x) which returns FALSE if x has attributes other than names.
                                                 mean2, sigma= sig22, log=TRUE)
  }
  maxlog <- max(condprop)
  condprop <- condprop-maxlog
  condprop <- exp(condprop)
  condprop <- condprop/sum(condprop)
  conddens@parameters@proportions <- condprop
  conddens@varNames <- For
  conddens@simuls_activeBoundaries <- NULL
  #margdens_fP@criterionValue <- margdens_fP@likelihood <- "removed for safety"  
  return(conddens)
}

.conditional_mclust <- function(jointdens, fittedPars, given, 
                                expansion=Infusion.getOption("expansion")) { # expansion=1 to get the conditional distribution
  nbCluster <- jointdens$G
  conddens <- jointdens
  For <- fittedPars 
  conddens$data <- conddens$data[,For]
  conddens$parameters$variance$d <- ncol(conddens$data)
  MEAN <- conddens$parameters$mean
  givenNames <- names(given)
  colNames <- c(fittedPars, givenNames)
  conddens$parameters$mean <- MEAN[For,,drop=FALSE] # resizes, but will be modified
  sigma11 <- conddens$parameters$variance$sigma[For,For,,drop=FALSE] # resizes, but will be modified
  condprop <- conddens$parameters$pro
  for (clu_it in seq_len(nbCluster)) {
    sigma_it <- conddens$parameters$variance$sigma[,,clu_it] # from single array for all clusters # drops the clu_it dimension
    sig22 <-  sigma_it[givenNames,givenNames]
    sig12 <-  sigma_it[For,givenNames]
    mean2 <- MEAN[givenNames,clu_it]
    conddens$parameters$mean[,clu_it] <- MEAN[For,clu_it] + sig12 %*% solve(sig22,given-mean2)
    sigma11[,,clu_it] <- expansion* (sigma_it[For,For] - sig12 %*% solve(sig22,t(sig12))) 
    condprop[clu_it] <- condprop[clu_it]*dmvnorm(t(given), # dmvnorm() tests is.vector(x) which returns FALSE if x has attributes other than names.
                                                 mean2, sigma= sig22, log=FALSE)
  }
  maxlog <- max(condprop)
  condprop <- condprop-maxlog
  condprop <- exp(condprop)
  condprop <- condprop/sum(condprop) # normalisation (even in non-safe-log-exp version) was missing for a long time!
  conddens$parameters$pro <- condprop
  conddens$parameters$variance <- .do_call_wrap("sigma2decomp", 
                                                list(sigma=sigma11,tol=-1), # tol to avoid a simplification in the structure of $orientation that may hit a but in mclust...
                                                pack="mclust") 
  # sigma2decomp determines it is of model "VVV". We must copy this info so that the right simulation function is called by Mclust:
  conddens$modelName <- conddens$parameters$variance$modelName
  return(conddens)
}


.marginalize_mclust <- function(jointdens, colNames, For, over) {
  nbCluster <- jointdens$G
  margdens <- jointdens
  margdens$data <- margdens$data[,For, drop=FALSE]
  margdens$parameters$variance$d <- ncol(margdens$data)
  margdens$parameters$mean <- margdens$parameters$mean[For,,drop=FALSE]
  COV <- margdens$parameters$variance$sigma # single array for all clusters 
  COV <- COV[For,For,]
  if (length(For)==1L) {
    if (length(COV)==1L) {
      margdens$parameters$variance <- list(modelName="X", d=1, G=1L, sigmasq=COV)
    } else {
      # "V" may not be implied by the original fit, but we don't count dfs on this object
      margdens$parameters$variance <- list(modelName="V", d=1, G=length(COV), sigmasq=COV, scale=COV) 
    }
  } else {
    margdens$parameters$variance <- .do_call_wrap("sigma2decomp", 
                                                  list(sigma=COV,tol=-1), # tol to avoid a simplification in the structure of $orientation that may hit a but in mclust...
                                                  pack="mclust") 
    if (margdens$modelName=="VVV") {
      cholsigma <- array(0,dim=dim(COV))
      for (it in seq_len(dim(cholsigma)[3L])) cholsigma[,,it] <- chol(COV[,,it])
      margdens$parameters$variance$cholsigma <- cholsigma
    }
  }
  # Not necess useful here, but following thesamelogic as in .conditional_mclust():
  margdens$modelName <- margdens$parameters$variance$modelName
  return(margdens)
}

.get_best_mixmod_by_IC <- function(cluObject, which=Infusion.getOption("criterion"))  {
  if (inherits(cluObject,"try-error")) return(cluObject) ## passes original error info rather than hiding it under another error
  results <- cluObject@results # assume that an nbCluster range was tried
  if (length(results)==1L) return(results[[1L]])
  anyNaN <- logical(length(results))
  # patch for odd bug of Rmixmod: NaN in parameters but no error reported, and lik & BIC are real.
  for (it in seq_along(results)) anyNaN[it] <- anyNA(results[[it]]@proba)
  results <- results[ ! anyNaN]
  BICs <- logLs <- numeric(length(results))
  for (it in seq_along(results)) {
    BICs[it] <- results[[it]]@criterionValue
    logLs[it] <- results[[it]]@likelihood
  }
  if (which=="BIC") return(results[[which.min(BICs)]])
  # ELSE
  dfs <- (2*logLs+BICs)/(log(cluObject@nbSample))
  AICs <- -2*logLs+2*dfs
  return(results[[which.min(AICs)]])
}

# .get_best_clu_by_AIC <- function(cluObject) { ## cluObject: mixmodCluster object
#   if (inherits(cluObject,"try-error")) return(cluObject) ## passes original error info rather than hiding it under another error
#   BICs <- unlist(lapply(cluObject@results,slot,name="criterionValue"))
#   logLs <- unlist(lapply(cluObject@results,slot,name="likelihood"))
#   dfs <- (2*logLs+BICs)/(log(cluObject@nbSample))
#   AICs <- -2*logLs+2*dfs
#   return(cluObject@results[[which.min(AICs)]])
# }

.solve_t_cholfn <- function(mat, smoothing_mat=NULL) { # smoothing_mat arg added to allow optimization via iterative smmothing (which does not work)
  if (!is.null(smoothing_mat)) mat <- mat+smoothing_mat
  resu <- solve(t(chol(mat)))
  return(structure(resu,logdet=sum(log(diag(resu)))))
}

seq_nbCluster <- function(nr) {do.call(Infusion.getOption("seq_nbCluster"), list(nr=nr))}

get_nbCluster_range <- function(projdata, nr= nrow(projdata), nc=ncol(projdata), 
                                nbCluster=seq_nbCluster(nr)) { # data must be the mixmod data
  maxnb <- do.call(.Infusion.data$options$maxnbCluster, list(nr=nr,nc=nc))
  if (maxnb<1L) {
    warning("Too few simulations: Gaussian mixture modelling may fail; single cluster inferred", immediate. = TRUE)
    return(1L)
  } else if (any(nbCluster> maxnb)) {
    message(paste0("Gaussian mixture modelling constrained to at most ",maxnb," clusters by the number of columns."))
    if (identical(attr(nbCluster,"max"),TRUE)) { # then nbCluster was seq_nbCluster ... hence:
      maxnb
    } else return(nbCluster[nbCluster<=maxnb])
  } else if (identical(attr(nbCluster,"max"),TRUE)) {
    max(nbCluster)
  } else nbCluster 
}

infer_SLik_joint <- function(data, ## reference table ~ abc
                            stat.obs,
                            logLname=Infusion.getOption("logLname"), ## not immed useful
                            Simulate=attr(data,"Simulate"), ## may be NULL
                            nbCluster=seq_nbCluster(nr=nrow(data)),
                            using=Infusion.getOption("mixturing"),
                            verbose=list(most=interactive(), ## must be explicitly set to FALSE in knitr examples
                                         pedantic=FALSE,
                                         final=FALSE),
                            marginalize=TRUE
) {
  if ( ! is.data.frame(data)) {
    stop(paste("'object' is not a data.frame.\n Did you mean to call infer_logLs() rather than infer_Slik_joint() ?"))
  }
  if (is.null(raw_data <- attr(data,"raw_data"))) {
    if (is.null(attr(data,"LOWER"))) { # Temporary patch? stop() instead of warning() ?
      warning('Some required attributes seem to be missing from the "data" [see "Value" in help("add_reftable")].\n Further execution could fail.')
    }
  } else if (is.null(attr(raw_data,"LOWER"))) { # Temporary patch? stop() instead of warning() ?
    warning('Some required attributes seem to be missing from attr(data,"raw_data" [see "Value" in help("add_reftable")].\n Further execution could fail.')
  }
  if ( ! is.null( cn <- colnames(stat.obs))) {
    message("Note: 'stat.obs' should be a numeric vector, not a matrix or data.frame. Converting...")
    stat.obs <- drop(stat.obs)
    names(stat.obs) <- cn ## minimal patch so that names() can be used, not colnames()
  }
  if (!is.list(verbose)) verbose <- as.list(verbose)
  if (is.null(names(verbose))) names(verbose) <- c("most","final")[seq_len(length(verbose))]
  if (is.null(verbose$most)) verbose$most <- interactive()
  if (is.null(verbose$pedantic)) verbose$pedantic <- FALSE
  if (is.null(verbose$final)) verbose$final <- FALSE
  if (length(unique(colnames(data)))<ncol(data)) {
    stop("Some columns of the 'data' have identical names. Please correct.")
  }
  statNames <- names(stat.obs)
  if (nrow(data)<20000) check_raw_stats(na.omit(data),statNames = statNames,remove = FALSE, verbose=FALSE) # silent if not problem detected, but still verbose otherwise
  for (st in statNames) {
    tabstat <- table(data[,st])
    #which_mass <- as.numeric(names(which(tabstat>1L)))
    if (any(tabstat>Infusion.getOption("nodesize"))) { # this is a projector-specific value yet we use a single value fro the slik object
      # There is <ranger object>$min.node.size but no such info in the RandomForest results.
      massvalues <- as.numeric(names(which.max(tabstat))) 
      names(massvalues) <- rep(st,length(massvalues))
      attr(stat.obs,"boundaries") <- c(attr(stat.obs,"boundaries"),massvalues)
    }
  }
  if (is.null(allPars <- attr(data,"allPars"))) {  
    allPars <- setdiff(colnames(data),statNames) ## first iteration
  } ## else do not add the cumul_iter variable to the parameters !
  whichvar <- apply(data[,allPars,drop=FALSE],2L,function(v) length(unique(range(v)))>1L)
  fittedPars <- allPars[whichvar]
  fixedPars <- data[1L,!whichvar,drop=FALSE] ## has both names and values
  rownames(fixedPars) <- NULL## avoids warning on rownames when later used in cbind()
  nbCluster <- eval(nbCluster) # allowing evaluation of quoted default arg passed from refine.default()
  if ( identical(nbCluster,"max")) nbCluster <- structure(seq_nbCluster(nr=nrow(data)), max=TRUE)
  if (! is.list(nbCluster)) nbCluster <- list(jointdens=nbCluster, pardens=nbCluster)
  nbCluster <- lapply(nbCluster,eval.parent)
  if (using=="mclust") {
    if ( ! "package:mclust" %in% search()) stop("'mclust' should be loaded first.")
    if (verbose$most) cat(paste("Densities modeling:",nrow(data),"points;\n"))
    
    if (length(nbCluster$jointdens)==1L) {
      jointdens <- .do_call_wrap("densityMclust",
                                 list(data=data[,c(fittedPars,statNames)],modelNames=Infusion.getOption("mclustModel"), 
                                      G=nbCluster$jointdens,verbose=FALSE),
                                 pack="mclust")
    } else { 
      models <- vector("list",length(nbCluster$jointdens))
      for (it in seq_along(nbCluster$jointdens)) {
        models[[it]] <- .do_call_wrap("densityMclust",
                                      list(data=data[,c(fittedPars,statNames)],modelNames=Infusion.getOption("mclustModel"), 
                                           G=nbCluster$jointdens[it],verbose=FALSE),
                                      pack="mclust")
      }
      jointdens <- .get_best_mclust_by_IC(models) 
    }
    if (verbose$most) cat(paste(" joint density modeling:",jointdens$G,"clusters;\n"))
    if (verbose$pedantic) if (jointdens$G==max(nbCluster$jointdens)) message("Inferred # of clusters= max of allowed range.")
    if (marginalize) {
      pardens <- .marginalize_mclust(jointdens, colNames= c(fittedPars,statNames),
                              For=fittedPars, over=statNames)
      # defective but should be sufficient for prediction. Some garde-fou:
      #pardens@proba <- matrix(NA_real_)
    } else {
      if (length(nbCluster$pardens)==1L) {
        pardens <- .do_call_wrap("densityMclust",
                                 list(data=data[,fittedPars,drop=FALSE],modelNames=Infusion.getOption("mclustModel"), G=nbCluster$pardens,verbose=FALSE),
                                 pack="mclust")
      } else { 
        models <- vector("list",length(nbCluster$pardens))
        for (it in seq_along(nbCluster$pardens)) {
          models[[it]] <- .do_call_wrap("densityMclust",
                                        list(data=data[,fittedPars,drop=FALSE],modelNames=Infusion.getOption("mclustModel"), 
                                             G=nbCluster$pardens[it], verbose=FALSE),
                                        pack="mclust")
        }
        pardens <- .get_best_mclust_by_IC(models) 
      }
      if (verbose$most) cat(paste(" parameter modeling:",pardens$G,"clusters.\n"))
    }
    resu <- list(jointdens=jointdens,pardens=pardens)
    # pdl <- vector("list", pardens$G)
    # sigma <- resu$pardens$parameters$variance$sigma
    # for (it in seq_len(pardens$G)) pdl[[it]] <- solve(t(chol(sigma[,,it])))
    # jdl <- vector("list", jointdens$G)
    # sigma <- resu$jointdens$parameters$variance$sigma
    # for (it in seq_len(jointdens$G)) jdl[[it]] <- solve(t(chol(sigma[,,it])))
    # resu$clu_params$solve_t_chol_sigma_lists <- list(pardens=pdl, jointdens=jdl) 
  } else if (using=="xLLiM") {
    summstats <- data[,statNames,drop=FALSE]
    RGPpars <- data[,fittedPars,drop=FALSE]

    # conddens <- .gllim(responses=t(summstats), predictors=t(RGPpars), nbCluster=nbCluster$jointdens)
    
    gllimobj <- .wrap_gllim(RGPpars=t(RGPpars), summstats=t(summstats), nbCluster=nbCluster$jointdens)
    
    if (verbose$most) cat(paste(" conditional density modeling:",ncol(gllimobj$c),"clusters;\n"))
    #
    resu <- list(gllimobj=gllimobj)    
  } else {
    models <- .do_call_wrap("mixmodGaussianModel",list(listModels=Infusion.getOption("mixmodGaussianModel")))
    if (verbose$most) cat(paste("Densities modeling:",nrow(data),"points;\n"))
    locarglist <- list(data=data[,c(fittedPars,statNames)],nbCluster=nbCluster$jointdens, 
                       seed=Infusion.getOption("mixmodSeed") , models=models)
    if (FALSE) { ## older code; still useful for devel
      jointdens <- try(.do_call_wrap("mixmodCluster",locarglist),silent = TRUE)
      jointdens <- .get_best_mixmod_by_IC(jointdens)
    }
    jointdens <- try(do.call(".densityMixmod",c(locarglist,list(stat.obs=stat.obs))),silent=TRUE) # using seed in locarglist ie that from geoOption
    while ((inherits(jointdens,"try-error") || anyNA(jointdens@parameters@mean)) && max(locarglist$nbCluster)>1L) {
      # Generate better (smaller) values with two cases whether a range or a single value were tried.
      if (length(locarglist$nbCluster)==1L) { # typical of RMSE bootstrap sample. Reduce by 1 or more
        nbCluster <- min(locarglist$nbCluster-1L, max(get_nbCluster_range(projdata=data, nbCluster=seq(locarglist$nbCluster))))
      } else  nbCluster <- get_nbCluster_range(projdata=data, nbCluster=seq(max(locarglist$nbCluster))) # generate a more standard range
      if ( length(setdiff(nbCluster, locarglist$nbCluster))) { # if there are new values to try
        locarglist$nbCluster <- nbCluster
        jointdens <- try(do.call(".densityMixmod",c(locarglist,list(stat.obs=stat.obs))),silent=TRUE) # using seed in locarglist ie that from geoOption
      } else break # = no more ideas to fix problem
    } ## else .densityMixmod has already run .get_best_mixmod_by_IC()
    if (inherits(jointdens,"try-error"))  {
      stop(jointdens) # i.e stop(<error object>) # (But in RMSE bootstrap, new bootstrap samples may be analyzed if a few failed) 
    } else if (anyNA(jointdens@parameters@mean)) stop("Rmixmod returned NaN's in parameter estimates.")
    # plotCluster(jointdens,data=locarglist$data,variable1="theta_p",variable2="theta") # var1: stat (prediction of projection); var2: actual param
    if (verbose$most) cat(paste(" joint density modeling:",jointdens@nbCluster,"clusters;\n"))
    if (verbose$pedantic) if (jointdens@nbCluster==max(nbCluster$jointdens)) message("Inferred # of clusters= max of allowed range.")
    if (marginalize) {
      pardens <- .marginalize_Rmixmod(jointdens, colNames= c(fittedPars,statNames),
                              For=fittedPars, over=statNames)
      # defective but should be sufficient for prediction. Some garde-fou:
      pardens@proba <- matrix(NA_real_)
      pardens@parameters@nbFreeParam <- NA_integer_
    } else {
      locarglist <- list(data=data[,fittedPars,drop=FALSE], nbCluster=nbCluster$pardens, 
                         seed=123 , models=models)
      pardens <- try(do.call(".densityMixmod",c(locarglist,list(stat.obs=stat.obs))),silent = TRUE)
      if (inherits(pardens,"try-error")) {
        # ignore any non-default infer_SLik_joint(.,nbCluster) argument
        locarglist$nbCluster <- get_nbCluster_range(projdata=data)
        pardens <- do.call("mixmodCluster",locarglist)
        pardens <- .get_best_mixmod_by_IC(pardens) 
      }                               
      if (verbose$most) cat(paste(" parameter modeling:",pardens@nbCluster,"clusters.\n"))
    }
    #plotCluster(pardens,data=data[,fittedPars]) to plot @results[[1L]] which is a 'MixModResults', not a 'mixmodCluster' object.
    # Rmixmod::plot(<mixmodCluster object>) with Rmixmod:: to plot from any envir, not only the global one
    resu <- list(jointdens=jointdens,pardens=pardens)
    resu$clu_params <- list(logproportions=log(jointdens@parameters["proportions",]),
                            jointdens_means=t(jointdens@parameters["mean",]),
                            pardens_means=t(pardens@parameters["mean",]),
                            solve_t_chol_sigma_lists=list(
                              pardens=lapply(resu$pardens@parameters["variance"], .solve_t_cholfn),
                              jointdens=lapply(resu$jointdens@parameters["variance"], .solve_t_cholfn)
                            ) 
                            )
  }
  attr(resu,"EDFstat") <- "[see this string in infer_SLik_joint()]" ## 
  resu$logLs <- structure(data,stat.obs=stat.obs,Simulate=Simulate) ## as in infer_surface.logLs
  # attr(resu,"callfn") <- "infer_SLik_joint"
  resu$projectors <- attr(data,"projectors")
  resu$`Infusion.version` <- packageVersion("Infusion")
  resu$colTypes <- list(allPars=allPars, ## keeps the order of the columns
                                fittedPars=fittedPars,
                                fixedPars=fixedPars,
                                logLname=logLname,
                                statNames=statNames)
  resu$lower <- sapply(data[,fittedPars,drop=FALSE],min) ## really this
  resu$upper <- sapply(data[,fittedPars,drop=FALSE],max) # ... used by MSL -> optim
  if (any(is.na(c(resu$lower,resu$upper)))) stop("NA(s) in c(lower,upper))") 
  resu$LOWER <- attr(data,"LOWER") # used in many places
  if (is.null(resu$LOWER)) resu$LOWER <- resu$lower
  resu$UPPER <- attr(data,"UPPER") # ... used by confintAll
  if (is.null(resu$UPPER)) resu$UPPER <- resu$upper
  attr(resu,"Infusion.version") <- packageVersion("Infusion")
  class(resu) <- c("SLik_j",class(resu))
  resu$raw_data <- attr(data,"raw_data")
  resu$using <- using
  return(resu)
}


## was infer_SLik_joint -> predict -> predict.SLik_j -> predict.MixmodResults
# except that now                                    -> .get_dens_from_GMM.Rmixmod -> predict.dMixmod
predict.MixmodResults <- function(object, newdata,log=TRUE, solve_t_chol_sigma_list,...) {
  Sobs_activeBoundaries <- atb <- freqs <- NULL ## FR->FR tempo
  nbCluster <- object@nbCluster
  if (is.null(nrow(newdata)) ) {
    newdata <- t(as.matrix(newdata)) # as.matrix keeps names
  } else if (inherits(newdata,"data.frame")) {
    newdata <- matrix(unlist(newdata,recursive = FALSE, use.names = FALSE), nrow=nrow(newdata),
                      dimnames=list(NULL,colnames(newdata)) ) # newdata <- as.matrix(newdata)
  }
  if (log) { 
    if (nbCluster>0L) {
      density <- matrix(nrow=nrow(newdata),ncol=nbCluster)
      for (k in 1:nbCluster) {
        density[,k] <- log(object@parameters["proportions", k]) + 
          .fast_dmvnorm(newdata, object@parameters["mean", k], solve_t_chol_sigma= solve_t_chol_sigma_list[[k]],log=log)
      }
      maxlogs <- matrixStats::rowMaxs(density)
      normalizedlogs <- apply(density,2L,`-`,maxlogs) ## highest value per row is 0
      ## apply return mess again: 
      if (is.vector(normalizedlogs)) dim(normalizedlogs) <- c(1L, length(normalizedlogs))
      mixture <- .rowSums(exp(normalizedlogs),m=nrow(normalizedlogs), n=ncol(normalizedlogs)) ## exp(normalizedlogs) in (0,1); sum(exp(logLi-maxlog))= exp(-maxlog) sum(exp(logLi))= exp(-maxlog) sum(Li)
      mixture <- log(mixture) + maxlogs ## log(sum(Li))
      if ( !is.null(Sobs_activeBoundaries)) mixture <- mixture + atb*log(freqs)
    } else mixture <- atb*log(freqs)
  } else {
    if (nbCluster>0L) {
      density <- matrix(nrow=nrow(newdata),ncol=nbCluster)
      for (k in 1:nbCluster) {
        density[,k] <- object@parameters["proportions", k] * 
          .fast_dmvnorm(newdata, object@parameters["mean", k], solve_t_chol_sigma_list[[k]],log=log)
      }
      mixture <- .rowSums(density,m=nrow(density), n=ncol(density)) ## sum(Li) 
      if ( !is.null(Sobs_activeBoundaries)) mixture <- mixture*freqs
    } else mixture <- freqs
  }
  return(mixture) # not fully protected from NaN's - for newdata *far* from the represented hypervolume.
}

.get_dens_from_GMM.Rmixmod <- function(X, # parameters only 
                                  object, 
                                  tstat.obs, # 1-row matrix as otherwise more cases should be considered for cbind'ing
                                  log, 
                                  which, # "safe" version ignores, by correcting, spuriously high logL in area of low parameter density.
                                  solve_t_chol_sigma_lists=object$clu_params$solve_t_chol_sigma_lists, # added to allow optimization via iterative smmothing (which does not work)
                                  ...)  {
  if (is.null(dim(X))) {
    newjointX <- cbind(t(X),tstat.obs) # cbind two 1-row matrices
  } else {
    if (length(intersect(colnames(X),colnames(tstat.obs)))) stop("'X' should contain only parameters, not summary statistics")
    newjointX <- cbind(X,tstat.obs[rep(1,nrow(X)),,drop=FALSE]) # cbind two nrow(X)-row matrices
  }
  if (which!="parvaldens") {
    jointvaldens <- predict(object$jointdens,
                            newdata=newjointX,
                            solve_t_chol_sigma_list=solve_t_chol_sigma_lists$jointdens,
                            logproportions=object$clu_params$logproportions,
                            clu_means=object$clu_params$jointdens_means,
                            log=log,...)
  }
  if (which=="jointvaldens") return(jointvaldens)
  parvaldens <- predict(object$pardens,
                        newdata=newjointX, # statistics will be ignored
                        solve_t_chol_sigma_list=solve_t_chol_sigma_lists$pardens,
                        logproportions=object$clu_params$logproportions,
                        clu_means=object$clu_params$pardens_means,
                        log=log,...) 
  if (which=="parvaldens") return(parvaldens)
  # ELSE: "lik", or "safe" for safe version of "lik" using object$thr_dpar
  if (log) {
    condvaldens <- jointvaldens - parvaldens
    if (which=="safe") condvaldens <- condvaldens + pmin(0,parvaldens-object$thr_dpar) # decreasing returned logL when parvaldens<object$thr_dpar
  } else {
    condvaldens <- jointvaldens/parvaldens
    if (which=="safe") condvaldens <- condvaldens*pmin(1,parvaldens/object$thr_dpar)
  }
  return(condvaldens) # vector if X was a matrix
}

.get_dens_from_GMM <- function(X, object,  # SLik_j object
                               tstat.obs, log, which, solve_t_chol_sigma_lists) {
  if (inherits(object$jointdens,"dMixmod")) {
    maybelogl <- .get_dens_from_GMM.Rmixmod(X=X, object=object, tstat.obs=tstat.obs, log=log, which=which, 
                                            solve_t_chol_sigma_lists=solve_t_chol_sigma_lists)
    ## RMSEs computation uses names of CI bounds, to be retained here, to name its results
    # apply(newdata,1L, .get_dens_from_GMM.Rmixmod, object=object, tstat.obs=tstat, log=log, which=which)
  } else if (inherits(object$jointdens,"densityMclust")) {
    maybelogl <- .predict_SLik_j_mclust(object=object, newdata=X, tstat.obs=tstat.obs, log=log, which=which, 
                                   solve_t_chol_sigma_lists=solve_t_chol_sigma_lists)
  } else if (inherits(object$gllimobj,"gllim")) {
    maybelogl <- .get_dens_from_GMM.gllim(X=X, 
                                          object=object,
                                          tstat.obs=tstat.obs, log=log, which=which)
  } 
  names(maybelogl) <- rownames(X)
  maybelogl # logl or safe loglor RGPdens...
}

.predict_SLik_j_mclust <- function(object, newdata, tstat.obs, log, which, solve_t_chol_sigma_lists=object$clu_params$solve_t_chol_sigma_lists)  {
  if (which!="parvaldens") jointvaldens <- predict(object$jointdens, newdata=cbind(data.frame(newdata),tstat.obs), 
                                                   solve_t_chol_sigma_lists=solve_t_chol_sigma_lists) # ! order of elements in newdata must be that of fittedPars as in object$jointdens$data
  if (which=="jointvaldens") return(jointvaldens)
  parvaldens <- predict(object$pardens,newdata=newdata, 
                        solve_t_chol_sigma_lists=solve_t_chol_sigma_lists) 
  if (which=="parvaldens") return(parvaldens)
  if (log) {
    jointvaldens <- pmax(jointvaldens, .Machine$double.xmin) # F I X M E for the value
    condvaldens <- log(jointvaldens) - log(parvaldens)
  } else condvaldens <- jointvaldens/parvaldens
  if (any(is.infinite(condvaldens))) warning("any(is.infinite(condvaldens)) is TRUE: expect an error.") 
  return(condvaldens)
}

predict.SLik_j <- function (object, 
                            newdata, ## requests new fittedPars values! 
                            log=TRUE, 
                            which="lik", # may be still preferable, to "safe", for drawing new points
                            tstat= t(attr(object$logLs,"stat.obs")), # 1-row matrix...
                            solve_t_chol_sigma_lists=object$clu_params$solve_t_chol_sigma_lists, # added to allow optimization via iterative smmothing (which does not work)
                            ...) {
  if (is.null(nrow(newdata)) ) newdata <- t(as.matrix(newdata)) # as.matrix keeps names
  if (is.null(colnames(newdata))) colnames(newdata) <- object$colTypes$fittedPars
  # not useful bc predict.MixmodResults does not handle it:
  #newdata <- cbind(newdata, t(replicate(nrow(newdata),attr(object$logLs,"stat.obs"))))
  logl <- .get_dens_from_GMM(X=newdata, object=object, tstat.obs=tstat, log=log, which=which, solve_t_chol_sigma_lists=solve_t_chol_sigma_lists)

  if ( ! is.null(object$prior_logL)) logl <- logl + object$prior_logL(newdata)
  return(logl)
}

confint.SLik_j <- function(object, parm, ## parm is the parameter which CI is sought 
                         level=0.95, verbose=interactive(),fixed=NULL,which=c(TRUE,TRUE), 
                         #Bartlett=0L, 
                         ...) {
  .confintAll(object=object, parm=parm, ## parm is the parameter which CI is sought 
             givenmax = object$MSL$maxlogL,
             level= - qchisq(level,df=1)/2, ## df=1 for 1D profile; /2 to compare to logLR rather than 2 logLR  
             #Bartlett=Bartlett,
             verbose=verbose,fixed=fixed,which=which,...)
}

refine.SLik_j <- function(object, method=NULL, using=object$using, ...) {
  if (is.null(method)) method <- "mixmodCluster" ## no clear effect but distinct from SLik case
  if (is.null(using)) using <- Infusion.getOption("mixturing") # in case an object from version < 2.0.4 is input...
  # if it's not NULL, any user's non-default for 'using' is passed to the next call and heeded. 
  resu <- refine.default(object, surfaceData=object$logLs, method=method, using=using, ...)
  # resu may simply be a table of new parameter values...:
  if (inherits(resu,"SLik_j")) assign("bootreps_list", list(), envir=resu$bootLRTenv) # or reset the env by resu$bootLRTenv <- list2env(list(bootreps_list=list()))?
  resu
}

summary.SLik_j <- summary.SLik

print.SLik_j <- function(x,...) {summary.SLik_j(x,...)} 

.rparam_dMixmod_around_focal <- function(object, # *dMixmod* object
                                 focal, ## single point # recall that there exists .rparam_from_SLik_j_post(), 
                                        # which is similar is 'object' is the posterior density, EXCEPT that it does not handle this additional constraint
                                 solve_t_chol_sigma_list,
                                 size) {
  varNames <- object@varNames
  if (is.null(dim(focal))) {
    datanames <- names(focal) # before they are erased by the next dim() assignment (!)
    dim(focal) <- c(1L, length(focal))
    if (is.null(datanames)) {
      colnames(focal) <- varNames
    } else colnames(focal) <- datanames
  } else if (inherits(focal,"data.frame")) {
    focal <- matrix(unlist(focal,recursive = FALSE, use.names = FALSE), nrow=1L,
                    dimnames=list(NULL,colnames(focal)) ) # newdata <- as.matrix(newdata)
  }
  if ( !is.null(Sobs_activeBoundaries <- object@Sobs_activeBoundaries)) { # if the Sobs matches some boundaries, identified in Sobs_activeBoundaries
    # then looks whether the newdata match all of the boundaries met by Sobs
    boundsdata <-  focal[,names(Sobs_activeBoundaries),drop=FALSE]
    atb <- apply(boundsdata,1L,`==`,y=Sobs_activeBoundaries)
    # If not (i.e. only partial matching) the object does not predict correctly the density => warning + heuristic patch:
    if (!all(atb)) {warning("'predict.dMixmod' cannot compute joint out-of-boundary density from conditional at-boundary density. ")}
    freq <- object@freq
    freqs <- atb*freq+(1-atb)*(1-freq) ## uses 1-freq instead of the unknown density of the variable(s) in boundaries 
    densitydata <- focal[,varNames,drop=FALSE]
  } else if ( !is.null(simuls_activeBoundaries <- object@simuls_activeBoundaries)) {
    ## do not reduce data in this case
    densitydata <- focal 
    ## only for the warning:
    boundsdata <-  focal[,names(simuls_activeBoundaries),drop=FALSE]
    atb <- apply(boundsdata,1L,`==`,y=simuls_activeBoundaries)
    if (any(atb)) {
      warning("'predict.dMixmod' cannot compute conditional at-boundary density from joint out-of-boundary density.")
      # return value is the same as for predict(,tcstat.obs=<newdata>) 
    }
  } else densitydata <- focal[,varNames,drop=FALSE]
  
  nbCluster <- object@nbCluster
  
  density <- numeric(nbCluster)
  for (k in 1:nbCluster) {
    density[k] <- # without the 'prior' cluster proba bc aim is not to predict the cluster given these 'prior' freqs but to modify this 'prior'  
      .fast_dmvnorm(densitydata, object@parameters["mean", k], solve_t_chol_sigma= solve_t_chol_sigma_list[[k]],log=TRUE)
  }
  k <- which.max(density)
  trypoints <- rmvnorm(size, 
                       mean=object@parameters["mean", k], 
                       sigma= object@parameters["variance",k])
  colnames(trypoints) <- varNames
  trypoints
}

focal_refine <- function(object, focal, size, plotprof=TRUE, ...) {
  fittedPars <- object$colTypes$fittedPars
  prof <- profile(fitted=object,value=focal, return.optim=TRUE)
  fullfocal <- c(focal,prof$par)[fittedPars]
  posteriordens <- .conditional_Rmixmod(object$jointdens, given=attr(object$logLs,"stat.obs"), expansion=1) 
  solve_t_chol_sigma_list <- lapply(posteriordens@parameters["variance"], .solve_t_cholfn)
  trypoints <- .rparam_dMixmod_around_focal(posteriordens, focal = fullfocal, solve_t_chol_sigma_list=solve_t_chol_sigma_list, 
                                            size=size)
  for (vv in fittedPars) trypoints <- trypoints[trypoints[,vv]>object$LOWER[vv],,drop=FALSE]
  for (vv in fittedPars) trypoints <- trypoints[trypoints[,vv]<object$UPPER[vv],,drop=FALSE]
  trypoints <- data.frame(trypoints)
  trypoints <- cbind(trypoints,object$colTypes$fixedPars) ## add fixedPars for simulation
  trypoints <- trypoints[,object$colTypes$allPars,drop=FALSE] ## column reordering and remove polluting things
  object <- refine(object, trypoints=trypoints, ...)
  if (plotprof) {
    pars <- names(focal)
    if (length(pars)==1L) {
      plot1Dprof(object, pars = pars, ...)
    } else if (length(pars)==2L) {
      plot2Dprof(object, pars = pars, ...)
    } 
  }
  object
}

.rparam_from_SLik_j <- function(object, 
                                frac, 
                                target_size=NULL,
                                fittedPars=NULL,level,tol=0) {
  if (is.null(fittedPars)) fittedPars <- object$colTypes$fittedPars
  CI_LRstat <- qchisq(level,df=length(fittedPars))/2 
  size_first_iter <- attr(object$logLs,"n_first_iter")
  if (is.null(size_first_iter)) size_first_iter <- length(which(object$logLs$cumul_iter==1L)) ## FR->FR temporary back compat
  prev_n_iter <- max(object$logLs$cumul_iter)
  if (is.null(target_size)) target_size <- frac*max(size_first_iter/2, size_first_iter*(prev_n_iter+1L)-nrow(object$logLs))
  #
  ceil_size <- target_size
  if ( ! is.null(freq_good <- attr(object$logLs,"freq_good")$default)) ceil_size <- ceiling(ceil_size/freq_good)
  #
  if (inherits(object$jointdens,"densityMclust")) {
    trypoints <- do.call("sim", c(object$jointdens[c("modelName", "parameters")],list(n=ceil_size)))
    trypoints <- trypoints[,1L+seq_len(length(fittedPars)),drop=FALSE]  
  # } else if (FALSE &&
  #            # Inhibit block for generation of specific param points when profiling has found a new maximum: 
  #            (identical(object$MSL$updated_from_prof,TRUE) || # object$MSL envir recreated by user after plot1Dprof, plot2Dprof or plot() found a new max
  #             ! is.null(object$MSL$init_from_prof) # only .MSL_update() run -> object$MSL$MSLE modified but envir not recreated, $init_from_prof still there.
  #            )
  #           ) {
  #   { ##### begin never-executed block
  #     # if we sample always in the posterior = prior*lik, we will end sampling according to the lik, so too much concentrated. 
  #     size_from_pardens <- ceil_size* (1-1/object$jointdens@nbCluster)
  #     if (inherits(object$gllimobj,"gllim")) {
  #       trypoints <- .simulate.gllimX(object$gllimobj, nsim=1L, size=size_from_pardens, drop=TRUE, colTypes=object$colTypes)
  #     } else {
  #       # trypoints <- .simulate.MixmodResults(object$jointdens, nsim=1L, size=size_from_pardens, drop=TRUE) ## simulates joint
  #       # trypoints <- trypoints[,seq_len(length(fittedPars)),drop=FALSE] # sample in marginal distrib of params from sample in jointdens 
  #       # ## : samples the joint and keep only the params => distrib of trypoints=current distr of param (pardens) => more simply:
  #       trypoints <- .simulate.MixmodResults(object$pardens, nsim=1L, size=size_from_pardens, drop=TRUE) ## simulates joint
  #     }
  #     if (FALSE) {
  #       postpoints <- .rparam_dMixmod_around_focal(object$jointdens, # dMixmod object
  #                                                  focal=c(object$MSL$MSLE,attr(object$logLs,"stat.obs")), 
  #                                                  solve_t_chol_sigma_list=object$clu_params$solve_t_chol_sigma_lists$jointdens,
  #                                                  size= ceil_size - nrow(trypoints))
  #       # this samples the joint density around the focal point so +/- conditionally on stat.obs. 
  #       # Then next line +/- samples in posterior distrib of params given stat.obs  
  #       postpoints <- postpoints[,seq_len(length(fittedPars)),drop=FALSE] 
  #     } else {
  #       posteriordens <- .conditional_Rmixmod(object$jointdens, given=attr(object$logLs,"stat.obs"), expansion=1) 
  #       solve_t_chol_sigma_list <- lapply(posteriordens@parameters["variance"], .solve_t_cholfn)
  #       postpoints <- .rparam_dMixmod_around_focal(posteriordens, # dMixmod object
  #                                                  focal=object$MSL$MSLE, 
  #                                                  solve_t_chol_sigma_list=solve_t_chol_sigma_list,
  #                                                  size= ceil_size - nrow(trypoints))
  #     }
  #     trypoints <- rbind(trypoints,postpoints)
  #   }  ##### en never-executed block
  } else if (inherits(object$gllimobj,"gllim")) {
    trypoints <- .simulate.gllimX(object$gllimobj, nsim=1L, size=ceil_size, drop=TRUE, colTypes=object$colTypes)
  } else {
    # trypoints <- .simulate.MixmodResults(object$jointdens, nsim=1L, size=ceil_size, drop=TRUE) ## simulates joint
    # trypoints <- trypoints[,seq_len(length(fittedPars)),drop=FALSE] # sample in marginal distrib of params from sample in jointdens
    # ## : samples the joint and keep only the params => distrib of trypoints=current distr of param (pardens) => more simply:
    trypoints <- .simulate.MixmodResults(object$pardens, nsim=1L, size=ceil_size, drop=TRUE) ## simulates joint
  }
  colnames(trypoints) <- fittedPars
  for (vv in fittedPars) trypoints <- trypoints[trypoints[,vv]>object$LOWER[vv],,drop=FALSE]
  for (vv in fittedPars) trypoints <- trypoints[trypoints[,vv]<object$UPPER[vv],,drop=FALSE]
  #
  prior <- predict(object,trypoints,which="parvaldens") 
  logLs <- predict(object,trypoints,which="lik") ## likelihood fn  
  flatnd_max <- max(logLs)-CI_LRstat ## defines an upper slice of logLs values.
  upperslice <- ( logLs> flatnd_max )
  weights <- numeric(length(logLs))
  weights[ upperslice] <- max(prior[upperslice])-prior[upperslice] ## in log density units
  max_upper <- max(weights[ upperslice]) ## in log density units
  weights[ ! upperslice] <- logLs[ ! upperslice] -prior[ ! upperslice] 
  max_lower <- max(weights[ ! upperslice])
  weights[ ! upperslice] <- weights[ ! upperslice] + max_upper-max_lower # hence same maximum 'max_upper' in the two regions.
  # Refinement of the same idea but with lesser impact of logLs variation outside the upperslice:
  weights[ ! upperslice] <- ((1-tol)*weights[ ! upperslice]+tol*max_upper) ## still in log density units
  # we could correct  ! upperslice acording to 'prior' but this not be worth the effort. 
  #
  weights <- exp(weights-max(weights)) 
  wei_runif <- runif(n=length(weights))<weights ## allows some exploration by sampling some 'bad' points
  good <- which(wei_runif) ## samples uniformly in the current fitted distrib of params with logL>threshold,
                                                  ## with tapering around
  freq_good <- length(good)/ceil_size 
  if (length(good)>target_size) {
    good <- good[sample(length(good),size = target_size)]
  } else if (length(good)>target_size-10L) { # just missing a few points... 
    bad <- which(!wei_runif)
    order_bad <- order(weights[bad],decreasing=TRUE)
    supplement <- bad[order_bad][seq(min(length(bad),target_size-length(good)))]
    good <- c(good,supplement)
  } else if (freq_good < 1/5) {
    bad <- which(!wei_runif)
    order_bad <- order(weights[bad],decreasing=TRUE)
    supplement <- bad[order_bad][seq(ceil_size/5-length(good))]
    good <- c(good,supplement)
  } # else may stay a few points below target, rather than add points that were not retained bc of low weights (it's better to wait next iteration)
  trypoints <- trypoints[good,,drop=FALSE]
  trypoints <- (cbind(trypoints,object$colTypes$fixedPars)) ## add fixedPars for simulation
  trypoints <- trypoints[,object$colTypes$allPars,drop=FALSE] ## column reordering
  trypoints <- data.frame(trypoints)
  return(list(trypoints=trypoints, 
              freq_good=max(1/5,freq_good) # max() to avoid explosion of # of sampled points
             ))
}


.rparam_from_SLik_j_post <- function(object , frac) {
  if (frac==0) return(list(trypoints=NULL, freq_good=NA))
  size_first_iter <- attr(object$logLs,"n_first_iter")
  if (is.null(size_first_iter)) size_first_iter <- length(which(object$logLs$cumul_iter==1)) ## FR->FR temporary back compat
  prev_n_iter <- max(object$logLs$cumul_iter)
  ceil_size <- target_size <- frac*max(size_first_iter/2, size_first_iter*(prev_n_iter+1L)-nrow(object$logLs))
  if ( ! is.null(freq_good <- attr(object$logLs,"freq_good")$posterior)) ceil_size <- ceiling(ceil_size/freq_good)
  fittedPars <- object$colTypes$fittedPars
  if (inherits(object$postdens,"densityMclust")) {
    trypoints <- do.call("sim", c(object$postdens[c("modelName", "parameters")],list(n=ceil_size)))
    trypoints <- trypoints[,1L+seq_len(length(fittedPars)),drop=FALSE]
  } else {
    trypoints <- .simulate.MixmodResults(object$postdens, nsim=1L, size=ceil_size, drop=TRUE)
  } 
  colnames(trypoints) <- fittedPars
  trypoints <- (cbind(trypoints,object$colTypes$fixedPars)) ## add fixedPars for simulation
  trypoints <- trypoints[,object$colTypes$allPars,drop=FALSE] ## column reordering
  trypoints <- data.frame(trypoints)
  for (vv in fittedPars) trypoints <- trypoints[trypoints[,vv]>object$LOWER[vv],,drop=FALSE]
  for (vv in fittedPars) trypoints <- trypoints[trypoints[,vv]<object$UPPER[vv],,drop=FALSE]
  freq_good <- nrow(trypoints)/ceil_size
  return(list(trypoints=trypoints, freq_good=freq_good))
}




## FR->FR define rparam as generic with methods SLik_j, etc ?
.bad_rparam_from_SLik_j <- function(object, size=1L,CIweight=Infusion.getOption("CIweight")) {
  locpredict <- function(x) {predict.SLik_j(object,newdata=x)}
  MSLE <- object$MSL$MSLE
  fittedNames <- names(MSLE)
  RMSEs <- get_from(object,"RMSEs")
  #
  nvec <- .make_n(RMSEs=RMSEs, fittedPars=fittedNames, n=size, CIweight=CIweight)
  #
  trypoints <- matrix(NA,ncol=length(MSLE),nrow=0L)
  if (nvec["MSL"]>0L) {
    hess <- hessian(locpredict,x=MSLE)
    sigma <- - solve(hess) # *(RMSEs["MSL"])
    rmv <- rmvnorm(nvec["MSL"],mean=MSLE,sigma=sigma) # 1st sampling
    trypoints <- rbind(trypoints,rmv) ## centers points on the MSLE 
  }
  CIpoints <- object$CIobject$bounds
  for (kt in seq_len(NROW(CIpoints))) {
    focalpt <- CIpoints[kt,] 
    locst <- rownames(CIpoints)[kt]
    if (nvec[locst]>0L) {
      dotpos <- regexpr(".",locst,fixed=TRUE)[1L]
      parm <- substring(locst,dotpos+1L)
      parpos <- which(fittedNames==parm)
      curv <- hessian(locpredict,x=focalpt)
      curv[parpos,] <- 0
      curv[,parpos] <- 0
      curv[parpos,parpos] <- -grad(locpredict,x=focalpt)[parpos]^2
      sigma <- - solve(curv) # *(RMSEs[locst])
      rmv <- rmvnorm(nvec[locst],mean=focalpt,sigma=sigma)
      trypoints <- rbind(trypoints,rmv)
    }
  }
  # those for which no CI bound is available:
  for (st in object$CIobject$missingBounds) {
    if (nvec[st]>0L) {
      dotpos <- regexpr(".",st,fixed=TRUE)[1L]
      parm <- substring(st,dotpos+1L)
      side <- substring(st,0,dotpos-1L)
      focalpt <- object$MSL$MSLE
      if (side=="low") {
        bnd <- object$LOWER[parm]
      } else bnd <- object$UPPER[parm]
      if (abs(bnd-focalpt[parm])>(object$UPPER[parm]-object$LOWER[parm])/1e4) { ## if MSLE not at boundary
        focalpt[parm] <- bnd*0.999+0.001*focalpt[parm] ## create refpoint near boundary 
        parpos <- which(fittedNames==parm)
        curv <- hessian(locpredict,x=focalpt)
        curv[parpos,] <- 0
        curv[,parpos] <- 0
        curv[parpos,parpos] <- -grad(locpredict,x=focalpt)[parpos]^2
        sigma <- - solve(curv) ## proceeds as if RMSE = 1
        rmv <- rmvnorm(nvec[st],mean=focalpt,sigma=sigma)
        trypoints <- rbind(trypoints,rmv)
      }
    }
  }
  colnames(trypoints) <- fittedNames
  for (vv in fittedNames) trypoints <- trypoints[trypoints[,vv]>object$LOWER[vv],]
  for (vv in fittedNames) trypoints <- trypoints[trypoints[,vv]<object$UPPER[vv],]
  trypoints <- (cbind(trypoints,object$colTypes$fixedPars)) ## add fixedPars for simulation
  trypoints <- trypoints[,object$colTypes$allPars,drop=FALSE] ## column reordering
  return(trypoints)
}
#debug(rparam_from_SLik_j)
#rparam_from_SLik_j(mjd,7)

profile.SLik_j <- function(fitted,...) profile.SLik(fitted=fitted,...) ## no need for two distinct methods here


.boo_SLik_joint <- function(simul, debug_level=0L, boo, stat.obs, nbCluster_SVs, using) {
  boo[] <- simul
  if (debug_level<2L) { # return valid, NULL or try-error
    densv <- suppressWarnings( ## suppress warnings for clustering failure
      try(infer_SLik_joint(boo,stat.obs=stat.obs, nbCluster=nbCluster_SVs, using=using,
                           verbose=list(most=FALSE,final=FALSE)),silent=TRUE))
    if (inherits(densv,"try-error") && debug_level==0L) {
      return(NULL) ## used in which valid <- which( ! sapply(resu,is.null)) below
    } else return(densv)
  } else { # return error -> useful in serial mode only
    densv <- suppressWarnings( ## suppress warnings for clustering failure
      infer_SLik_joint(boo,stat.obs=stat.obs, nbCluster=nbCluster_SVs, using=using,
                       verbose=list(most=FALSE,final=FALSE)))
    return(densv)
  }
}


# Called by .RMSEwrapper.SLik_j();
# this does not call the process-simulating function. Instead, it performs a sort of parametric bootstrap from the Gaussian mixture model,
# i.e., it jointly simulates parameters and projected statistics from the jointdens...
# An artefact is that it can draw meaningless values from the jointdens, e.g. negative values for variance parameters
.boot.SLik_j <- function(object,nsim=2L, force=FALSE, verbose = TRUE, seed=NULL, 
                         parent_cores_info=NULL, # defined cluster by parent *.boot.SLik_j* calls
                         packages=attr(object$logLs,"packages"), env=attr(object$logLs,"env"), # not required if parent_cores_info is provided.
                         cluster_args=list(),
                         cl_seed=.update_seed(object)
                         ) {
  if (!force && nsim<2L) stop("'nsim' must be > 1")
  if (inherits(object$jointdens,"densityMclust")) {
    bootrepls <- replicate(nsim, do.call("sim", c(object$jointdens[c("modelName", "parameters")],
                                                  list(n=nrow(object$logLs))))[,-1L],simplify=FALSE)
    nbCluster_SVs <- list(jointdens=object$jointdens$G,
                          pardens=object$pardens$G)
  } else if (inherits(object$gllimobj,"gllim")) {
    bootrepls <- .simulate.gllimX(object$gllimobj, nsim=nsim, size=nrow(object$logLs), colTypes=object$colTypes) 
    nclu <- length(object$gllimobj$pi)
    nbCluster_SVs <- list(jointdens=nclu, pardens=nclu)
    
    bootrepls <- .gllim.condsimul.stats(object$gllimobj, RGPpars=bootrepls, size=1L, drop=FALSE, cbind.=TRUE, 
                                 colTypes=object$colTypes) 
  } else {
    bootrepls <- .simulate.MixmodResults(object$jointdens, nsim=nsim, size=nrow(object$logLs),
                                         drop=FALSE) 
    nbCluster_SVs <- list(jointdens=object$jointdens@nbCluster,
                      pardens=object$pardens@nbCluster) # single values so that this exposes to clustering failure.
  } ##  marginal=current distr of param
  
  boo <- object$logLs[,with(object$colTypes,c(fittedPars,statNames))]
  attr(boo, "allPars") <- object$colTypes$fittedPars # late 2021/01/25 correction as otherwise infer_SLik_joint() tries to use the original allPars that include fixed ones
  stat.obs <- attr(object$logLs,"stat.obs")
  prevmsglength <- 0L
  resu <- vector("list")
  #
  if (is.null(parent_cores_info)) {
    cluster_args <- .set_cluster_type(cluster_args, nb_cores=NULL) # PSOCK vs FORK
    cores_info <- .init_cores(cluster_args=cluster_args)
    cl <- cores_info$cl
    if ( ! is.null(cl)) {
      parallel::clusterExport(cl, "packages",envir=environment()) ## passes the list of packages to load
      # Infusion not loaded in child process !
      #parallel::clusterExport(cl, list("nRealizations"),envir=environment()) 
      #parallel::clusterCall(cl,fun=Infusion.options,nRealizations=nRealizations)
      if ( ! is.null(env)) parallel::clusterExport(cl=cl, ls(env),envir=env)
    }
  } else cores_info <- parent_cores_info
  cl <- cores_info$cl
  if ( ! is.null(cl) && ! is.null(cl_seed) ) parallel::clusterSetRNGStream(cl = cl, cl_seed)
  resu <- pblapply(bootrepls, .boo_SLik_joint, cl = cl, boo=boo, stat.obs=stat.obs, 
                   nbCluster_SVs=nbCluster_SVs, using=object$using)
  whichvalid <- which( ! sapply(resu,is.null))
  resu <- resu[whichvalid]
  if (length(resu)==0L) {
    message("All bootstrap replicates failed; this suggests a problem that cannot be solved by computing more replicates.\n Trying to diagnose the problem...")
    if (cores_info$nb_cores>1L) {
      message("\nIn parallel mode, first replicate gives:")
      resu <- pblapply(bootrepls[1],.boo_SLik_joint,cl = cores_info$cl, debug_level=1, boo=boo, 
                       stat.obs=stat.obs, nbCluster_SVs=nbCluster_SVs, using=object$using)
      print(resu[[1]][1])
      if (identical(cluster_args$debug_info,TRUE)) { # ready-to-use debug code:
        sessioninfo <- utils::sessionInfo()
        infoptions <- Infusion.options()
        using <- object$using
        globoptions <- options()
        tmpname <-  .generateFileName("debug_info",ext=".rda")
        print(paste0("Saving info in file ", tmpname,":"))
        save(bootrepls, sessioninfo, infoptions, globoptions, object, nbCluster_SVs, using, verbose, prevmsglength,
             file=tmpname)
        # loading the .rda allows to run .boo_SLik_joint(bootrepls[[1]]) (and controlling the options, if needed)
      } else message(paste("\nSet cluster_args$debug_info=TRUE to save some debugging info in a file\n",
                           "and see the source of Infusion:::.boot.SLik_j() for further details about it.")) 
      message("\nTesting whether this generates an error in serial mode:") # might not fail if pb only in parallel mode
    } else  message("\nIn serial mode, first replicate gives:") # should fail again
    abyss <- lapply(bootrepls[1],.boo_SLik_joint, debug_level=2, boo=boo, stat.obs=stat.obs, nbCluster_SVs=nbCluster_SVs,
                    using=object$using)
    return(NULL) # Only if parallel failed and serial did not. 
  }
  while (length(resu)< nsim) {
    message(paste("Mixture modelling with given nbCluster failed for",nsim-length(resu),"replicate(s); drawing sample(s) again..."))
    moreresu <- .boot.SLik_j(object, nsim=nsim-length(resu), force=TRUE, verbose=verbose, parent_cores_info=cores_info) # recursive call uses cores_info but not cluster_args.
    resu <- c(resu,moreresu)
  }
  if (is.null(parent_cores_info)) .close_cores(cores_info)
  invisible(resu)
}

# Called by MSL()
.RMSEwrapper.SLik_j <- function(object, CIpoints=object$CIobject$bounds, useCI=TRUE, nsim=10L,verbose=interactive(),
                                cluster_args=list()) {
  bootrepls <- .boot.SLik_j(object,nsim=nsim,verbose=verbose,cluster_args=cluster_args) # returns gaussian mixture models for each resample; no MSL as not needed
  if( useCI && ! is.null(CIpoints) ) {
    locdata <- data.frame(rbind(MSLE=object$MSL$MSLE,CIpoints))
    covmat <- cov(do.call(rbind,lapply(bootrepls,predict,newdata=locdata, which="lik"))) ## covmat of predict=of logL
    MSEs <- c(MSL=covmat[1,1],diag(covmat[-1,-1,drop=FALSE])+covmat[1,1]-2*covmat[1,-1])
  } else {
    locdata <- object$MSL$MSLE
    MSEs <- structure( var( unlist(lapply(bootrepls,predict,newdata=locdata, which="lik")))  ,names="MSL")
  }
  RMSEs <- sqrt(MSEs)
  if (length(MSEs)>1L) {
    headline <-  "* RMSEs of summary log-L maximum and of its ratio at CI bounds: *\n"
  } else {
    headline <-  "* RMSEs of summary log-L maximum: *\n"
  }
  if(verbose) {
    cat(headline)
    print(RMSEs)
  }
  return(RMSEs)
}

# overcome problems with plot Rmixmod which may need more than the info i na best result
# chercher 
# setMethod(
#   f="plot",
#   signature=c("Mixmod"),
# dans les sources pour savoir ce que Rmixmod fait.
# cf also plotCluster(lik_j$jointdens,data=slik_j$logLs)
# .plotClusters <- function(object,which,...) {
#   plotCluster(object[[which]],data=object$logLs,...)
#}

plot.MixmodResults <- function(x,...) {message('Use plotCluster(.,data=<slik object>$logLs,variable1=.,variable2=.)\n to plot a "MixModResults" object')}

Try the Infusion package in your browser

Any scripts or data that you put into this service are public.

Infusion documentation built on May 3, 2023, 5:10 p.m.