#' An R6 Class to represent a collection of PLNfit
#'
#' @description super class for [`PLNPCAfamily`] and [`PLNnetworkfamily`].
#'
#' @param responses the matrix of responses common to every models
#' @param covariates the matrix of covariates common to every models
#' @param offsets the matrix of offsets common to every models
#' @param weights the vector of observation weights
#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning.
#' @param control list controlling the optimization and the model
#' @param index Integer index of the model to be returned. Only the first value is taken into account.
#'
#' @inherit PLN details
#'
#' @rdname PLNfamily
#' @include PLNfamily-class.R
#' @importFrom R6 R6Class
#' @importFrom purrr map reduce map_chr
PLNfamily <-
R6Class(
classname = "PLNfamily",
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## PUBLIC MEMBERS ------
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
public = list(
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Public fields ---------------------
#' @field responses the matrix of responses common to every models
responses = NULL, # the Y matrix
#' @field covariates the matrix of covariates common to every models
covariates = NULL, # the X matrix
#' @field offsets the matrix of offsets common to every models
offsets = NULL, # the O matrix
#' @field weights the vector of observation weights
weights = NULL, # the vector of observation weights
#' @field inception a [PLNfit] object, obtained when no sparsifying penalty is applied.
inception = NULL, # the basic model in the collection (no regularization, nor sparsity, nor rank)
#' @field models a list of [PLNfit] object, one per penalty.
models = NULL, # the collection of fitted models
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Creator functions -----------------
#' @description Create a new [`PLNfamily`] object.
#' @return A new [`PLNfamily`] object
initialize = function(responses, covariates, offsets, weights, control) {
## set data matrice and dimension
self$responses <- responses
self$covariates <- covariates
self$offsets <- offsets
self$weights <- weights
private$n <- nrow(responses)
private$p <- ncol(responses)
private$d <- ncol(covariates)
## set names of the data matrices
if (is.null(rownames(responses))) rownames(self$responses) <- 1:private$n
if (is.null(colnames(responses))) colnames(self$responses) <- 1:private$p
if (is.null(rownames(covariates))) rownames(self$covariates) <- 1:private$n
if (is.null(colnames(covariates)) & private$d > 0) colnames(self$covariates) <- 1:private$d
},
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Post treatment --------------------
#' @description Update fields after optimization
#' @param config_post a list for controlling the post-treatments (optional bootstrap, jackknife, R2, etc.).
#' @param config_optim a list for controlling the optimization parameters used during post_treatments
postTreatment = function(config_post, config_optim) {
if (config_post$rsquared) {
nullModel <- nullModelPoisson(self$responses, self$covariates, self$offsets, self$weights)
} else {
nullModel <- NULL
}
for (model in self$models)
model$postTreatment(
self$responses,
self$covariates,
self$offsets,
self$weights,
config_post=config_post,
config_optim=config_optim,
nullModel = nullModel
)
},
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Extractors ------------------------
#' @description
#' Extract a model from a collection of models
# @inheritParams getModel
#' @return A [`PLNfit`] object
#' @seealso [getModel()]
getModel = function(var, index = NULL) {
## Extraction by index
if (!is.null(index) && index <= length(self$models)) {
return(self$models[[index[1]]]$clone())
}
## Extraction by parameter value
id <- match(var, private$params)
if (!is.na(id)) { ## Exact match
return(self$models[[id]]$clone())
} else { ## No exact match
id <- which.min(abs(var - private$params)) ## closest model (in terms of parameter value)
warning(paste("No such a model in the collection. Acceptable parameter values can be found via",
"$ranks (for PCA)",
"$clusters (for mixture models)",
"$penalties (for network)",
paste("Returning model with closest value. Requested:", var, ", returned:", private$params[id]),
sep = "\n"))
return(self$models[[id]]$clone())
}
},
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Graphical methods -----------------
#' @description
#' Lineplot of selected criteria for all models in the collection
#' @param criteria A valid model selection criteria for the collection of models. Includes loglik, BIC (all), ICL (PLNPCA) and pen_loglik, EBIC (PLNnetwork)
#' @param reverse A logical indicating whether to plot the value of the criteria in the "natural" direction
#' (loglik - penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the natural direction, on
#' the same scale as the log-likelihood.
#' @return A [`ggplot2`] object
plot = function(criteria, reverse) {
stopifnot(!anyNA(self$criteria[criteria]))
dplot <- self$criteria %>%
dplyr::select(dplyr::all_of(c("param", criteria))) %>%
tidyr::gather(key = "criterion", value = "value", -param) %>%
{if (reverse)
dplyr::mutate(
.data = .,
value = -2 * value,
criterion = dplyr::if_else(grepl('loglik', criterion), paste0("-2", criterion), criterion))
else . } %>%
dplyr::group_by(criterion)
p <- ggplot(dplot, aes(x = param, y = value, group = criterion, colour = criterion)) +
geom_line() + geom_point() +
ggtitle(label = "Model selection criteria",
subtitle = if (reverse) "Lower is better" else "Higher is better") +
theme_bw()
p
},
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Print methods ---------------------
#' @description User friendly print method
show = function() {
cat("--------------------------------------------------------\n")
cat("COLLECTION OF", length(self$models), "POISSON LOGNORMAL MODELS\n")
cat("--------------------------------------------------------\n")
},
#' @description User friendly print method
print = function() { self$show() }
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Other public members --------------
),
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## PRIVATE MEMBERS ----
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
private = list(
params = NULL, # vector of parameters that indexes the models (either sparsity, rank, number of cluster, etc.)
n = NULL, # number of samples
p = NULL, # number of responses
d = NULL # number of covariates
),
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## ACTIVE BINDINGS ----
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
active = list(
#' @field criteria a data frame with the values of some criteria (approximated log-likelihood, BIC, ICL, etc.) for the collection of models / fits
#' BIC and ICL are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty
criteria = function() {
res <- purrr::map(self$models, 'criteria') %>%
purrr::reduce(rbind)
data.frame(param = private$params, res)
},
#' @field convergence sends back a data frame with some convergence diagnostics associated with the optimization process (method, optimal value, etc)
convergence = function() {
res <- purrr::map(self$models, function(model) {
c(nb_param = model$nb_param, purrr::map_chr(model$optim_par, tail, 1))
}) %>% purrr::reduce(rbind)
data.frame(param = private$params, res, stringsAsFactors = FALSE)
}
)
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## END OF CLASS ----
## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.