Nothing
#' plmm_checks
#' @param design The design object, as created by `create_design()`
#' @param K Similarity matrix used to rotate the data. This should either be (1) a known matrix that reflects the covariance of y, (2) an estimate (Default is \eqn{\frac{1}{p}(XX^T)}), or (3) a list with components 'd' and 'u', as returned by choose_k().
#' @param diag_K Logical: should K be a diagonal matrix? This would reflect observations that are unrelated, or that can be treated as unrelated. Defaults to FALSE.
#' Note: plmm() does not check to see if a matrix is diagonal. If you want to use a diagonal K matrix, you must set diag_K = TRUE.
#' @param eta_star Optional argument to input a specific eta term rather than estimate it from the data. If K is a known covariance matrix that is full rank, this should be 1.
#' @param penalty The penalty to be applied to the model. Either "MCP" (the default), "SCAD", or "lasso".
#' @param init Initial values for coefficients. Default is 0 for all columns of X.
#' @param gamma The tuning parameter of the MCP/SCAD penalty (see details). Default is 3 for MCP and 3.7 for SCAD.
#' @param alpha Tuning parameter for the Mnet estimator which controls the relative contributions from the MCP/SCAD penalty and the ridge, or L2 penalty. alpha=1 is equivalent to MCP/SCAD penalty, while alpha=0 would be equivalent to ridge regression. However, alpha=0 is not supported; alpha may be arbitrarily small, but not exactly 0.
#' @param trace If set to TRUE, inform the user of progress by announcing the beginning of each step of the modeling process. Default is FALSE.
#' @param save_rds Optional: if a filepath and name is specified (e.g., `save_rds = "~/dir/my_results.rds"`), then the model results are saved to the provided location. Defaults to NULL, which does not save the result.
#' @param return_fit Optional: a logical value indicating whether the fitted model should be returned as a `plmm` object in the current (assumed interactive) session. Defaults to TRUE.
#' @param ... Additional arguments to `get_data()`
#'
#' @keywords internal
#'
#' @returns A list of parameters to pass on to model fitting. The list includes the standardized design matrix, the outcome, and meta-data
#'
plmm_checks <- function(design,
K = NULL,
diag_K = NULL,
eta_star = NULL,
penalty = "lasso",
init = NULL,
gamma,
alpha = 1,
trace = FALSE,
save_rds = NULL,
return_fit = TRUE,
...){
# read in X -----------------------------------------------------
# TODO: maybe need to add a 'catch' here for incorrect list input...
if (!(inherits(design, 'plmm_design') | inherits(design, 'character'))) {
stop('Input to "design" in plmm() must be either a plmm_design object output from create_design()
or an .rds filepath to a saved list output from create_design()')
}
if("character" %in% class(design)){
design <- get_data(path = design, trace = trace, ...)
std_X <- design$std_X
} else {
if (inherits(design$std_X, "big.matrix")) {
std_X <- bigmemory::attach.big.matrix(design$std_X)
} else if (inherits(design$std_X, "matrix")) {
std_X <- design$std_X
}
}
std_X_n <- design$std_X_n
std_X_p <- design$std_X_p
# create a list that captures the centering/scaling for std_X;
# will need this later, see `untransform()`
std_X_details <- list(
center = design$std_X_center,
scale = design$std_X_scale,
ns = design$ns,
unpen = design$unpen,
unpen_colnames = design$unpen_colnames,
X_colnames = design$X_colnames,
X_rownames = design$X_rownames,
std_X_rownames = design$std_X_rownames,
std_X_colnames = design$std_X_colnames)
if(inherits(std_X, "big.matrix")){
fbm_flag <- TRUE
} else {
fbm_flag <- FALSE
}
if (inherits(design$y, "matrix") | inherits(design$y, "data.frame")) {
y <- design$y[,1] |> unlist() # design$y will have one column
} else {
y <- design$y
}
penalty_factor <- design$penalty_factor
# set up defaults --------------------------------------------------
# set default init
if(is.null(init)){init <- rep(0, std_X_p)}
# set default gamma (gamma not used in 'lasso' option)
if (missing(gamma)) gamma <- switch(penalty, SCAD = 3.7, MCP = 3, lasso = 1)
# error checking design matrix ---------------------------------------------
if (length(y) != std_X_n) stop("X and y do not have the same number of observations", call.=FALSE)
if (length(penalty_factor)!=std_X_p) stop("Dimensions of penalty_factor and X do not match; something is off in the supplied design", call.=FALSE)
# check K types -------------------------------------------------------
if (!is.null(K)){
# first, check type/class:
if (!inherits(K, "matrix") & !is.list(K)) {
tmp <- try(K <- stats::model.matrix(~0+., data=K), silent=TRUE)
if (inherits(tmp, "try-error")) stop("K must be either (1) able to be coerced to a matrix or (2) be a list.", call.=FALSE)
}
if (typeof(K)=="integer") storage.mode(std_X) <- "double" # change K to X
if (typeof(K)=="character") stop("K must be a numeric matrix", call.=FALSE)
if (is.list(K)) {
if(!('s' %in% names(K) & 'U' %in% names(K))){stop('Components s and U not both found in list supplied for K.')}
}
# last check: look at dimensions
if (is.matrix(K)){
if (dim(K)[1] != std_X_n || dim(K)[2] != std_X_n) stop("Dimensions of K and X do not match", call.=FALSE)
} else if (is.list(K)) {
if (std_X_n != nrow(K$U)) stop("Dimensions of K and X do not match.")
}
}
# for in in-memory designs, set plink flag to FALSE
if(is.null(design$is_plink)){
plink_flag <- FALSE
} else if (design$is_plink) {
plink_flag <- TRUE
} else {
plink_flag <- FALSE
}
# warn about bigalgebra issues if this is user's first call to filebacked analysis
# Check if the warning has been shown
if (!.plmmr_env$warning_shown & fbm_flag) {
warning("Note: plmmr depends on the package bigalgebra, the current GitHub \n
version of which is throwing some warnings for filebacked analysis. \n
See https://github.com/fbertran/bigalgebra/issues/2 \n
If you see a warning about 'stack imbalance' appear while you are \n
fitting a model with plmm() or cv_plmm(), we recommend downloading \n
the last stable version of bigalebra.\n
This message is displayed after your first call to fit a model \n
using filebacked data.\n")
# Update the state
.plmmr_env$warning_shown <- TRUE
}
# return list for model preparation ---------------------------------
ret <- list(
std_X = std_X,
std_X_details = std_X_details,
std_X_n = std_X_n,
std_X_p = std_X_p,
y = y,
y_name = colnames(design$y),
centered_y = y - mean(y),
K = K,
diag_K = diag_K,
fbm_flag = fbm_flag,
plink_flag = plink_flag,
penalty = penalty,
penalty_factor = penalty_factor,
gamma = gamma,
init = init,
n = design$n,
p = design$p
)
if (trace & !is.null(save_rds)){cat("Your results will be saved to ", save_rds, "\n")}
return(ret)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.