#' Lagrangian Multiplier Smoothing Splines
#'
#' @docType package
#' @keywords internal
#' @name lgspline-package
#' @rdname lgspline-package
#' @aliases lgspline-package
#'
#' @import Rcpp RcppArmadillo methods stats
#' @importFrom graphics plot points legend
#' @importFrom FNN get.knnx
#' @importFrom quadprog solve.QP
#' @importFrom RColorBrewer brewer.pal
#' @importFrom plotly plot_ly
#'
#' @keywords smoothing regression parametric constrained lagrangian multiplier
"_PACKAGE"
#' Fit Lagrangian Multiplier Smoothing Splines
#'
#' @description
#'
#' A comprehensive software package for fitting a variant of smoothing splines
#' as a constrained optimization problem, avoiding the need to algebraically
#' disentangle a spline basis after fitting, and allowing for interpretable
#' interactions and non-spline effects to be included.
#'
#' \code{lgspline} fits piecewise polynomial regression splines constrained to be smooth where
#' they meet, penalized by the integrated squared second derivative of the
#' estimated function with respect to predictors.
#'
#' The method of Lagrangian multipliers is used to enforce the following:
#' \itemize{
#' \item Equivalent fitted values at knots
#' \item Equivalent first derivatives at knots, with respect to predictors
#' \item Equivalent second derivatives at knots, with respect to predictors
#' }
#'
#' The coefficients are penalized by an analytical form of the traditional
#' cubic smoothing spline penalty, as well as tunable modifications that allow
#' for unique penalization of multiple predictors and partitions.
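#'
#' For Gaussian responses with identity link, the fit can be viewed (a sketch,
#' using the notation of the Value section below) as the solution to the
#' constrained, penalized least-squares problem
#' \deqn{\min_{\boldsymbol{\beta}} (\textbf{y} - \textbf{X}\boldsymbol{\beta})^{T}(\textbf{y} - \textbf{X}\boldsymbol{\beta}) + \boldsymbol{\beta}^{T}\boldsymbol{\Lambda}\boldsymbol{\beta} \text{ subject to } \textbf{A}^{T}\boldsymbol{\beta} = \boldsymbol{0},}
#' where \eqn{\textbf{A}} encodes the smoothness constraints at the knots and
#' \eqn{\boldsymbol{\Lambda}} the penalties described above.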
#'
#' This package supports model fitting for multiple spline and non-spline effects, GLM families,
#' Weibull accelerated failure time (AFT) models, correlation structures, quadratic
#' programming constraints, and extensive customization for user-defined models.
#'
#' In addition, parallel processing capabilities and comprehensive
#' tools for visualization, frequentist, and Bayesian inference are provided.
#'
#' @usage lgspline(predictors = NULL, y = NULL, formula = NULL, response = NULL,
#' standardize_response = TRUE, standardize_predictors_for_knots = TRUE,
#' standardize_expansions_for_fitting = TRUE, family = gaussian(),
#' glm_weight_function = function(mu, y, order_indices, family, dispersion,
#' observation_weights, ...) {
#' if(any(!is.null(observation_weights))){
#' family$variance(mu) * observation_weights
#' } else {
#' family$variance(mu)
#' }
#' }, shur_correction_function = function(X, y, B, dispersion, order_list, K,
#' family, observation_weights, ...) {
#' lapply(1:(K+1), function(k) 0)
#' }, need_dispersion_for_estimation = FALSE,
#' dispersion_function = function(mu, y, order_indices, family,
#' observation_weights, ...) { 1 },
#' K = NULL, custom_knots = NULL, cluster_on_indicators = FALSE,
#' make_partition_list = NULL, previously_tuned_penalties = NULL,
#' smoothing_spline_penalty = NULL, opt = TRUE, use_custom_bfgs = TRUE,
#' delta = NULL, tol = 10*sqrt(.Machine$double.eps),
#' invsoftplus_initial_wiggle = c(-25, 20, -15, -10, -5),
#' invsoftplus_initial_flat = c(-14, -7), wiggle_penalty = 2e-07,
#' flat_ridge_penalty = 0.5, unique_penalty_per_partition = TRUE,
#' unique_penalty_per_predictor = TRUE, meta_penalty = 1e-08,
#' predictor_penalties = NULL, partition_penalties = NULL,
#' include_quadratic_terms = TRUE, include_cubic_terms = TRUE,
#' include_quartic_terms = NULL, include_2way_interactions = TRUE,
#' include_3way_interactions = TRUE, include_quadratic_interactions = FALSE,
#' offset = c(), just_linear_with_interactions = NULL,
#' just_linear_without_interactions = NULL, exclude_interactions_for = NULL,
#' exclude_these_expansions = NULL, custom_basis_fxn = NULL,
#' include_constrain_fitted = TRUE, include_constrain_first_deriv = TRUE,
#' include_constrain_second_deriv = TRUE,
#' include_constrain_interactions = TRUE, cl = NULL, chunk_size = NULL,
#' parallel_eigen = TRUE, parallel_trace = FALSE, parallel_aga = FALSE,
#' parallel_matmult = FALSE, parallel_unconstrained = TRUE,
#' parallel_find_neighbors = FALSE, parallel_penalty = FALSE,
#' parallel_make_constraint = FALSE,
#' unconstrained_fit_fxn = unconstrained_fit_default,
#' keep_weighted_Lambda = FALSE, iterate_tune = TRUE,
#' iterate_final_fit = TRUE, blockfit = FALSE,
#' qp_score_function = function(X, y, mu, order_list, dispersion,
#' VhalfInv, observation_weights, ...) {
#' if(!is.null(observation_weights)) {
#' crossprod(X, cbind((y - mu)*observation_weights))
#' } else {
#' crossprod(X, cbind(y - mu))
#' }
#' }, qp_observations = NULL, qp_Amat = NULL, qp_bvec = NULL, qp_meq = 0,
#' qp_positive_derivative = FALSE, qp_negative_derivative = FALSE,
#' qp_monotonic_increase = FALSE, qp_monotonic_decrease = FALSE,
#' qp_range_upper = NULL, qp_range_lower = NULL, qp_Amat_fxn = NULL,
#' qp_bvec_fxn = NULL, qp_meq_fxn = NULL, constraint_values = cbind(),
#' constraint_vectors = cbind(), return_G = TRUE, return_Ghalf = TRUE,
#' return_U = TRUE, estimate_dispersion = TRUE, unbias_dispersion = NULL,
#' return_varcovmat = TRUE, custom_penalty_mat = NULL,
#' cluster_args = c(custom_centers = NA, nstart = 10),
#' dummy_dividor = 1.2345672152894e-22,
#' dummy_adder = 2.234567210529e-18, verbose = FALSE,
#' verbose_tune = FALSE, expansions_only = FALSE,
#' observation_weights = NULL, do_not_cluster_on_these = c(),
#' neighbor_tolerance = 1 + 1e-08, null_constraint = NULL,
#' critical_value = qnorm(1 - 0.05/2), data = NULL, weights = NULL,
#' no_intercept = FALSE, correlation_id = NULL, spacetime = NULL,
#' correlation_structure = NULL, VhalfInv = NULL, Vhalf = NULL,
#' VhalfInv_fxn = NULL, Vhalf_fxn = NULL, VhalfInv_par_init = c(),
#' REML_grad = NULL, custom_VhalfInv_loss = NULL, VhalfInv_logdet = NULL,
#' include_warnings = TRUE, ...)
#'
#' @details
#' A flexible and interpretable implementation of smoothing splines including:
#' \itemize{
#' \item Multiple predictors and interaction terms
#' \item Various GLM families and link functions
#' \item Correlation structures for longitudinal/clustered data
#' \item Shape constraints via quadratic programming
#' \item Parallel computation for large datasets
#' \item Comprehensive inference tools
#' }
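#'
#' A minimal sketch of the interface (\code{df}, \code{y}, \code{x1}, and
#' \code{x2} are hypothetical placeholders):
#' \preformatted{
#' fit <- lgspline(y ~ spl(x1) + x2, data = df, K = 5)
#' summary(fit)
#' predict(fit, new_predictors = df[, c("x1", "x2")])
#' plot(fit)
#' }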
#'
#' @param predictors Default: NULL. Numeric matrix or data frame of predictor variables. Supports direct matrix input or formula interface when used with `data` argument. Must contain numeric predictors, with categorical variables pre-converted to numeric indicators.
#' @param y Default: NULL. Numeric response variable vector representing the target/outcome/dependent variable etc. to be modeled.
#' @param formula Default: NULL. Optional statistical formula for model specification, serving as an alternative to direct matrix input. Supports standard R formula syntax with special `spl()` function for defining spline terms.
#' @param response Default: NULL. Alternative name for response variable, providing compatibility with different naming conventions. Takes precedence only if `y` is not supplied.
#' @param standardize_response Default: TRUE. Logical indicator controlling whether the response variable should be centered by mean and scaled by standard deviation before model fitting. When TRUE, tends to improve numerical stability. Only offered for identity link functions (when family$link == 'identity')
#' @param standardize_predictors_for_knots Default: TRUE. Logical flag determining whether predictor variables should be standardized before knot placement. Ensures consistent knot selection across different predictor scales, and that no one predictor dominates in terms of influence on knot placement. For all expansions (x), standardization corresponds to dividing by the difference between the 69th and 31st percentiles: x / (quantile(x, 0.69) - quantile(x, 0.31)).
#' @param standardize_expansions_for_fitting Default: TRUE. Logical switch to standardize polynomial basis expansions during model fitting. Provides computational stability during penalty tuning without affecting statistical inference, as design matrices, variance-covariance matrices, and coefficient estimates are systematically backtransformed after fitting to account for the standardization. If TRUE, \eqn{\textbf{U}} and \eqn{\textbf{G}} will remain on the transformed scale, and B_raw as returned will correspond to the coefficients fitted on the expansion-standardized scale.
#' @param family Default: gaussian(). Generalized linear model (GLM) distribution family specifying the error distribution and link function for model fitting. Defaults to Gaussian distribution with identity link. Supports custom family specifications, including user-defined link functions and optional custom tuning loss criteria. Minimally requires 1) family name (family) 2) link name (link) 3) linkfun (link function) 4) linkinv (link function inverse) 5) variance (mean variance relationship function, \eqn{\text{Var}(Y|\mu)}).
#' @param glm_weight_function Default: function that returns family$variance(mu) * observation_weights if weights exist, family$variance(mu) otherwise. Codes the mean-variance relationship of a GLM or GLM-like model, the diagonal \eqn{\textbf{W}} matrix of \eqn{\textbf{X}^T\textbf{W}\textbf{X}} that appears in the information. This can be replaced with a user-specified function. It is used for updating \eqn{\textbf{G} = (\textbf{X}^{T}\textbf{W}\textbf{X} + \textbf{L})^{-1}} after obtaining constrained estimates of coefficients. This is not used for fitting unconstrained models, but for iterating between updates of \eqn{\textbf{U}}, \eqn{\textbf{G}}, and beta coefficients afterwards.
#' @param shur_correction_function Default: function that returns list of zeros. Advanced function for computing Schur complements \eqn{\textbf{S}} to add to \eqn{\textbf{G}} to properly account for uncertainty in dispersion or other nuisance parameter estimation. The effective information becomes \eqn{\textbf{G}^* = (\textbf{G}^{-1} + \textbf{S})^{-1}}.
#' @param need_dispersion_for_estimation Default: FALSE. Logical indicator specifying whether a dispersion parameter is required for coefficient estimation. This is not needed for canonical regular exponential family models, but is often needed otherwise (such as fitting Weibull AFT models).
#' @param dispersion_function Default: function that returns 1. Custom function for estimating the dispersion parameter. Unless \code{need_dispersion_for_estimation} is TRUE, this will not affect coefficient estimates.
#' @param K Default: NULL. Integer specifying the number of knot locations for spline partitions. This can intuitively be considered the total number of partitions - 1.
#' @param custom_knots Default: NULL. Optional matrix providing user-specified knot locations in 1-D.
#' @param cluster_on_indicators Default: FALSE. Logical flag determining whether indicator variables should be used for clustering knot locations.
#' @param make_partition_list Default: NULL. Optional list allowing direct specification of custom partition assignments. The intent is that the make_partition_list returned by one model can be supplied here to keep the same knot locations for another.
#' @param previously_tuned_penalties Default: NULL. Optional list of pre-computed penalty components from previous model fits.
#' @param smoothing_spline_penalty Default: NULL. Optional custom smoothing spline penalty matrix for fine-tuned complexity control.
#' @param opt Default: TRUE. Logical switch controlling whether model penalties should be automatically optimized via generalized cross-validation. Set this to FALSE if previously_tuned_penalties are supplied and should be used; otherwise, the previously_tuned_penalties will be ignored.
#' @param use_custom_bfgs Default: TRUE. Logical indicator selecting between a native implementation of damped BFGS with analytical gradients (TRUE) and base R's BFGS implementation with finite-difference approximation of gradients (FALSE).
#' @param delta Default: NULL. Numeric pseudocount used for stabilizing optimization in non-identity link function scenarios.
#' @param tol Default: 10*sqrt(.Machine$double.eps). Numeric convergence tolerance controlling the precision of optimization algorithms.
#' @param invsoftplus_initial_wiggle Default: c(-25, 20, -15, -10, -5). Numeric vector of initial grid points for wiggle penalty optimization, specified on the inverse-softplus (\eqn{\text{softplus}(x) = \log(1+e^x)}) scale.
#' @param invsoftplus_initial_flat Default: c(-14, -7). Numeric vector of initial grid points for ridge penalty optimization, specified on the inverse-softplus (\eqn{\text{softplus}(x) = \log(1+e^x)}) scale.
#' @param wiggle_penalty Default: 2e-7. Numeric penalty controlling the integrated squared second derivative, governing function smoothness. Applied to both smoothing spline penalty (alone) and is multiplied by \code{flat_ridge_penalty} for penalizing linear terms.
#' @param flat_ridge_penalty Default: 0.5. Numeric flat ridge penalty for additional regularization on only intercepts and linear terms (won't affect interactions or quadratic/cubic/quartic terms by default). If \code{custom_penalty_mat} is supplied, the penalty will be for the custom penalty matrix instead. This penalty is multiplied with \code{wiggle_penalty} to obtain the total ridge penalty - hence, by default, the ridge penalization on linear terms is half of the magnitude of non-linear terms.
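#' With the defaults, for example, the total ridge penalty applied to intercept and linear terms is \eqn{0.5 \times 2 \times 10^{-7} = 10^{-7}}.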
#' @param unique_penalty_per_partition Default: TRUE. Logical flag allowing the magnitude of the smoothing spline penalty to differ across partitions.
#' @param unique_penalty_per_predictor Default: TRUE. Logical flag allowing the magnitude of the smoothing spline penalty to differ between predictors.
#' @param meta_penalty Default: 1e-8. Numeric "meta-penalty" applied to predictor and partition penalties during tuning. The minimization of GCV is modified to be a penalized minimization problem, with penalty \eqn{0.5 \times \text{meta\_penalty} \times (\sum \log(\text{penalty}))^2}, such that penalties are pulled towards 1 on the absolute scale and their effect on the log scale towards 0.
#' @param predictor_penalties Default: NULL. Optional list of custom penalties specified per predictor.
#' @param partition_penalties Default: NULL. Optional list of custom penalties specified per partition.
#' @param include_quadratic_terms Default: TRUE. Logical switch to include squared predictor terms in basis expansions.
#' @param include_cubic_terms Default: TRUE. Logical switch to include cubic predictor terms in basis expansions.
#' @param include_quartic_terms Default: NULL. Logical switch to include quartic predictor terms in basis expansions. This is highly recommended for fitting models with multiple predictors to avoid over-specified constraints. When NULL (by default), will internally set to FALSE if only one predictor present, and TRUE otherwise.
#' @param include_2way_interactions Default: TRUE. Logical switch to include linear two-way interactions between predictors.
#' @param include_3way_interactions Default: TRUE. Logical switch to include three-way interactions between predictors.
#' @param include_quadratic_interactions Default: FALSE. Logical switch to include linear-quadratic interaction terms.
#' @param offset Default: Empty vector. When non-missing, this is a vector of column indices/names to include as offsets. \code{lgspline} will automatically introduce constraints such that the coefficients for offset terms are fixed at 1.
#' @param just_linear_with_interactions Default: NULL. Integer vector specifying columns to retain linear terms with interactions.
#' @param just_linear_without_interactions Default: NULL. Integer vector specifying columns to retain only linear terms without interactions.
#' @param exclude_interactions_for Default: NULL. Integer vector indicating columns to exclude from all interaction terms.
#' @param exclude_these_expansions Default: NULL. Character vector specifying basis expansions to be excluded from the model. These must be named columns of the data, or in the form "_1_", "_2_", "_1_x_2_", "_2_^2" etc. where "1" and "2" indicate column indices of predictor matrix input.
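#' For example, a sketch excluding the interaction between the first two predictor columns and the squared term of the second: \code{exclude_these_expansions = c("_1_x_2_", "_2_^2")}.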
#' @param custom_basis_fxn Default: NULL. Optional user-defined function for generating custom basis expansions. See \code{\link{get_polynomial_expansions}}.
#' @param include_constrain_fitted Default: TRUE. Logical switch to constrain fitted values at knot points.
#' @param include_constrain_first_deriv Default: TRUE. Logical switch to constrain first derivatives at knot points.
#' @param include_constrain_second_deriv Default: TRUE. Logical switch to constrain second derivatives at knot points.
#' @param include_constrain_interactions Default: TRUE. Logical switch to constrain interaction terms at knot points.
#' @param cl Default: NULL. Parallel processing cluster object for distributed computation (use \code{parallel::makeCluster()}).
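#' A sketch of typical use (with \code{t}, \code{y} as in the examples below; the worker count is arbitrary):
#' \preformatted{
#' cl <- parallel::makeCluster(2)
#' fit <- lgspline(t, y, cl = cl)
#' parallel::stopCluster(cl)
#' }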
#' @param chunk_size Default: NULL. Integer specifying custom fixed chunk size for parallel processing.
#' @param parallel_eigen Default: TRUE. Logical flag to enable parallel processing for eigenvalue decomposition computations.
#' @param parallel_trace Default: FALSE. Logical flag to enable parallel processing for trace computation.
#' @param parallel_aga Default: FALSE. Logical flag to enable parallel processing for specific matrix operations involving G and A.
#' @param parallel_matmult Default: FALSE. Logical flag to enable parallel processing for block-diagonal matrix multiplication.
#' @param parallel_unconstrained Default: TRUE. Logical flag to enable parallel processing for unconstrained maximum likelihood estimation.
#' @param parallel_find_neighbors Default: FALSE. Logical flag to enable parallel processing for neighbor identification (which partitions are neighbors).
#' @param parallel_penalty Default: FALSE. Logical flag to enable parallel processing for penalty matrix construction.
#' @param parallel_make_constraint Default: FALSE. Logical flag to enable parallel processing for constraint matrix generation.
#' @param unconstrained_fit_fxn Default: \code{\link{unconstrained_fit_default}}. Custom function for fitting unconstrained models per partition.
#' @param keep_weighted_Lambda Default: FALSE. Logical flag to retain generalized linear model weights in penalty constraints using Tikhonov parameterization. It is advised to set this to TRUE when fitting non-canonical GLMs. The default \code{\link{unconstrained_fit_default}} assumes canonical GLMs when setting up estimating equations; this is not valid for non-canonical GLMs. With \code{keep_weighted_Lambda = TRUE}, the Tikhonov parameterization binds \eqn{\boldsymbol{\Lambda}^{1/2}}, the square-root penalty matrix, to the design matrix \eqn{\textbf{X}_k} for each partition k, and family$linkinv(0) to the response vector \eqn{\textbf{y}_k} for each partition before finding unconstrained estimates using base R's \code{glm.fit} function. The potential issue is that the weights of the information matrix will appear in the penalty, such that the effective penalty is \eqn{\boldsymbol{\Lambda}_\text{eff} = \textbf{L}^{1/2}\textbf{W}\textbf{L}^{1/2}} rather than just \eqn{\textbf{L}^{1/2}\textbf{L}^{1/2}}. If FALSE, this approach is only used to supply initial values to a native implementation of damped Newton-Raphson for fitting GLM models (see \code{\link{damped_newton_r}} and \code{\link{unconstrained_fit_default}}). For Gamma with log-link, this is fortunately a non-issue since the mean-variance relationship is essentially stabilized, so \code{keep_weighted_Lambda = TRUE} is strongly advised.
#' @param iterate_tune Default: TRUE. Logical switch to use iterative optimization during penalty tuning. If FALSE, \eqn{\textbf{G}} and \eqn{\textbf{U}} are constructed from unconstrained \eqn{\boldsymbol{\beta}} estimates when tuning.
#' @param iterate_final_fit Default: TRUE. Logical switch to use iterative optimization for final model fitting. If FALSE, \eqn{\textbf{G}} and \eqn{\textbf{U}} are constructed from unconstrained \eqn{\boldsymbol{\beta}} estimates when fitting the final model after tuning.
#' @param blockfit Default: FALSE. Logical switch to abandon per-partition fitting for non-spline effects without interactions, collapse all matrices into block-diagonal single-matrix form, and fit agnostic to partition. This would be more efficient for many non-spline effects without interactions and relatively few spline effects or non-spline effects with interactions. Ignored if \code{length(just_linear_without_interactions)} is 0 after processing formulas and input.
#' @param qp_score_function Default: \eqn{\textbf{X}^{T}(\textbf{y} - \text{E}[\textbf{y}])}, where \eqn{\text{E}[\textbf{y}] = \boldsymbol{\mu}}. A function returning the score of the log-likelihood for optimization (excluding penalization/priors involving \eqn{\boldsymbol{\Lambda}}), which is needed for the formulation of quadratic programming problems, for \code{blockfit = TRUE}, and for correlation-structure fitting with GLMs, all of which rely on \code{\link[quadprog]{solve.QP}}. Accepts arguments "X, y, mu, order_list, dispersion, VhalfInv, observation_weights, ..." in order. As shown in the examples below, a gamma log-link model instead requires \eqn{\textbf{X}^{T}\textbf{W}(\textbf{y} - \text{E}[\textbf{y}])}, with \eqn{\textbf{W}} a diagonal weight matrix with elements proportional to \eqn{1/\text{E}[\textbf{y}]}. This argument is not needed when fitting non-canonical GLMs without quadratic programming constraints or correlation structures, situations for which \code{keep_weighted_Lambda=TRUE} is sufficient.
#' @param qp_observations Default: NULL. Numeric vector of observations to apply constraints to for monotonic and range quadratic programming constraints. Useful for saving computational resources.
#' @param qp_Amat Default: NULL. Constraint matrix for quadratic programming formulation. The \code{Amat} argument of \code{\link[quadprog]{solve.QP}}.
#' @param qp_bvec Default: NULL. Constraint vector for quadratic programming formulation. The \code{bvec} argument of \code{\link[quadprog]{solve.QP}}.
#' @param qp_meq Default: 0. Number of equality constraints in quadratic programming setup. The \code{meq} argument of \code{\link[quadprog]{solve.QP}}.
#' @param qp_positive_derivative,qp_monotonic_increase Default: FALSE. Logical flags to constrain the function to have positive first derivatives/be monotonically increasing using quadratic programming with respect to the order (ascending rows) of the input data set.
#' @param qp_negative_derivative,qp_monotonic_decrease Default: FALSE. Logical flags to constrain the function to have negative first derivatives/be monotonically decreasing using quadratic programming with respect to the order (ascending rows) of the input data set.
#' @param qp_range_upper Default: NULL. Numeric upper bound for constrained fitted values using quadratic programming.
#' @param qp_range_lower Default: NULL. Numeric lower bound for constrained fitted values using quadratic programming.
#' @param qp_Amat_fxn Default: NULL. Custom function for generating Amat matrix in quadratic programming.
#' @param qp_bvec_fxn Default: NULL. Custom function for generating bvec vector in quadratic programming.
#' @param qp_meq_fxn Default: NULL. Custom function for determining meq equality constraints in quadratic programming.
#' @param constraint_values Default: \code{cbind()}. Matrix of constraint values for sum constraints. The constraint enforces \eqn{\textbf{C}^T(\boldsymbol{\beta} - \textbf{c}) = \boldsymbol{0}} in addition to smoothing constraints, where \eqn{\textbf{C}} = \code{constraint_vectors} and \eqn{\textbf{c}} = \code{constraint_values}.
#' @param constraint_vectors Default: \code{cbind()}. Matrix of vectors for sum constraints. The constraint enforces \eqn{\textbf{C}^T(\boldsymbol{\beta} - \textbf{c}) = \boldsymbol{0}} in addition to smoothing constraints, where \eqn{\textbf{C}} = \code{constraint_vectors} and \eqn{\textbf{c}} = \code{constraint_values}.
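#' For example, a sketch (using the \eqn{\textbf{C}^T(\boldsymbol{\beta} - \textbf{c}) = \boldsymbol{0}} form above, with \eqn{P} total
#' coefficients; run with \code{expansions_only = TRUE} first to see where each
#' coefficient lands) forcing the second coefficient to equal 1.5:
#' \preformatted{
#' C <- matrix(0, P, 1); C[2, 1] <- 1            # constraint_vectors
#' cvals <- matrix(0, P, 1); cvals[2, 1] <- 1.5  # constraint_values
#' }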
#' @param return_G Default: TRUE. Logical switch to return the unscaled variance-covariance matrix without smoothing constraints (\eqn{\textbf{G}}).
#' @param return_Ghalf Default: TRUE. Logical switch to return the matrix square root of the unscaled variance-covariance matrix without smoothing constraints (\eqn{\textbf{G}^{1/2}}).
#' @param return_U Default: TRUE. Logical switch to return the constraint projection matrix \eqn{\textbf{U}}.
#' @param estimate_dispersion Default: TRUE. Logical flag to estimate the dispersion parameter after fitting.
#' @param unbias_dispersion Default: NULL. Logical switch to multiply final dispersion estimates by \eqn{N/(N-\text{trace}(\textbf{X}\textbf{U}\textbf{G}\textbf{X}^{T}))}, which, in the case of Gaussian-distributed errors with identity link function, provides unbiased estimates of variance. When NULL (by default), gets set to TRUE for Gaussian + identity link and FALSE otherwise.
#' @param return_varcovmat Default: TRUE. Logical switch to return the variance-covariance matrix of the estimated coefficients. This is needed for performing Wald inference.
#' @param custom_penalty_mat Default: NULL. Optional \eqn{p \times p} custom penalty matrix for individual partitions to replace the default ridge penalty applied to linear-and-intercept terms only. This can be interpreted as proportional to the prior correlation matrix of coefficients for non-spline effects, and will appear in the penalty matrix for all partitions. It is recommended to first run the function using \code{expansions_only = TRUE} so you have an idea of where the expansions appear in each partition, what "p" is, and you can carefully customize your penalty matrix after.
#' @param cluster_args Default: \code{c(custom_centers = NA, nstart = 10)}. Named vector of arguments controlling clustering procedures. If the first argument is not NA, this will be treated as custom cluster centers and all other arguments ignored. Otherwise, default base R k-means clustering will be used with all other arguments supplied to \code{kmeans} (for example, by default, the "nstart" argument as provided). Custom centers must be a \eqn{K \times q} matrix with one column for each predictor in order of their appearance in input predictor/data, and one row for each center.
#' @param dummy_dividor Default: 1.2345672152894e-22. Small numeric constant to prevent division by zero in computational routines.
#' @param dummy_adder Default: 2.234567210529e-18. Small numeric constant to prevent division by zero in computational routines.
#' @param verbose Default: FALSE. Logical flag to print general progress messages during model fitting (does not include during tuning).
#' @param verbose_tune Default: FALSE. Logical flag to print detailed progress messages during penalty tuning specifically.
#' @param expansions_only Default: FALSE. Logical switch to return only basis expansions without full model fitting. Useful for setting up custom constraints and penalties.
#' @param observation_weights Default: NULL. Numeric vector of observation-specific weights for generalized least squares estimation.
#' @param do_not_cluster_on_these Default: c(). Vector specifying predictor columns to exclude from clustering procedures, in addition to the non-spline effects by default.
#' @param neighbor_tolerance Default: 1 + 1e-8. Numeric tolerance for determining neighboring partitions using k-means clustering. Greater values mean more partitions are likely to be considered neighbors. Intended for internal use only (modify at your own risk!).
#' @param null_constraint Default: NULL. Alternative parameterization of constraint values.
#' @param critical_value Default: \code{qnorm(1-0.05/2)}. Numeric critical value used for constructing Wald confidence intervals of the form \eqn{\text{estimate} \pm \text{critical\_value} \times (\text{standard error})}.
#' @param data Default: NULL. Optional data frame providing context for formula-based model specification.
#' @param weights Default: NULL. Alternative name for observation weights, maintained for interface compatibility.
#' @param no_intercept Default: FALSE. Logical flag to remove the intercept, constraining it to 0. The function automatically constructs constraint_vectors and constraint_values to achieve this. Formulas containing \code{0 +}, such as \code{y ~ 0 + .}, will set this option to TRUE.
#' @param correlation_id,spacetime Default: NULL. N-length vector and N-row matrix of cluster (or subject, group etc.) ids and longitudinal/spatial variables respectively, whereby observations within each grouping of \code{correlation_id} are correlated with respect to the variables submitted to \code{spacetime}.
#' @param correlation_structure Default: NULL. Native implementations of popular variance-covariance structures. Offers options for "exchangeable", "spatial-exponential", "squared-exponential", "ar(1)", "spherical", "gaussian-cosine", "gamma-cosine", and "matern", along with their aliases. The eponymous correlation structure is fit along with coefficients and dispersion, with correlation estimated using a REML objective. See section "Correlation Structure Estimation" for more details.
#' @param VhalfInv Default: NULL. Matrix representing a fixed, custom square-root-inverse covariance structure for the response variable of longitudinal and spatial modeling. Must be an \eqn{N \times N} matrix where N is number of observations. This matrix \eqn{\textbf{V}^{-1/2}} serves as a fixed transformation matrix for the response, equivalent to GLS with known covariance \eqn{\textbf{V}}. This is known as "whitening" in some literature.
#' @param Vhalf Default: NULL. Matrix representing a fixed, custom square-root covariance structure for the response variable of longitudinal and spatial modeling. Must be an \eqn{N \times N} matrix where N is number of observations. This matrix \eqn{\textbf{V}^{1/2}} is used when backtransforming coefficients for fitting GLMs with arbitrary correlation structure.
#' @param VhalfInv_fxn Default: NULL. Function for parametric modeling of the covariance structure \eqn{\textbf{V}^{-1/2}}. Must take a single numeric vector argument "par" and return an \eqn{N \times N} matrix. When provided with \code{VhalfInv_par_init}, this function is optimized via BFGS to find optimal covariance parameters that minimize the negative REML log-likelihood (or custom loss if \code{custom_VhalfInv_loss} is specified). The function must return a valid square root of the inverse covariance matrix - i.e., if \eqn{\textbf{V}} is the true covariance, \code{VhalfInv_fxn} should return \eqn{\textbf{V}^{-1/2}} such that \code{VhalfInv_fxn(par) * VhalfInv_fxn(par)} = \eqn{\textbf{V}^{-1}}.
#' @param Vhalf_fxn Default: NULL. Function for efficient computation of \eqn{\textbf{V}^{1/2}}, used only when optimizing correlation structures with non-canonical-Gaussian response.
#' @param VhalfInv_par_init Default: c(). Numeric vector of initial parameter values for \code{VhalfInv_fxn} optimization. When provided with \code{VhalfInv_fxn}, triggers optimization of the covariance structure. Length determines the dimension of the parameter space. For example, for AR(1) correlation, this could be a single correlation parameter; for unstructured correlation, this could be all unique elements of the correlation matrix.
#' @param REML_grad Default: NULL. Function for evaluating the gradient of the objective function (negative REML or custom loss) with respect to the parameters of \code{VhalfInv_fxn}. Must take the same "par" argument as \code{VhalfInv_fxn}, as well as second argument "model_fit" for the output of \code{lgspline.fit} and ellipses "..." as a third argument. It should return a vector of partial derivatives matching the length of par. When provided, enables more efficient optimization via analytical gradients rather than numerical approximation. Optional - if NULL, BFGS uses numerical gradients.
#' @param custom_VhalfInv_loss Default: NULL. Alternative to negative REML for serving as the objective function for optimizing correlation parameters. Must take the same "par" argument as \code{VhalfInv_fxn}, as well as second argument "model_fit" for the output of \code{lgspline.fit} and ellipses "..." as a third argument. It should return a numeric scalar.
#' @param VhalfInv_logdet Default: NULL. Function for efficient computation of \eqn{\log|\textbf{V}^{-1/2}|} that bypasses construction of the full \eqn{\textbf{V}^{-1/2}} matrix. Must take the same parameter vector 'par' as \code{VhalfInv_fxn} and return a scalar value equal to \eqn{\log(\det(\textbf{V}^{-1/2}))}. When NULL, the determinant is computed directly from \code{VhalfInv}, which can be computationally expensive for large matrices.
#' @param include_warnings Default: TRUE. Logical switch to control display of warning messages during model fitting.
#' @param ... Additional arguments passed to the unconstrained model fitting function.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{y}{Original response vector.}
#' \item{ytilde}{Fitted/predicted values on the scale of the response.}
#' \item{X}{List of design matrices \eqn{\textbf{X}_k} for each partition k, containing basis expansions including intercept, linear, quadratic, cubic, and interaction terms as specified.}
#' \item{A}{Constraint matrix \eqn{\textbf{A}} encoding smoothness constraints at knot points and any user-specified linear constraints.}
#' \item{B}{List of fitted coefficients \eqn{\boldsymbol{\beta}_k} for each partition k on the original, unstandardized scale of the predictors and response.}
#' \item{B_raw}{List of fitted coefficients for each partition on the predictor-and-response standardized scale.}
#' \item{K}{Number of interior knots with one predictor (number of partitions minus 1 with > 1 predictor).}
#' \item{p}{Number of basis expansions of predictors per partition.}
#' \item{q}{Number of predictor variables.}
#' \item{P}{Total number of coefficients (\eqn{p \times (K+1)}).}
#' \item{N}{Number of observations.}
#' \item{penalties}{List containing optimized penalty matrices and components:
#' \itemize{
#' \item Lambda: Combined penalty matrix (\eqn{\boldsymbol{\Lambda}}), includes \eqn{\textbf{L}_{\text{predictor\_list}}} contributions but not \eqn{\textbf{L}_{\text{partition\_list}}}.
#' \item L1: Smoothing spline penalty matrix (\eqn{\textbf{L}_1}).
#' \item L2: Ridge penalty matrix (\eqn{\textbf{L}_2}).
#' \item L predictor list: Predictor-specific penalty matrices (\eqn{\textbf{L}_{\text{predictor\_list}}}).
#' \item L partition list: Partition-specific penalty matrices (\eqn{\textbf{L}_{\text{partition\_list}}}).
#' }
#' }
#' \item{knot_scale_transf}{Function for transforming predictors to standardized scale used for knot placement.}
#' \item{knot_scale_inv_transf}{Function for transforming standardized predictors back to original scale.}
#' \item{knots}{Matrix of knot locations on the original, unstandardized predictor scale for one predictor.}
#' \item{partition_codes}{Vector assigning observations to partitions.}
#' \item{partition_bounds}{Vector or matrix specifying the boundaries between partitions.}
#' \item{knot_expand_function}{Internal function for expanding data according to partition structure.}
#' \item{predict}{Function for generating predictions on new data.}
#' \item{assign_partition}{Function for assigning new observations to partitions.}
#' \item{family}{GLM family object specifying the error distribution and link function.}
#' \item{estimate_dispersion}{Logical indicating whether dispersion parameter was estimated.}
#' \item{unbias_dispersion}{Logical indicating whether dispersion estimates should be unbiased.}
#' \item{backtransform_coefficients}{Function for converting standardized coefficients to original scale.}
#' \item{forwtransform_coefficients}{Function for converting coefficients to standardized scale.}
#' \item{mean_y, sd_y}{Mean and standard deviation of response if standardized.}
#' \item{og_order}{Original ordering of observations before partitioning.}
#' \item{order_list}{List containing observation indices for each partition.}
#' \item{constraint_values, constraint_vectors}{Matrices specifying linear equality constraints if provided.}
#' \item{make_partition_list}{List containing partition information for > 1-D cases.}
#' \item{expansion_scales}{Vector of scaling factors used for standardizing basis expansions.}
#' \item{take_derivative, take_interaction_2ndderivative}{Functions for computing derivatives of basis expansions.}
#' \item{get_all_derivatives_insample}{Function for computing all derivatives on training data.}
#' \item{numerics}{Indices of numeric predictors used in basis expansions.}
#' \item{power1_cols, power2_cols, power3_cols, power4_cols}{Column indices for linear through quartic terms.}
#' \item{quad_cols}{Column indices for all quadratic terms (including interactions).}
#' \item{interaction_single_cols, interaction_quad_cols}{Column indices for linear-linear and linear-quadratic interactions.}
#' \item{triplet_cols}{Column indices for three-way interactions.}
#' \item{nonspline_cols}{Column indices for terms excluded from spline expansion.}
#' \item{return_varcovmat}{Logical indicating whether variance-covariance matrix was computed.}
#' \item{raw_expansion_names}{Names of basis expansion terms.}
#' \item{std_X, unstd_X}{Functions for standardizing/unstandardizing design matrices.}
#' \item{parallel_cluster_supplied}{Logical indicating whether a parallel cluster was supplied.}
#' \item{weights}{List of observation weights per partition.}
#' \item{G}{List of unscaled unconstrained variance-covariance matrices \eqn{\textbf{G}_k} per partition k if \code{return_G=TRUE}. Computed as \eqn{(\textbf{X}_k^T\textbf{X}_k + \boldsymbol{\Lambda}_\text{eff})^{-1}} for partition k.}
#' \item{Ghalf}{List of \eqn{\textbf{G}_k^{1/2}} matrices if \code{return_Ghalf=TRUE}.}
#' \item{U}{Constraint projection matrix \eqn{\textbf{U}} if \code{return_U=TRUE}. For K=0 and no constraints, returns identity. Otherwise, returns \eqn{\textbf{U} = \textbf{I} - \textbf{G}\textbf{A}(\textbf{A}^T\textbf{G}\textbf{A})^{-1}\textbf{A}^T}. Used for computing the variance-covariance matrix \eqn{\sigma^2 \textbf{U}\textbf{G}}.}
#' \item{sigmasq_tilde}{Estimated (or fixed) dispersion parameter \eqn{\tilde{\sigma}^2}.}
#' \item{trace_XUGX}{Effective degrees of freedom (\eqn{\text{trace}(\textbf{X}\textbf{U}\textbf{G}\textbf{X}^{T})}).}
#' \item{varcovmat}{Variance-covariance matrix of coefficient estimates if \code{return_varcovmat=TRUE}.}
#' \item{VhalfInv}{The \eqn{\textbf{V}^{-1/2}} matrix used for implementing correlation structures, if specified.}
#' \item{VhalfInv_fxn, Vhalf_fxn, VhalfInv_logdet, REML_grad}{Functions for generating \eqn{\textbf{V}^{-1/2}}, \eqn{\textbf{V}^{1/2}}, \eqn{\log|\textbf{V}^{-1/2}|}, and gradient of REML if provided.}
#' \item{VhalfInv_params_estimates}{Vector of estimated correlation parameters when using \code{VhalfInv_fxn}.}
#' \item{VhalfInv_params_vcov}{Approximate variance-covariance matrix of estimated correlation parameters from BFGS optimization.}
#' \item{wald_univariate}{Function for computing univariate Wald statistics and confidence intervals.}
#' \item{critical_value}{Critical value used for confidence interval construction.}
#' \item{generate_posterior}{Function for drawing from the posterior distribution of coefficients.}
#' \item{find_extremum}{Function for optimizing the fitted function.}
#' \item{plot}{Function for visualizing fitted curves.}
#' \item{quadprog_list}{List containing quadratic programming components if applicable.}
#' }
#'
#' When \code{expansions_only=TRUE} is used, a reduced list is returned containing only the following prior to any fitting or tuning:
#' \describe{
#' \item{X}{Design matrices \eqn{\textbf{X}_k}}
#' \item{y}{Response vectors \eqn{\textbf{y}_k}}
#' \item{A}{Constraint matrix \eqn{\textbf{A}}}
#' \item{penalties}{Penalty matrices}
#' \item{order_list, og_order}{Ordering information}
#' \item{expansion_scales, colnm_expansions}{Scaling and naming information}
#' \item{K, knots}{Knot information}
#' \item{make_partition_list, partition_codes, partition_bounds}{Partition information}
#' \item{constraint_vectors, constraint_values}{Constraint information}
#' \item{quadprog_list}{Quadratic programming components if applicable}
#' }
#' The returned object has class "lgspline" and provides comprehensive tools for
#' model interpretation, inference, prediction, and visualization. All
#' coefficients and predictions can be transformed between standardized and
#' original scales using the provided transformation functions. The object includes
#' both frequentist and Bayesian inference capabilities through Wald statistics
#' and posterior sampling. Advanced customization options are available for
#' analyzing arbitrarily complex study designs.
#' See the Details section for a description of the model fitting process.
#'
#' @examples
#'
#' ## ## ## ## Simple Examples ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## Simulate some data, fit using default settings, and plot
#' set.seed(1234)
#' t <- runif(2500, -10, 10)
#' y <- 2*sin(t) + -0.06*t^2 + rnorm(length(t))
#' model_fit <- lgspline(t, y)
#' plot(t, y, main = 'Observed Data vs. Fitted Function, Colored by Partition')
#' plot(model_fit, add = TRUE)
#'
#' ## Repeat using logistic regression, with univariate inference shown
#' # and alternative function call
#' y <- rbinom(length(y), 1, 1/(1+exp(-std(y))))
#' df <- data.frame(t = t, y = y)
#' model_fit <- lgspline(y ~ spl(t),
#' df,
#' opt = FALSE, # no tuning penalties
#' family = quasibinomial())
#' plot(t, y, main = 'Observed Data vs Fitted Function with Formulas and Derivatives',
#' ylim = c(-0.5, 1.05), cex.main = 0.8)
#' plot(model_fit,
#' show_formulas = TRUE,
#' text_size_formula = 0.65,
#' legend_pos = 'bottomleft',
#' legend_args = list(y.intersp = 1.1),
#' add = TRUE)
#' ## Notice how the coefficients match the formula, and expansions are
#' # homogeneous across partitions without reparameterization
#' print(summary(model_fit))
#'
#' ## Overlay first and second derivatives of fitted function respectively
#' derivs <- predict(model_fit,
#' new_predictors = sort(t),
#' take_first_derivatives = TRUE,
#' take_second_derivatives = TRUE)
#' points(sort(t), derivs$first_deriv, col = 'gold', type = 'l')
#' points(sort(t), derivs$second_deriv, col = 'goldenrod', type = 'l')
#' legend('bottomright',
#' col = c('gold','goldenrod'),
#' lty = 1,
#' legend = c('First Derivative', 'Second Derivative'))
#'
#' ## Simple 2D example - including a non-spline effect
#' z <- seq(-2, 2, length.out = length(y))
#' df <- data.frame(Predictor1 = t,
#' Predictor2 = z,
#' Response = sin(y)+0.1*z)
#' model_fit <- lgspline(Response ~ spl(Predictor1) + Predictor1*Predictor2,
#' df)
#'
#' ## Notice, while spline effects change over partitions,
#' # interactions and non-spline effects are constrained to remain the same
#' coefficients <- Reduce('cbind', coef(model_fit))
#' colnames(coefficients) <- paste0('Partition ', 1:(model_fit$K+1))
#' print(coefficients)
#'
#' ## One or two variables can be selected for plotting at a time
#' # even when >= 3 predictors are present
#' plot(model_fit,
#' custom_title = 'Marginal Relationship of Predictor 1 and Response',
#' vars = 'Predictor1',
#' custom_response_lab = 'Response',
#' show_formulas = TRUE,
#' legend_pos = 'bottomright',
#' digits = 4,
#' text_size_formula = 0.5)
#' \donttest{
#' ## 3D plots are implemented as well, retaining analytical formulas
#' my_plot <- plot(model_fit,
#' show_formulas = TRUE,
#' custom_response_lab = 'Response')
#' my_plot
#'
#'
#' ## ## ## ## More Detailed 1D Example ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## 1D data generating functions
#' t <- seq(-9, 9, length.out = 1000)
#' slinky <- function(x) {
#' (50 * cos(x * 2) - 2 * x^2 + (0.25 * x)^4 + 80)
#' }
#' coil <- function(x) {
#' (100 * cos(x * 2) - 1.5 * x^2 + (0.1 * x)^4 +
#' (0.05 * x^3) + (-0.01 * x^5) +
#' (0.00002 * x^6) -(0.000001 * x^7) + 100)
#' }
#' exponential_log <- function(x) {
#' unlist(c(sapply(x, function(xx) {
#' if (xx <= 1) {
#' 100 * (exp(xx) - exp(1))
#' } else {
#' 100 * (log(xx))
#' }
#' })))
#' }
#' scaled_abs_gamma <- function(x) {
#' 2*sqrt(gamma(abs(x)))
#' }
#'
#' ## Composite function
#' fxn <- function(x)(slinky(x) +
#' coil(x) +
#' exponential_log(x) +
#' scaled_abs_gamma(x))
#'
#' ## Bind together with random noise
#' dat <- cbind(t, fxn(t) + rnorm(length(t), 0, 50))
#' colnames(dat) <- c('t', 'y')
#' x <- dat[,'t']
#' y <- dat[,'y']
#'
#' ## Fit Model, 4 equivalent ways are shown below
#' model_fit <- lgspline(t, y)
#' model_fit <- lgspline(y ~ spl(t), as.data.frame(dat))
#' model_fit <- lgspline(response = y, predictors = t)
#' model_fit <- lgspline(data = as.data.frame(dat), formula = y ~ .)
#'
#' # This is not valid: lgspline(y ~ ., t)
#' # This is not valid: lgspline(y, data = as.data.frame(dat))
#' # Do not put operations in formulas, not valid: lgspline(y ~ log(t) + spl(t))
#'
#' ## Basic Functionality
#' predict(model_fit, new_predictors = rnorm(1)) # make prediction on new data
#' head(leave_one_out(model_fit)) # leave-one-out cross-validated predictions
#' coef(model_fit) # extract coefficients
#' summary(model_fit) # model information and Wald inference
#' generate_posterior(model_fit) # generate draws of parameters from posterior distribution
#' find_extremum(model_fit, minimize = TRUE) # find the minimum of the fitted function
#'
#' ## Incorporate range constraints, custom knots, keep penalization identical
#' # across partitions
#' model_fit <- lgspline(y ~ spl(t),
#' unique_penalty_per_partition = FALSE,
#' custom_knots = cbind(c(-2, -1, 0, 1, 2)),
#' data = data.frame(t = t, y = y),
#' qp_range_lower = -150,
#' qp_range_upper = 150)
#'
#' ## Plotting the constraints and knots
#' plot(model_fit,
#' custom_title = 'Fitted Function Constrained to Lie Between (-150, 150)',
#' cex.main = 0.75)
#' # knot locations
#' abline(v = model_fit$knots)
#' # lower bound from quadratic program
#' abline(h = -150, lty = 2)
#' # upper bound from quadratic program
#' abline(h = 150, lty = 2)
#' # observed data
#' points(t, y, cex = 0.24)
#'
#' ## Enforce monotonic increasing constraints on fitted values
#' # K = 4 => 5 partitions
#' t <- seq(-10, 10, length.out = 100)
#' y <- 5*sin(t) + t + 2*rnorm(length(t))
#' model_fit <- lgspline(t,
#' y,
#' K = 4,
#' qp_monotonic_increase = TRUE)
#' plot(t, y, main = 'Monotonic Increasing Function with Respect to Fitted Values')
#' plot(model_fit,
#' add = TRUE,
#' show_formulas = TRUE,
#' legend_pos = 'bottomright',
#' custom_predictor_lab = 't',
#' custom_response_lab = 'y')
#'
#' ## ## ## ## 2D Example using Volcano Dataset ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## Prep
#' data('volcano')
#' volcano_long <-
#' Reduce('rbind', lapply(1:nrow(volcano), function(i){
#' t(sapply(1:ncol(volcano), function(j){
#' c(i, j, volcano[i,j])
#' }))
#' }))
#' colnames(volcano_long) <- c('Length', 'Width', 'Height')
#'
#' ## Fit, with 50 partitions
#' # When fitting with > 1 predictor and large K, including quartic terms
#' # is highly recommended, and/or dropping the second-derivative constraint.
#' # Otherwise, the constraints can force all partitions to be equal, with one
#' # cubic function fit for all (there aren't enough degrees of freedom to fit
#' # unique cubic functions due to the massive number of constraints).
#' # Below, quartic terms are included and the constraint of second-derivative
#' # smoothness at knots is ignored.
#' model_fit <- lgspline(volcano_long[,c(1, 2)],
#' volcano_long[,3],
#' include_quadratic_interactions = TRUE,
#' K = 49,
#' opt = FALSE,
#' return_U = FALSE,
#' return_varcovmat = FALSE,
#' estimate_dispersion = TRUE,
#' return_Ghalf = FALSE,
#' return_G = FALSE,
#' include_constrain_second_deriv = FALSE,
#' unique_penalty_per_predictor = FALSE,
#' unique_penalty_per_partition = FALSE,
#' wiggle_penalty = 1e-5, # the fixed wiggle penalty
#' flat_ridge_penalty = 1e-4) # the ridge penalty
#'
#' ## Plotting on new data with interactive visual + formulas
#' new_input <- expand.grid(seq(min(volcano_long[,1]),
#' max(volcano_long[,1]),
#' length.out = 250),
#' seq(min(volcano_long[,2]),
#' max(volcano_long[,2]),
#' length.out = 250))
#' model_fit$plot(new_predictors = new_input,
#' show_formulas = TRUE,
#' custom_response_lab = "Height",
#' custom_title = 'Volcano 3-D Map',
#' digits = 2)
#'
#' ## ## ## ## Advanced Techniques using Trees Dataset ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## Goal here is to introduce how lgspline works with non-canonical GLMs and
#' # demonstrate some custom features
#' data('trees')
#'
#' ## L1-regularization constraint function on standardized coefficients
#' # Bound all coefficients to be less than a certain value (l1_bound) in absolute
#' # magnitude such that | B^{(j)}_k | < l1_bound for all j = 1...p coefficients,
#' # and k = 1...K+1 partitions.
#' l1_constraint_matrix <- function(p, K) {
#' ## Total number of coefficients
#' P <- p * (K + 1)
#'
#' ## Create diagonal matrices for L1 constraint
#' # First matrix: beta > -bound
#' # Second matrix: -beta > -bound
#' first_diag <- diag(P)
#' second_diag <- -diag(P)
#'
#' ## Combine matrices
#' l1_Amat <- cbind(first_diag, second_diag)
#'
#' return(l1_Amat)
#' }
#'
#' ## Bounds absolute value of coefficients to be < l1_bound
#' l1_bound_vector <- function(qp_Amat,
#' scales,
#' l1_bound) {
#'
#' ## Bound vector: negative bounds, scaled to account for expansion standardization
#' l1_bvec <- rep(-l1_bound, ncol(qp_Amat)) * c(1, scales)
#'
#' return(l1_bvec)
#' }
#'
#' ## Fit model, using predictor-response formulation, assuming
#' # Gamma-distributed response, and custom quadratic-programming constraints,
#' # with qp_score_function/glm_weight_function updated for non-canonical GLMs
#' # as well as quartic terms, keeping the effect of height constant across
#' # partitions, and 3 partitions in total. Hence, this is an advanced-usage
#' # case.
#' # You can modify this code for performing l1-regularization in general.
#' # For canonical GLMs, the default qp_score_function/glm_weight_function are
#' # correct and do not need to be changed
#' # (custom functionality is not needed for canonical GLMs).
#' model_fit <- lgspline(
#' Volume ~ spl(Girth) + Height*Girth,
#' data = with(trees, cbind(Girth, Height, Volume)),
#' family = Gamma(link = 'log'),
#' keep_weighted_Lambda = TRUE,
#' glm_weight_function = function(
#' mu,
#' y,
#' order_indices,
#' family,
#' dispersion,
#' observation_weights,
#' ...){
#' rep(1/dispersion, length(y))
#' },
#' dispersion_function = function(
#' mu,
#' y,
#' order_indices,
#' family,
#' observation_weights,
#' ...){
#' mean(
#' (y - mu)^2/mu^2
#' )
#' }, # = biased estimate of 1/shape parameter
#' need_dispersion_for_estimation = TRUE,
#' unbias_dispersion = TRUE, # multiply dispersion by N/(N-trace(XUGX^{T}))
#' K = 2, # 3 partitions
#' opt = FALSE, # keep penalties fixed
#' unique_penalty_per_partition = FALSE,
#' unique_penalty_per_predictor = FALSE,
#' flat_ridge_penalty = 1e-64,
#' wiggle_penalty = 1e-64,
#' qp_score_function = function(X, y, mu, order_list, dispersion, VhalfInv,
#' observation_weights, ...){
#' t(X) %**% diag(c(1/mu * 1/dispersion)) %**% cbind(y - mu)
#' }, # updated score for gamma regression with log link
#' qp_Amat_fxn = function(N, p, K, X, colnm, scales, deriv_fxn, ...) {
#' l1_constraint_matrix(p, K)
#' },
#' qp_bvec_fxn = function(qp_Amat, N, p, K, X, colnm, scales, deriv_fxn, ...) {
#' l1_bound_vector(qp_Amat, scales, 25)
#' },
#' qp_meq_fxn = function(qp_Amat, N, p, K, X, colnm, scales, deriv_fxn, ...) 0
#' )
#'
#' ## Notice, interaction effect is constant across partitions as is the effect
#' # of Height alone
#' Reduce('cbind', coef(model_fit))
#' print(summary(model_fit))
#'
#' ## Plot results
#' plot(model_fit, custom_predictor_lab1 = 'Girth',
#' custom_predictor_lab2 = 'Height',
#' custom_response_lab = 'Volume',
#' custom_title = 'Girth and Height Predicting Volume of Trees',
#' show_formulas = TRUE)
#'
#' ## Verify magnitude of unstandardized coefficients does not exceed bound (25)
#' print(max(abs(unlist(model_fit$B))))
#'
#' ## Find height and girth where tree volume is closest to 42
#' # Uses custom objective that minimizes MSE discrepancy between predicted
#' # value and 42.
#' # The vanilla find_extremum function can be thought of as
#' # using "function(mu)mu" aka the identity function as the
#' # objective, where mu = "f(t)", our estimated function. The derivative is then
#' # d_mu = "f'(t)" with respect to predictors t.
#' # But with more creative objectives, and since we have machinery for
#' # f'(t) already available, we can compute gradients for (and optimize)
#' # arbitrary differentiable functions of our predictors too.
#' # For any objective, differentiate w.r.t. mu, then multiply by d_mu to
#' # satisfy chain rule.
#' # Here, we have objective function: 0.5*(mu-42)^2
#' # and gradient : (mu-42)*d_mu
#' # and L-BFGS-B will be used to find the height and girth that most closely
#' # yields a prediction of 42 within the bounds of the observed data.
#' # The d_mu also takes into account link function transforms automatically
#' # for most common link functions, and will return warning + instructions
#' # on how to program the link-function derivatives otherwise.
#'
#' ## Custom acquisition functions for Bayesian optimization could be coded here.
#' find_extremum(
#' model_fit,
#' minimize = TRUE,
#' custom_objective_function = function(mu, sigma, ybest, ...){
#' 0.5*(mu - 42)^2
#' },
#' custom_objective_derivative = function(mu, sigma, ybest, d_mu, ...){
#' (mu - 42) * d_mu
#' }
#' )
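#'
#' ## A sketch of an expected-improvement-style acquisition for minimization.
#' # This assumes 'ybest' passed by find_extremum is the best (smallest) observed
#' # response, and treats sigma as locally constant when forming the gradient:
#' # EI = (ybest - mu)*pnorm(z) + sigma*dnorm(z) with z = (ybest - mu)/sigma,
#' # so d(-EI)/d(mu) = pnorm(z), and the gradient is pnorm(z)*d_mu by chain rule.
#' find_extremum(
#' model_fit,
#' minimize = TRUE, # minimize the negative expected improvement
#' custom_objective_function = function(mu, sigma, ybest, ...){
#' z <- (ybest - mu)/sigma
#' -((ybest - mu)*pnorm(z) + sigma*dnorm(z))
#' },
#' custom_objective_derivative = function(mu, sigma, ybest, d_mu, ...){
#' z <- (ybest - mu)/sigma
#' pnorm(z) * d_mu
#' }
#' )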
#'
#' ## ## ## ## How to Use Formulas in lgspline ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## Demonstrates splines with multiple mixed predictors and interactions
#'
#' ## Generate data
#' n <- 2500
#' x <- rnorm(n)
#' y <- rnorm(n)
#' z <- sin(x)*mean(abs(y))/2
#'
#' ## Categorical predictors
#' cat1 <- rbinom(n, 1, 0.5)
#' cat2 <- rbinom(n, 1, 0.5)
#' cat3 <- rbinom(n, 1, 0.5)
#'
#' ## Response with mix of effects
#' response <- y + z + 0.1*(2*cat1 - 1)
#'
#' ## Continuous predictors re-named
#' continuous1 <- x
#' continuous2 <- z
#'
#' ## Combine data
#' dat <- data.frame(
#' response = response,
#' continuous1 = continuous1,
#' continuous2 = continuous2,
#' cat1 = cat1,
#' cat2 = cat2,
#' cat3 = cat3
#' )
#'
#' ## Example 1: Basic Model with Default Terms, No Intercept
#' # standardize_response = FALSE often needed when constraining intercepts to 0
#' fit1 <- lgspline(
#' formula = response ~ 0 + spl(continuous1, continuous2) +
#' cat1*cat2*continuous1 + cat3,
#' K = 2,
#' standardize_response = FALSE,
#' data = dat
#' )
#' ## Examine coefficients included
#' rownames(fit1$B$partition1)
#' ## Verify intercept term is near 0 up to some numeric tolerance
#' abs(fit1$B[[1]][1]) < 1e-8
#'
#' ## Example 2: Similar Model with Intercept, Other Terms Excluded
#' fit2 <- lgspline(
#' formula = response ~ spl(continuous1, continuous2) +
#' cat1*cat2*continuous1 + cat3,
#' K = 1,
#' standardize_response = FALSE,
#' include_cubic_terms = FALSE,
#' exclude_these_expansions = c( # Not all need to actually be present
#' '_batman_x_robin_',
#' '_3_x_4_', # no cat1 x cat2 interaction, coded using column indices
#' 'continuous1xcontinuous2', # no continuous1 x continuous2 interaction
#' 'thejoker'
#' ),
#' data = dat
#' )
#' ## Examine coefficients included
#' rownames(Reduce('cbind',coef(fit2)))
#' # Intercept will probably be present and non-0 now
#' abs(fit2$B[[1]][1]) < 1e-8
#'
#' ## ## ## ## Compare Inference to survreg for Weibull AFT Model Validation ##
#' # Only linear predictors, no knots, no penalties, using Weibull AFT Model
#' # The goal here is to ensure that for the special case of no spline effects
#' # and no knots, this implementation will be consistent with other model
#' # implementations.
#' # Also note that when using models (like Weibull AFT) where dispersion is
#' # being estimated and is required for estimating beta coefficients,
#' # we use a shur complement correction function to adjust (or "correct") our
#' # variance-covariance matrix for both estimation and inference to account for
#' # uncertainty in estimating the dispersion.
#' # Typically the shur_correction_function would return a negative-definite
#' # matrix, as its output is elementwise added to the information matrix prior
#' # to inversion.
#' require(survival)
#' df <- data.frame(na.omit(
#' pbc[,c('time','trt','stage','hepato','bili','age','status')]
#' ))
#'
#' ## Weibull AFT using lgspline, showing how some custom options can be used to
#' # fit more complicated models
#' model_fit <- lgspline(time ~ trt + stage + hepato + bili + age,
#' df,
#' family = weibull_family(),
#' need_dispersion_for_estimation = TRUE,
#' dispersion_function = weibull_dispersion_function,
#' glm_weight_function = weibull_glm_weight_function,
#' shur_correction_function = weibull_shur_correction,
#' unconstrained_fit_fxn = unconstrained_fit_weibull,
#' opt = FALSE,
#' wiggle_penalty = 0,
#' flat_ridge_penalty = 0,
#' K = 0,
#' status = pbc$status!=0)
#' print(summary(model_fit))
#'
#' ## Survreg results match closely on estimates and inference for coefficients
#' survreg_fit <- survreg(Surv(time, status!=0) ~ trt + stage + hepato + bili + age,
#' df)
#' print(summary(survreg_fit))
#'
#' ## sigmasq_tilde = scale^2 of survreg
#' print(c(sqrt(model_fit$sigmasq_tilde), survreg_fit$scale))
#'
#' ## ## ## ## Modelling Correlation Structures ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## Setup
#' n_blocks <- 150 # Number of correlation_ids (subjects)
#' block_size <- 5 # Size of each correlation_id block (repeated measures per subject)
#' N <- n_blocks * block_size # total sample size (balanced here)
#' rho_true <- 0.25 # True correlation
#'
#' ## Generate predictors and mean structure
#' t <- seq(-9, 9, length.out = N)
#' true_mean <- sin(t)
#'
#' ## Create block compound-symmetric errors with correlation (1-rho)*I + rho*J
#' errors <- Reduce('rbind',
#' lapply(1:n_blocks,
#' function(i){
#' sigma <- diag(block_size) + rho_true *
#' (matrix(1, block_size, block_size) -
#' diag(block_size))
#' matsqrt(sigma) %*% rnorm(block_size)
#' }))
#'
#' ## Generate response with correlated errors
#' y <- true_mean + errors * 0.5
#'
#' ## Fit model with correlation structure
#' # include_warnings = FALSE is a good idea here, since many proposed
#' # correlations won't work
#' model_fit <- lgspline(t,
#' y,
#' K = 4,
#' correlation_id = rep(1:n_blocks, each = block_size),
#' correlation_structure = 'exchangeable',
#' include_warnings = FALSE
#' )
#'
#' ## Assess overall fit
#' plot(t, y, main = 'Sinusoidal Fit Under Correlation Structure')
#' plot(model_fit, add = TRUE, show_formulas = TRUE, custom_predictor_lab = 't')
#'
#' ## Compare estimated vs true correlation
#' rho_est <- tanh(model_fit$VhalfInv_params_estimates)
#' print(c("True correlation:" = rho_true,
#' "Estimated correlation:" = rho_est))
#'
#' ## Quantify uncertainty in correlation estimate with 95% confidence interval
#' se <- c(sqrt(diag(model_fit$VhalfInv_params_vcov))) / sqrt(model_fit$N)
#' ci <- tanh(model_fit$VhalfInv_params_estimates + c(-1.96, 1.96)*se)
#' print("95% CI for correlation:")
#' print(ci)
#'
#' ## Also check SD (should be close to 0.5)
#' print(sqrt(model_fit$sigmasq_tilde))
#'
#' ## Toeplitz Simulation Setup, with demonstration of custom functions
#' # and boilerplate. A Toeplitz structure is not implemented by default because
#' # it makes strong assumptions about the study design and missingness that are
#' # rarely met, with non-obvious workarounds.
#' # If a GLM were to be fit, you would also supply a function "Vhalf_fxn",
#' # analogous to VhalfInv_fxn, taking the same argument (par) and returning an
#' # N x N matrix that is the inverse of the VhalfInv_fxn output.
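#' # A matching Vhalf_fxn is not needed for this Gaussian example; it would
#' # mirror VhalfInv_fxn below, building the same block Toeplitz correlation
#' # matrix but returning kronecker(diag(n_blocks), matsqrt(corr)) rather than
#' # its inverse square root.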
#' n_blocks <- 150 # Number of correlation_ids
#' block_size <- 4 # Observations per correlation_id
#' N <- n_blocks * block_size # total sample size
#' rho_true <- 0.5 # True correlation within correlation_ids
#' true_intercept <- 2 # True intercept
#' true_slope <- 0.5 # True slope for covariate
#'
#' ## Create design matrix with meaningful predictors
#' Tmat <- matrix(0, N, 2)
#' Tmat[,1] <- 1 # Intercept
#' Tmat[,2] <- cos(rnorm(N)) # Continuous predictor
#'
#' ## True coefficients
#' beta <- c(true_intercept, true_slope)
#'
#' ## Create time and correlation_id variables
#' time_var <- rep(1:block_size, n_blocks)
#' correlation_id_var <- rep(1:n_blocks, each = block_size)
#'
#' ## Create block compound symmetric errors
#' errors <- Reduce('rbind',
#' lapply(1:n_blocks, function(i) {
#' sigma <- diag(block_size) + rho_true *
#' (matrix(1, block_size, block_size) -
#' diag(block_size))
#' matsqrt(sigma) %*% rnorm(block_size)
#' }))
#'
#' ## Generate response with correlated errors and covariate effect
#' y <- Tmat %*% beta + errors * 2
#'
#' ## Toeplitz correlation function
#' VhalfInv_fxn <- function(par) {
#' # Initialize correlation matrix
#' corr <- matrix(0, block_size, block_size)
#'
#' # Construct Toeplitz matrix with correlation by lag
#' for(i in 1:block_size) {
#' for(j in 1:block_size) {
#' lag <- abs(time_var[i] - time_var[j])
#' if(lag == 0) {
#' corr[i,j] <- 1
#' } else if(lag <= length(par)) {
#' # Use tanh to bound correlations between -1 and 1
#' corr[i,j] <- tanh(par[lag])
#' }
#' }
#' }
#'
#' ## Matrix square root inverse
#' corr_inv_sqrt <- matinvsqrt(corr)
#'
#' ## Expand to full matrix using Kronecker product
#' kronecker(diag(n_blocks), corr_inv_sqrt)
#' }
#'
#' ## Determinant function (for efficiency)
#' # This avoids taking determinant of N by N matrix
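#' # Since V is block-diagonal with n_blocks identical blocks,
#' # log|V^(-1/2)| = n_blocks * (-0.5 * log|corr|), computed from a single block.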
#' VhalfInv_logdet <- function(par) {
#' # Initialize correlation matrix
#' corr <- matrix(0, block_size, block_size)
#'
#' # Construct Toeplitz matrix
#' for(i in 1:block_size) {
#' for(j in 1:block_size) {
#' lag <- abs(time_var[i] - time_var[j])
#' if(lag == 0) {
#' corr[i,j] <- 1
#' } else if(lag <= length(par)) {
#' corr[i,j] <- tanh(par[lag])
#' }
#' }
#' }
#'
#' # Compute log determinant
#' log_det_invsqrt_corr <- -0.5 * determinant(corr, logarithm=TRUE)$modulus[1]
#' return(n_blocks * log_det_invsqrt_corr)
#' }
#'
#' ## REML gradient function
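#' # Per parameter, the gradient below sums three REML pieces: a residual
#' # quadratic-form term, a log|V| trace term, and an information-matrix
#' # correction term involving (X'V^{-1}X + Lambda)^{-1}.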
#' REML_grad <- function(par, model_fit, ...) {
#' ## Initialize gradient vector
#' n_par <- length(par)
#' gradient <- numeric(n_par)
#'
#' ## Get dimensions and organize data
#' nr <- nrow(model_fit$X[[1]])
#'
#' ## Process derivatives one parameter at a time
#' for(p in 1:n_par) {
#' ## Initialize derivative matrix
#' dV <- matrix(0, nrow(model_fit$VhalfInv), ncol(model_fit$VhalfInv))
#' V <- matrix(0, nrow(model_fit$VhalfInv), ncol(model_fit$VhalfInv))
#'
#' ## Compute full correlation matrix and its derivative for parameter p
#' for(clust in unique(correlation_id_var)) {
#' inds <- which(correlation_id_var == clust)
#' block_size <- length(inds)
#'
#' ## Initialize block matrices
#' V_block <- matrix(0, block_size, block_size)
#' dV_block <- matrix(0, block_size, block_size)
#'
#' ## Construct Toeplitz matrix and its derivative
#' for(i in 1:block_size) {
#' for(j in 1:block_size) {
#' ## Compute lag between observations
#' lag <- abs(time_var[i] - time_var[j])
#'
#' ## Diagonal is always 1
#' if(i == j) {
#' V_block[i,j] <- 1
#' dV_block[i,j] <- 0
#' } else {
#' ## Correlation for off-diagonal depends on lag
#' if(lag <= length(par)) {
#' ## Correlation via tanh parameterization
#' V_block[i,j] <- tanh(par[lag])
#'
#' ## Derivative for the relevant parameter
#' if(lag == p) {
#' ## Chain rule for tanh: d/dx tanh(x) = 1 - tanh^2(x)
#' dV_block[i,j] <- 1 - tanh(par[p])^2
#' }
#' }
#' }
#' }
#' }
#'
#' ## Assign blocks to full matrices
#' V[inds, inds] <- V_block
#' dV[inds, inds] <- dV_block
#' }
#'
#' ## GLM Weights based on current model fit (all 1s for normal)
#' glm_weights <- rep(1, model_fit$N)
#'
#' ## Quadratic form contribution
#' resid <- model_fit$y - model_fit$ytilde
#' VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
#' quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
#' model_fit$sigmasq_tilde
#'
#' ## Log|V| contribution - trace term
#' trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
#' model_fit$VhalfInv %**%
#' dV))
#'
#' ## Information matrix contribution
#' U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
#' model_fit$K + 1)) /
#' model_fit$sd_y
#' VhalfInvX <- model_fit$VhalfInv %**%
#' collapse_block_diagonal(model_fit$X)[unlist(
#' model_fit$og_order
#' ),] %**%
#' U
#'
#' ## Lambda computation for GLMs
#' if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)){
#' model_fit$penalties$L_partition_list <- lapply(
#' 1:(model_fit$K + 1), function(k)0
#' )
#' }
#' Lambda <- U %**% collapse_block_diagonal(
#' lapply(1:(model_fit$K + 1),
#' function(k)
#' c(1, model_fit$expansion_scales) * (
#' model_fit$penalties$L_partition_list[[k]] +
#' model_fit$penalties$Lambda) %**%
#' diag(c(1, model_fit$expansion_scales)) /
#' model_fit$sd_y^2
#' )
#' ) %**% t(U)
#'
#' XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
#' Lambda)
#' VInvX <- model_fit$VhalfInv %**% VhalfInvX
#' sc <- sqrt(norm(VInvX, '2'))
#' VInvX <- VInvX/sc
#' dXVinvX <-
#' (XVinvX_inv %**% t(VInvX)) %**%
#' (dV %**% VInvX)
#' XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
#'
#' ## Store gradient component (all three terms)
#' gradient[p] <- as.numeric(quad_term + trace_term + XVinvX_term)
#' }
#'
#' ## Return normalized gradient
#' return(gradient / model_fit$N)
#' }
#'
#' ## Visualization
#' plot(time_var, y, col = correlation_id_var,
#' main = "Simulated Data with Toeplitz Correlation")
#'
#' ## Fit model with custom Toeplitz (takes ~ 5-10 minutes on my laptop)
#' # Note, for GLMs, efficiency would be improved by supplying a Vhalf_fxn
#' # although strictly only VhalfInv_fxn and VhalfInv_par_init are needed
#' model_fit <- lgspline(
#' response = y,
#' predictors = Tmat[,2],
#' K = 4,
#' VhalfInv_fxn = VhalfInv_fxn,
#' VhalfInv_logdet = VhalfInv_logdet,
#' REML_grad = REML_grad,
#' VhalfInv_par_init = c(0, 0, 0),
#' include_warnings = FALSE
#' )
#'
#' ## Print comparison of true and estimated correlations
#' rho_true <- rep(0.5, 3)
#' rho_est <- tanh(model_fit$VhalfInv_params_estimates)
#' cat("Correlation Estimates:\n")
#' print(data.frame(
#' "True Correlation" = rho_true,
#' "Estimated Correlation" = rho_est
#' ))
#'
#' ## Should be ~ 2
#' print(sqrt(model_fit$sigmasq_tilde))
#'
#' ## ## ## ## Parallelism ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
#' ## Generate data
#' a <- runif(500000, -9, 9)
#' b <- runif(500000, -9, 9)
#' c <- rnorm(500000)
#' d <- rpois(500000, 1)
#' y <- sin(a) + cos(b) - 0.2*sqrt(a^2 + b^2) +
#' abs(a) + b +
#' 0.5*(a^2 + b^2) +
#' (1/6)*(a^3 + b^3) +
#' a*b*c -
#' c +
#' d +
#' rnorm(500000, 0, 5)
#'
#' ## Set up cores
#' cl <- parallel::makeCluster(1)
#' on.exit(parallel::stopCluster(cl))
#'
#' ## This example shows some options for what operations can be parallelized
#' # By default, only parallel_eigen and parallel_unconstrained are TRUE
#' # G, G^{-1/2}, and G^{1/2} are computed in parallel across each of the
#' # K+1 partitions.
#' # However, parallel_unconstrained only affects GLMs without corr. components
#' # - it does not affect fitting here
#' system.time({
#' parfit <- lgspline(y ~ spl(a, b) + a*b*c + d,
#' data = data.frame(y = y,
#' a = a,
#' b = b,
#' c = c,
#' d = d),
#' cl = cl,
#' K = 1,
#' parallel_eigen = TRUE,
#' parallel_unconstrained = TRUE,
#' parallel_aga = FALSE,
#' parallel_find_neighbors = FALSE,
#' parallel_trace = FALSE,
#' parallel_matmult = FALSE,
#' parallel_make_constraint = FALSE,
#' parallel_penalty = FALSE)
#' })
#' parallel::stopCluster(cl)
#' print(summary(parfit))
#' }
#'
#' @seealso
#' \itemize{
#' \item \code{\link[quadprog]{solve.QP}} for quadratic programming optimization
#' \item \code{\link[plotly]{plot_ly}} for interactive plotting
#' \item \code{\link[stats]{kmeans}} for k-means clustering
#' \item \code{\link[stats]{optim}} for general purpose optimization routines
#' }
#'
#' @export
lgspline <- function(
predictors = NULL,
y = NULL,
formula = NULL,
response = NULL,
standardize_response = TRUE,
standardize_predictors_for_knots = TRUE,
standardize_expansions_for_fitting = TRUE,
family = gaussian(),
glm_weight_function = function(mu,
y,
order_indices,
family,
dispersion,
observation_weights,
...){
if(any(!is.null(observation_weights))){
family$variance(mu) * observation_weights
} else {
family$variance(mu)
}
},
shur_correction_function = function(X,
y,
B,
dispersion,
order_list,
K,
family,
observation_weights,
...){
lapply(1:(K+1), function(k)0)
},
need_dispersion_for_estimation = FALSE,
dispersion_function = function(mu,
y,
order_indices,
family,
observation_weights,
...){ 1 },
K = NULL,
custom_knots = NULL,
cluster_on_indicators = FALSE,
make_partition_list = NULL,
previously_tuned_penalties = NULL,
smoothing_spline_penalty = NULL,
opt = TRUE,
use_custom_bfgs = TRUE,
delta = NULL,
tol = 10*sqrt(.Machine$double.eps),
invsoftplus_initial_wiggle = c(-25, 20, -15, -10, -5),
invsoftplus_initial_flat = c(-14, -7),
wiggle_penalty = 2e-7,
flat_ridge_penalty = 0.5,
unique_penalty_per_partition = TRUE,
unique_penalty_per_predictor = TRUE,
meta_penalty = 1e-8,
predictor_penalties = NULL,
partition_penalties = NULL,
include_quadratic_terms = TRUE,
include_cubic_terms = TRUE,
include_quartic_terms = NULL,
include_2way_interactions = TRUE,
include_3way_interactions = TRUE,
include_quadratic_interactions = FALSE,
offset = c(),
just_linear_with_interactions = NULL,
just_linear_without_interactions = NULL,
exclude_interactions_for = NULL,
exclude_these_expansions = NULL,
custom_basis_fxn = NULL,
include_constrain_fitted = TRUE,
include_constrain_first_deriv = TRUE,
include_constrain_second_deriv = TRUE,
include_constrain_interactions = TRUE,
cl = NULL,
chunk_size = NULL,
parallel_eigen = TRUE,
parallel_trace = FALSE,
parallel_aga = FALSE,
parallel_matmult = FALSE,
parallel_unconstrained = TRUE,
parallel_find_neighbors = FALSE,
parallel_penalty = FALSE,
parallel_make_constraint = FALSE,
unconstrained_fit_fxn = unconstrained_fit_default,
keep_weighted_Lambda = FALSE,
iterate_tune = TRUE,
iterate_final_fit = TRUE,
blockfit = FALSE,
qp_score_function = function(X, y, mu, order_list, dispersion, VhalfInv, observation_weights, ...) {
if(!is.null(observation_weights)) {
crossprod(X, cbind((y - mu)*observation_weights))
} else {
crossprod(X, cbind(y - mu))
}
},
qp_observations = NULL,
qp_Amat = NULL,
qp_bvec = NULL,
qp_meq = 0,
qp_positive_derivative = FALSE,
qp_negative_derivative = FALSE,
qp_monotonic_increase = FALSE,
qp_monotonic_decrease = FALSE,
qp_range_upper = NULL,
qp_range_lower = NULL,
qp_Amat_fxn = NULL,
qp_bvec_fxn = NULL,
qp_meq_fxn = NULL,
constraint_values = cbind(),
constraint_vectors = cbind(),
return_G = TRUE,
return_Ghalf = TRUE,
return_U = TRUE,
estimate_dispersion = TRUE,
unbias_dispersion = NULL,
return_varcovmat = TRUE,
custom_penalty_mat = NULL,
cluster_args = c(custom_centers = NA, nstart = 10),
dummy_dividor = 0.00000000000000000000012345672152894,
dummy_adder = 0.000000000000000002234567210529,
verbose = FALSE,
verbose_tune = FALSE,
expansions_only = FALSE,
observation_weights = NULL,
do_not_cluster_on_these = c(),
neighbor_tolerance = 1 + 1e-8,
null_constraint = NULL,
critical_value = qnorm(1-0.05/2),
data = NULL,
weights = NULL,
no_intercept = FALSE,
correlation_id = NULL,
spacetime = NULL,
correlation_structure = NULL,
VhalfInv = NULL,
Vhalf = NULL,
VhalfInv_fxn = NULL,
Vhalf_fxn = NULL,
VhalfInv_par_init = c(),
REML_grad = NULL,
custom_VhalfInv_loss = NULL,
VhalfInv_logdet = NULL,
include_warnings = TRUE,
...
){
if(verbose){
cat('Pre-Processing\n')
}
## Unbiasing dispersion is automatically done for Gaussian + identity only
if(paste0(family)[1] == 'gaussian' &
paste0(family)[2] == 'identity' &
is.null(unbias_dispersion)){
unbias_dispersion <- TRUE
} else if(is.null(unbias_dispersion)){
unbias_dispersion <- FALSE
}
## If the first argument is a formula and the second is a data frame (the
# usual R modelling interface), treat the second argument as the data.
if(any(!is.null(predictors)) & any(!is.null(y))){
if(any(inherits(y,'data.frame') & inherits(predictors, "formula"))){
data <- y
}
}
## If predictors is NULL but a formula was supplied to the formula argument,
# use the formula as predictors for user convenience.
if(any(is.null(predictors)) & any(!is.null(formula))){
predictors <- formula
} else if(any(is.null(predictors))){
stop('\n \t Predictors argument is NULL without formula supplied.',
' Either supply a formula to predictors OR formula argument, or a',
' data frame to predictors argument, or a matrix of numeric predictors',
' to the predictors argument. \n')
}
## Update naming conventions, if response supplied in place of "y" for user
# convenience.
if(any(is.null(y)) & any(!is.null(response))){
y <- response
response <- NULL
}
## Weights is just an R-friendly argument to be passed to observation_weights
# actually used by the function.
if(any(is.null(observation_weights)) &
any(!is.null(weights))){
observation_weights <- weights
weights <- NULL
}
## Check cluster args for compatibility
if(any(!is.na(cluster_args[[1]]))){
ncluster <- try({nrow(cluster_args[[1]])},
silent = TRUE)
if(any(inherits(ncluster, 'try-error'))){
stop('\n \t custom_centers should be a matrix; do not include any other',
' arguments within cluster_args if you include custom_centers. \n')
}
if(!is.null(K)){
if(ncluster != (K+1) & include_warnings){
warning('\n \t K must be equal to number of custom_centers minus 1. ',
'Updating K for compatibility. \n')
K <- ncluster - 1
}
} else {
K <- ncluster - 1
}
}
## Check data and formula argument
if(!is.null(data) & !inherits(predictors, "formula")) {
stop("\n \t If submitting data argument, formula must be supplied and",
" variables must match. Otherwise, use predictors and y (or response)",
" arguments directly.\n",
"\t Example: lgspline(y ~ spl(x1, x2) + x3 + x4*x5, data = my_data)\n",
"\t Example: lgspline(y ~ ., data = my_data)\n")
}
## Handle formula interface
if(inherits(predictors, "formula")) {
## Check data argument
if(is.null(data)) {
stop("\n \t When using formula interface, data argument must be ",
"provided.\n",
"\t Example: lgspline(y ~ spl(x1, x2) + x3 + x4*x5, data = my_data)\n",
"\t Example: lgspline(y ~ ., data = my_data)\n")
}
## Try to coerce data to data.frame
tryCatch({
data <- as.data.frame(data)
}, error = function(e) {
stop("\n \t Could not coerce data argument to data.frame. ",
"Please provide data in a format coercible to data.frame. ",
"Examples: data.frame, tibble, or matrix.")
})
## Check column names are present
if(any(is.null(colnames(data)))){
stop('\n \t Column names of data must be supplied if formula is',
' supplied, and column names must match what is provided in formula. \n')
}
## Stringed formula
form_paste0 <- paste0(predictors)
## If formula is y ~ ., replace with y ~ spl(x1, x2, ...)
if(form_paste0[1] == '~' &
(gsub(' ', '', form_paste0[3]) %in% c('.', '0+.','.+0')) &
length(form_paste0) == 3){
if(form_paste0[2] == '.'){
stop('\n \t . ~ . is not valid for this function, specify the',
' response directly, like "y ~ ." \n')
} else if(gsub(' ', '', form_paste0[3]) == '0+.' |
gsub(' ', '', form_paste0[3]) == '.+0'){
## No intercept
predictors <- as.formula(
paste0(form_paste0[2],
' ~ 0 + spl(',
paste(colnames(data)[-which(colnames(data) ==
form_paste0[2])],
collapse = ', '),
')')
)
} else {
## With intercept
predictors <- as.formula(
paste0(form_paste0[2],
' ~ spl(',
paste(colnames(data)[-which(colnames(data) ==
form_paste0[2])],
collapse = ', '),
')')
)
}
}
## If offset() appears in the formula, remove offset from the formula
# and set offset = names of terms inside the offset() operator
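# e.g. a formula like y ~ x1 + offset(log_exposure) (illustrative names) becomes
# y ~ x1 + log_exposure with offset set to 'log_exposure', which is later
# matched to its column index.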
if(any(grepl('offset', paste0(predictors)))){
if(any(grepl("offset\\(", deparse(predictors)))){
## Extract the original formula as string
form_str <- deparse(predictors)
## Find all offset terms using regex
offset_matches <- gregexpr("offset\\(([^)]+)\\)", form_str)
offset_vars <- regmatches(form_str, offset_matches)[[1]]
## Extract variable names from offset terms
offset_var_names <- gsub("offset\\((.*)\\)", "\\1", offset_vars)
## Remove whitespace
offset_var_names <- trimws(offset_var_names)
## Add to offset vector
offset <- c(offset, offset_var_names)
## Replace offset(var) with var in formula
for(i in seq_along(offset_vars)) {
form_str <- gsub(offset_vars[i], offset_var_names[i],
form_str,
fixed=TRUE)
}
## Convert back to formula
predictors <- as.formula(form_str)
}
}
## Check that all formula predictors are numeric in data
terms <- terms(predictors)
term_labels <- attr(terms, "term.labels")
non_numeric <- c(1:ncol(data))[!sapply(data, is.numeric)]
if(length(non_numeric) > 0) {
for(v in non_numeric){
if(any(term_labels == colnames(data)[v])){
stop("Non-numeric columns detected in predictors. ",
"Convert these to numeric before proceeding. ",
"You can use create_onehot() on categorical",
" columns of your dataset to obtain binary ",
"indicator variables if you need to. Remember " ,
"to remove the original categorical variable ",
"and append the indicators to your data ",
"before adjusting your formula and call ",
"to lgspline(...). \n\nSee ?create_one_hot for an example.")
}
}
}
## Parse rest of formula
has_3way <- any(attr(terms, "order") == 3)
if(include_3way_interactions) include_3way_interactions <- has_3way
## Check for no intercept specification in formula
if(inherits(predictors, "formula")) {
formula_text <- gsub(" ", "", Reduce(paste, deparse(predictors)))
if(grepl("\\+0|0\\+", formula_text)) {
no_intercept <- TRUE
}
}
## Get the factors matrix which shows interaction structure
factors <- attr(terms, "factors")
## Initialize term containers
spline_terms <- character()
linear_no_int <- character()
linear_with_int <- character()
## First pass to identify spline terms and extract their variables
for(term in term_labels) {
if(grepl("^spl\\(.*\\)$", term)) {
vars <- gsub("^spl\\((.*)\\)$", "\\1", term)
spline_terms <- c(spline_terms, trimws(strsplit(vars, ",")[[1]]))
}
}
## Extract explicit interactions from formula
formula_interactions <- term_labels[attr(terms, "order") > 1]
allowed_interactions <- sapply(formula_interactions, function(term) {
if(grepl(":", term)) {
vars <- strsplit(term, ":")[[1]]
# Only keep interactions where NO term is a spline term
if(!any(vars %in% spline_terms)) {
return(term)
}
}
return(NULL)
})
## Identify interaction structure using factors matrix
var_names <- rownames(factors)[-1] # Remove responses
## Extract all types of variables in formula
all_formula_vars <- unique(c(spline_terms, var_names))
formula_cols <- which(colnames(data) %in% all_formula_vars)
## Match custom exclusions nominal to numeric formula columns, if available
if(inherits(exclude_interactions_for, 'character')){
exclude_interactions_for <-
unlist(lapply(exclude_interactions_for,
function(var){
grep(var,
colnames(data)[formula_cols])
}))
}
if(inherits(just_linear_with_interactions, 'character')){
just_linear_with_interactions <-
unlist(lapply(just_linear_with_interactions,
function(var){
grep(var,
colnames(data)[formula_cols])
}))
}
if(inherits(just_linear_without_interactions, 'character')){
just_linear_without_interactions <-
unlist(lapply(just_linear_without_interactions,
function(var){
grep(var,
colnames(data)[formula_cols])
}))
}
if(length(exclude_these_expansions) > 0){
for(ii in 1:length(exclude_these_expansions)){
exps <- exclude_these_expansions[ii]
if(substr(exps, 1, 1) != "_" |
substr(exps, nchar(exps), nchar(exps)) != "_"){
for(jj in 1:length(formula_cols)){
if(grepl(colnames(data)[formula_cols[jj]],
exclude_these_expansions[ii])){
exclude_these_expansions[ii] <- gsub(
colnames(data)[formula_cols[jj]],
paste0('_', jj, '_'),
exclude_these_expansions[ii]
)
}
}
}
}
}
if(inherits(offset, 'character')){
offset <-
unlist(lapply(offset,
function(var){
grep(var,
colnames(data)[formula_cols])
}))
}
## Non-spline variables that appear in interactions (from factors matrix)
interaction_terms <- term_labels[attr(terms, "order") > 1]
nonspline_interact_vars <- unique(sapply(interaction_terms,
function(term) {
if(grepl(":", term)) {
vars <-
strsplit(term,":")[[1]]
vars[!vars %in%
spline_terms]
}
}))
## Add explicit interactions for spline terms
if(length(spline_terms) > 1) {
# Generate all possible interactions between spline terms
spline_interactions <- utils::combn(spline_terms, 2, simplify=FALSE)
spline_triplets <- if(length(spline_terms) >= 3) {
utils::combn(spline_terms, 3, simplify=FALSE)
} else {
list()
}
# Add 2-way interactions
for(pair in spline_interactions) {
interaction_terms <- c(interaction_terms, paste(pair, collapse=":"))
}
# Add 3-way interactions
for(triplet in spline_triplets) {
interaction_terms <- c(interaction_terms, paste(triplet, collapse=":"))
}
}
## Get indices of variables in raw expansions (after response removed)
resp_ind <- which(colnames(data) == paste0(terms[[2]]))
var_positions <- match(var_names, colnames(data[,formula_cols]))
## Get allowed interaction pairs
allowed_pairs <- lapply(interaction_terms[grepl(":", interaction_terms)],
function(term) {
vars <- strsplit(term, ":")[[1]]
match(vars, colnames(data[,formula_cols]))
})
## Generate exclusion patterns for non-spline vars
exclude_patterns <- c()
## Get all possible 2-way interactions between ANY variables
vars <- colnames(data[,formula_cols])
if(include_2way_interactions){
for(ii in seq_along(vars)) {
for(jj in seq_along(vars)) {
if(ii != jj) {
pattern <- get_interaction_patterns(c(vars[ii], vars[jj]))
## Skip if not in formula
if(any(!(c(vars[ii], vars[jj]) %in% all_formula_vars))){
next
}
## Skip if any variable in exclude_interactions_for
if(!is.null(exclude_interactions_for)){
if(any(c(ii, jj) %in% exclude_interactions_for)){
next
}
}
## Only keep spline-spline interactions and explicit interactions
if(!all(c(vars[ii], vars[jj]) %in% spline_terms) &
length(spline_terms) > 0 &
all(c(ii, jj) %in% formula_cols)){
exclude_patterns <- c(exclude_patterns, pattern)
}
}
}
}
}
## Add all possible 3-way interactions to exclusions
if(include_3way_interactions) {
for(ii in seq_along(vars)) {
for(jj in seq_along(vars)) {
for(kk in seq_along(vars)) {
if(ii != jj && jj != kk && ii != kk) {
triplet_vars <- c(vars[ii], vars[jj], vars[kk])
pattern <- get_interaction_patterns(triplet_vars)
## Skip if not in formula
if(any(!(triplet_vars %in% all_formula_vars))){
next
}
## Skip if any variable in exclude_interactions_for
if(!is.null(exclude_interactions_for)){
if(any(c(ii, jj, kk) %in% exclude_interactions_for)){
next
}
}
## Skip if only spline terms
if(all(triplet_vars %in% spline_terms)){
next
}
## If ANY var is a spline term but not ALL are spline terms,
# we should exclude this interaction
if(any(triplet_vars %in% spline_terms) &&
!all(triplet_vars %in% spline_terms)) {
exclude_patterns <- c(exclude_patterns, pattern)
next
}
## For non-spline terms, allow explicitly specified interactions
if(!any(triplet_vars %in% spline_terms)) {
## Get interaction terms that could involve these variables
relevant_terms <- interaction_terms[grepl(paste(triplet_vars,
collapse="|"),
interaction_terms)]
## Check if this exact triplet exists in any order
matches_interaction <- any(sapply(strsplit(relevant_terms, ":"),
function(term) {
length(term) == 3 &&
all(sort(triplet_vars) == sort(term))
}))
if(!matches_interaction) {
exclude_patterns <- c(exclude_patterns, pattern)
}
}
}
}
}
}
}
## Helper vector for the next step: isolate interactions whose variables
# are not part of the same grouping/block.
# The logic above only works when there is a single additive interaction effect;
# the code below corrects for multiple such effects (for example, a*b + c*d),
# preventing unspecified interactions (for example, a*d and b*c) from being included.
different_grouping_exclusions <- c()
## Explicitly track allowed interactions from * and : terms
allowed_interaction_pairs <- list()
## Track which interactions are explicitly specified
for(term in term_labels) {
# Handle * interactions
if(grepl("\\*", term)) {
vars <- trimws(strsplit(term, "\\*")[[1]])
# Ensure exactly 2 variables in the interaction
if(length(vars) == 2) {
allowed_interaction_pairs[[length(allowed_interaction_pairs) + 1]] <-
list(pair = vars,
transforms = c(
paste(vars, collapse = "x"),
paste(rev(vars), collapse = "x")
)
)
}
}
# Handle : interactions
if(grepl(":", term)) {
vars <- trimws(strsplit(term, ":")[[1]])
# Ensure exactly 2 variables in the interaction
if(length(vars) == 2) {
allowed_interaction_pairs[[length(allowed_interaction_pairs) + 1]] <-
list(pair = vars,
transforms = c(
paste(vars, collapse = "x"),
paste(rev(vars), collapse = "x")
)
)
}
}
}
## Identify spline block variables
spline_block_vars <- c()
for(term in term_labels) {
if(grepl("^spl\\(.*\\)$", term)) {
vars <- gsub("^spl\\((.*)\\)$", "\\1", term)
term_vars <- trimws(strsplit(vars, ",")[[1]])
spline_block_vars <- c(spline_block_vars, term_vars)
}
}
## Generate exclusions for any interaction not in the allowed pairs
vars <- colnames(data[,formula_cols])
if(include_2way_interactions | include_3way_interactions){
for(ii in seq_along(vars)) {
for(jj in seq_along(vars)) {
if(ii != jj) {
pair <- c(vars[ii], vars[jj])
current_transform <- paste(pair, collapse = "x")
## Skip if not in formula
if(any(!(pair %in% all_formula_vars))){
next
}
## Skip if any variable in exclude_interactions_for
if(!is.null(exclude_interactions_for)){
if(any(c(ii, jj) %in% exclude_interactions_for)){
next
}
}
## Check if this is an allowed interaction
is_allowed_interaction <- FALSE
for(allowed in allowed_interaction_pairs) {
if(setequal(pair, allowed$pair) ||
any(current_transform == allowed$transforms)) {
is_allowed_interaction <- TRUE
break
}
}
## Check if this is a spline block interaction
is_spline_block_interaction <- all(pair %in% spline_block_vars)
## If not an allowed or spline block interaction,
# generate exclusion patterns
if(!is_allowed_interaction && !is_spline_block_interaction) {
patterns <- get_interaction_patterns(pair)
different_grouping_exclusions <- c(different_grouping_exclusions,
patterns)
}
}
}
}
}
## Remove duplicates
different_grouping_exclusions <- unique(different_grouping_exclusions)
## Remove explicitly allowed interactions from exclusions
for(term in interaction_terms) {
vars <- strsplit(term, ":")[[1]]
allowed <- get_interaction_patterns(vars)
exclude_patterns <- setdiff(exclude_patterns, allowed)
}
exclude_patterns <- unique(c(exclude_patterns,
different_grouping_exclusions))
## Convert to positional notation
vars <- colnames(data)[formula_cols]
for(ii in seq_along(formula_cols)) {
exclude_patterns <- gsub(vars[ii], paste0("_", ii, "_"), exclude_patterns)
}
## Append to custom exclusions
if(!is.null(exclude_these_expansions)){
exclude_these_expansions <- c(exclude_these_expansions,
exclude_patterns)
} else if (length(exclude_patterns) > 0){
exclude_these_expansions <- exclude_patterns
}
## For each variable, determine if linear with or without interactions
for(var in var_names) {
if(length(spline_terms) > 0){
if(var %in% spline_terms) next # Skip spline terms
}
## Check if this variable appears in any interactions
var_terms <- which(factors[var,] > 0)
if(length(var_terms) > 0) {
## If any term containing this variable has order > 1,
# it's in an interaction
if(any(attr(terms, "order")[var_terms] > 1)) {
linear_with_int <- c(linear_with_int, var)
} else {
linear_no_int <- c(linear_no_int, var)
}
} else {
linear_no_int <- c(linear_no_int, var)
}
}
## Remove duplicates and ensure proper separation
linear_with_int <- unique(linear_with_int)
linear_no_int <- setdiff(unique(linear_no_int),
c(spline_terms, linear_with_int))
linear_no_int <- linear_no_int[!(substr(linear_no_int,
1,
4) == 'spl(')]
## Create predictors matrix and response for compatibility
predictors <- data[, formula_cols, drop = FALSE]
y <- data[, resp_ind]
## Convert variable names to column indices
new_just_linear_without_interactions <- match(linear_no_int,
colnames(predictors))
new_just_linear_with_interactions <- match(linear_with_int,
colnames(predictors))
new_just_linear_without_interactions <- new_just_linear_without_interactions[
!(new_just_linear_without_interactions %in% spline_terms)
]
new_just_linear_with_interactions <- new_just_linear_with_interactions[
!(new_just_linear_with_interactions %in% spline_terms)
]
if(is.null(just_linear_with_interactions)){
just_linear_with_interactions <- new_just_linear_with_interactions
} else{
just_linear_with_interactions <- unique(c(
just_linear_with_interactions,
new_just_linear_with_interactions
))
}
if(is.null(just_linear_without_interactions)){
just_linear_without_interactions <- new_just_linear_without_interactions
} else {
just_linear_without_interactions <- unique(c(
just_linear_without_interactions,
new_just_linear_without_interactions
))
}
}
## Not a formula - try to coerce to matrix
tryCatch({
predictors <- as.matrix(predictors)
}, error = function(e) {
stop("\n \t Could not coerce predictors to matrix. ",
"predictors must be either a formula or an object coercible to matrix.",
"Examples:\n",
" Formula: lgspline(y ~ spl(x1, x2) + x3, data = my_data)\n",
" Matrix: lgspline(predictors = Tmat, y = y) \n")
})
## Check numeric type
if(any(!is.numeric(predictors))){
stop("\n \t predictors matrix must be numeric. ",
"Please convert categorical variables to numeric indicators. \n")
}
## Check response for missings
if(any(is.na(y) | is.nan(y) | !is.finite(y))){
stop("\n \t NA, NaN, or infinite value detected in response. \n")
}
## Original predictor names
og_cols <- colnames(predictors)
if(!any(is.null(og_cols))){
replace_colnames <- TRUE
} else {
replace_colnames <- FALSE
}
## Alternative parameterization of null constraint for ease of use
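# Given constraint_vectors C and target null_constraint c0, this sets
# constraint_values to C (C'C)^{-1} c0, so that t(C) %*% constraint_values = c0
# (assuming gramMatrix(C) computes the Gram matrix C'C).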
if(any(!is.null(null_constraint)) &
length(constraint_vectors) > 0 &
length(constraint_values) == 0){
constraint_values <-
constraint_vectors %**%
invert(gramMatrix(cbind(constraint_vectors))) %**%
cbind(c(null_constraint))
}
## Check nrow of input predictors and matrix coercion
t <- try({if(nrow(methods::as(predictors,'matrix')) < 3){
stop('\n \t Need at least 3 observations to fit model \n')
}}, silent = TRUE)
if(inherits(t, 'try-error')){
stop('\n \t Cannot coerce predictors to a matrix \n')
}
## Check if no spline terms - if so, set K = 0
if(length(unique(c(just_linear_with_interactions,
just_linear_without_interactions))) == ncol(predictors)){
K <- 0
}
## Check if custom knots is not missing, that it can be
# coerced to a matrix
if(any(!(is.null(custom_knots)))){
custom_knots <- try(cbind(custom_knots),
silent = TRUE)
if(any(inherits(custom_knots, 'try-error')) & include_warnings){
warning('\n \t custom_knots must be a matrix, or should be coercible ',
'to it. Custom_knots will be ignored. \n')
custom_knots <- NULL
}
}
## If ncol(predictors) > 1 and include_quartic_terms is NULL,
# set include_quartic_terms = TRUE
# otherwise, if ncol(predictors) == 1 then FALSE
if(is.null(include_quartic_terms)){
if(ncol(cbind(predictors)) == 1){
include_quartic_terms <- FALSE
} else {
include_quartic_terms <- TRUE
}
}
## Model fit procedure called
model_fit <- try({lgspline.fit(predictors,
y,
standardize_response,
standardize_predictors_for_knots,
standardize_expansions_for_fitting,
family,
glm_weight_function,
shur_correction_function,
need_dispersion_for_estimation,
dispersion_function,
K,
custom_knots,
cluster_on_indicators,
make_partition_list,
previously_tuned_penalties,
smoothing_spline_penalty,
opt,
use_custom_bfgs,
delta,
tol,
invsoftplus_initial_wiggle,
invsoftplus_initial_flat,
wiggle_penalty,
flat_ridge_penalty,
unique_penalty_per_partition,
unique_penalty_per_predictor,
meta_penalty,
predictor_penalties,
partition_penalties,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
offset,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
exclude_these_expansions,
custom_basis_fxn,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
cl,
chunk_size,
parallel_eigen,
parallel_trace,
parallel_aga,
parallel_matmult,
parallel_unconstrained,
parallel_find_neighbors,
parallel_penalty,
parallel_make_constraint,
unconstrained_fit_fxn,
keep_weighted_Lambda,
iterate_tune,
iterate_final_fit,
blockfit,
qp_score_function,
qp_observations,
qp_Amat,
qp_bvec,
qp_meq,
qp_positive_derivative,
qp_negative_derivative,
qp_monotonic_increase,
qp_monotonic_decrease,
qp_range_upper,
qp_range_lower,
qp_Amat_fxn,
qp_bvec_fxn,
qp_meq_fxn,
constraint_values,
constraint_vectors,
return_G,
return_Ghalf,
return_U,
estimate_dispersion,
unbias_dispersion,
return_varcovmat,
custom_penalty_mat,
cluster_args,
dummy_dividor,
dummy_adder,
verbose,
verbose_tune,
## expansions_only is automatically used if
# fitting a correlation structure,
# since iterative fitting is performed
# after
expansions_only |
(!is.null(VhalfInv_fxn) &
length(VhalfInv_par_init) > 0),
observation_weights,
do_not_cluster_on_these,
neighbor_tolerance,
no_intercept,
VhalfInv,
Vhalf,
include_warnings,
...
)}, silent = TRUE)
## Return try-error if model fails to be fit
if(any(inherits(model_fit, 'try-error')) & include_warnings){
warning('\n \t Model fitting error: try verbose = TRUE, checking for NAs,',
' adjusting starting tuning grid, or K. If using parallel options,',
' check your parallel cluster, submit to cl argument if valid, and make ',
'sure base R parallel package is loaded. Also make sure your ',
'formula and overall setup is valid. \n')
return(model_fit)
}
## Return expansions only and associated components
if(expansions_only){
return(c(model_fit, list(og_cols = colnames(predictors))))
}
## Default correlation structures
if(!is.null(correlation_structure) &
!is.null(correlation_id)){
if(inherits(correlation_structure, 'character') &
length(correlation_structure) == 1 &
length(correlation_id) == nrow(predictors)){
if(length(spacetime) > 0){
spacetime <- cbind(spacetime)
if(nrow(spacetime) != nrow(predictors)){
stop('\n\t Spacetime must be an N-length vector or N-row matrix. \n')
}
}
if(correlation_structure %in% c('exchangeable',
'cs',
'CS',
'compoundsymmetric',
'compound-symmetric',
'compound symmetric')){
## VhalfInv_fxn to construct exchangeable correlation structure
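# Per correlation_id block: V = (1 - rho) * I + rho * J with rho = tanh(par);
# the returned N x N matrix stacks matinvsqrt(V) block-diagonally.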
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
rho <- tanh(par)
V <- diag(block_size) +
rho*(matrix(1, block_size, block_size) - diag(block_size))
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
rho <- tanh(par)
V <- diag(block_size) +
rho*(matrix(1, block_size, block_size) - diag(block_size))
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
rho <- tanh(par)
V <- diag(block_size) +
rho*(matrix(1, block_size, block_size) - diag(block_size))
log_det <- log_det +
(-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices
dV <- matrix(0, nrow(predictors), nrow(predictors))
V <- dV
## Get correlation and derivative for each correlation_id
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Correlation and its derivative
rho <- tanh(par)
drho <- 1 - tanh(par)^2
## Block matrices
V[inds, inds] <- diag(block_size) +
rho*(matrix(1, block_size, block_size) - diag(block_size))
dV[inds, inds] <- drho*(matrix(1, block_size, block_size) -
diag(block_size))
}
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Return derivative
as.numeric(quad_term + trace_term + XVinvX_term) /
nrow(predictors)
}
} else if(length(spacetime) == 0){
stop('\n\t "Spacetime" variable must be supplied if correlation ',
'structure other than exchangeable is selected. Spacetime can be ',
'spatial coordinates, time coordinates, or any other longitudinal ',
'vector/matrix of measurements. \n')
}
if(correlation_structure %in% c('spatial-exponential',
'spatialexponential',
'exp',
'exponential')){
## VhalfInv_fxn to construct spatial-exponential correlation
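# Per block: V_ij = exp(-omega * d_ij) with rate omega = softplus(par) and d_ij
# the root-mean-square distance across spacetime coordinates.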
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute Euclidean distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Rate parameter using softplus parameterization
omega <- log(1 + exp(par))
# Compute exponential correlation matrix directly
V <- exp(-omega * diffs)
# Compute inverse square root
corr[inds, inds] <- matinvsqrt(V)
}
return(corr)
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Rate parameter using softplus
omega <- log(1 + exp(par))
# Compute exponential correlation matrix directly
V <- exp(-omega * diffs)
# Compute square root
corr[inds, inds] <- matsqrt(V)
}
return(corr)
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Rate parameter using softplus
omega <- log(1 + exp(par))
# Compute exponential correlation matrix directly
V <- exp(-omega * diffs)
# Compute log determinant
log_det <- log_det +
(-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices
dV <- matrix(0, nrow(predictors), nrow(predictors))
V <- matrix(0, nrow(predictors), nrow(predictors))
## Get correlation and derivatives for each correlation_id
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
## Parameter and its transformation
omega <- log(1 + exp(par))
domega <- exp(par) / (1 + exp(par)) # Derivative of softplus
## Build correlation matrix directly
V_block <- exp(-omega * diffs)
diag(V_block) <- 1
## Compute derivative matrix directly
dV_block <- -diffs * V_block * domega
diag(dV_block) <- 0 # No derivative on diagonal
## Assign to full matrices
V[inds, inds] <- V_block
dV[inds, inds] <- dV_block
}
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Return gradient
as.numeric(quad_term + trace_term + XVinvX_term) / nrow(predictors)
}
## Set default starting values
if(length(VhalfInv_par_init) == 0){
VhalfInv_par_init <- 0 # This gives omega ≈ 0.693 via softplus
}
}
if(correlation_structure %in% c('ar1','ar(1)','AR(1)','AR1')){
## VhalfInv_fxn to construct AR(1) correlation structure
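# Per block: V_ij = rho^r_ij with rho = tanh(par) and r_ij the 0-based rank of
# the pairwise spacetime distance, generalizing AR(1) to irregular spacing.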
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Get all pairwise differences and rank them
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
unique_diffs <- unique(as.vector(diffs))
ranked <- matrix(match(diffs, sort(unique_diffs)) - 1,
block_size,
block_size)
# AR(1) correlation using ranks
rho <- tanh(par)
V <- rho^ranked
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
unique_diffs <- unique(as.vector(diffs))
ranked <- matrix(match(diffs, sort(unique_diffs)) - 1,
block_size,
block_size)
rho <- tanh(par)
V <- rho^ranked
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
unique_diffs <- unique(as.vector(diffs))
ranked <- matrix(match(diffs,
sort(unique_diffs)) - 1,
block_size,
block_size)
rho <- tanh(par)
V <- rho^ranked
log_det <- log_det +
(-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices
dV <- matrix(0, nrow(predictors), nrow(predictors))
V <- dV
## Get correlation and derivative for each correlation_id
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Compute ranked differences
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
unique_diffs <- unique(as.vector(diffs))
ranked <- matrix(match(diffs, sort(unique_diffs)) - 1,
block_size, block_size)
## Correlation and its derivative
rho <- tanh(par)
drho <- 1 - tanh(par)^2
## Block matrices
V[inds, inds] <- rho^ranked
dV[inds, inds] <- ranked * rho^(ranked-1) * drho
}
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Return derivative
as.numeric(quad_term + trace_term + XVinvX_term) / nrow(predictors)
}
}
if(correlation_structure %in% c('gaussian', 'rbf', 'squared-exponential')){
## VhalfInv_fxn to construct Gaussian/squared-exponential correlation
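# Per block: V_ij = exp(-d_ij^2 / (2 * l^2)) with length scale l = softplus(par)
# and d_ij the root-mean-square distance across spacetime coordinates.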
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute Euclidean distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1){
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Simple squared exponential correlation with positive length scale
length_scale <- log(1 + exp(par))
V <- exp(-diffs^2/(2*length_scale^2))
diag(V) <- 1 # Ensure diagonal is exactly 1
# Compute inverse square root
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' || paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1){
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Simple squared exponential correlation
length_scale <- log(1 + exp(par))
V <- exp(-diffs^2/(2*length_scale^2))
diag(V) <- 1 # Ensure diagonal is exactly 1
# Compute square root
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1){
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Simple squared exponential correlation
length_scale <- log(1 + exp(par))
V <- exp(-diffs^2/(2*length_scale^2))
diag(V) <- 1
# Compute log determinant
log_det <- log_det + (-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices
dV <- matrix(0, nrow(predictors), nrow(predictors))
V <- matrix(0, nrow(predictors), nrow(predictors))
## Get correlation and derivative for each correlation_id
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1){
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
## Correlation and its derivative
length_scale <- log(1 + exp(par))
dlength_scale <- exp(par) / (1 + exp(par)) # Derivative of softplus
## Block matrices
V_block <- exp(-diffs^2/(2*length_scale^2))
diag(V_block) <- 1
## Derivative: d/dl[exp(-d^2/(2*l^2))] = (d^2/l^3) * exp(-d^2/(2*l^2))
dV_block <- (diffs^2/length_scale^3) * V_block
diag(dV_block) <- 0 # No derivative on diagonal
## Assign to full matrices
V[inds, inds] <- V_block
dV[inds, inds] <- dV_block * dlength_scale # Chain rule for softplus
}
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Return derivative
as.numeric(quad_term + trace_term + XVinvX_term) / nrow(predictors)
}
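## The gradient above is the derivative of the scaled negative restricted
# log-likelihood with respect to the unconstrained (softplus-scale) parameter.
# It sums three pieces computed above:
#   quad_term   - a quadratic form in the whitened residuals against dV
#   trace_term  - the log|V| contribution, 0.5 * tr(VhalfInv %*% VhalfInv %*% dV)
#   XVinvX_term - the penalized-information contribution via (X'V^{-1}X + Lambda)
# and divides by nrow(predictors) to match the per-observation REML objective
# optimized further below.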
}
if(correlation_structure %in% c('spherical', 'cubic', "Spherical", 'sphere')) {
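## The spherical (a.k.a. cubic) structure assigns, for observations within the
# same correlation_id cluster at averaged distance d and softplus-range r,
#   rho(d) = 1 - 1.5*(d/r) + 0.5*(d/r)^3  if d <= r,  and 0 otherwise,
# so correlation decays smoothly and is exactly zero beyond the range.
# A minimal standalone sketch of the kernel (not executed here):
#   sph <- function(d, r) ifelse(d <= r, 1 - 1.5*(d/r) + 0.5*(d/r)^3, 0)
#   sph(c(0, 0.5, 1, 2), r = 1)  # 1.0000 0.3125 0.0000 0.0000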
## VhalfInv_fxn to construct spherical correlation
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)) {
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1) {
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Range parameter (always positive)
range <- log(1 + exp(par))
# Initialize with identity matrix (1 on diagonal, 0 elsewhere)
V <- diag(block_size)
# Fill in non-diagonal elements with spherical correlation
for(i in 1:block_size) {
for(j in 1:block_size) {
if(i != j) {
d_val <- diffs[i,j]
if(d_val <= range) {
V[i,j] <- 1 - 1.5 * (d_val/range) + 0.5 * (d_val/range)^3
}
# If d_val > range, leave as 0
}
}
}
# Compute inverse square root
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' || paste0(family)[2] != 'identity') {
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)) {
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1) {
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Range parameter
range <- log(1 + exp(par))
# Initialize with identity matrix
V <- diag(block_size)
# Fill in with spherical correlation
for(i in 1:block_size) {
for(j in 1:block_size) {
if(i != j) {
d_val <- diffs[i,j]
if(d_val <= range) {
V[i,j] <- 1 - 1.5 * (d_val/range) + 0.5 * (d_val/range)^3
}
# If d_val > range, leave as 0
}
}
}
# Compute square root
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)) {
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1) {
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Range parameter
range <- log(1 + exp(par))
# Initialize with identity matrix
V <- diag(block_size)
# Fill in with spherical correlation
for(i in 1:block_size) {
for(j in 1:block_size) {
if(i != j) {
d_val <- diffs[i,j]
if(d_val <= range) {
V[i,j] <- 1 - 1.5 * (d_val/range) + 0.5 * (d_val/range)^3
}
# If d_val > range, leave as 0
}
}
}
# Compute log determinant
log_det <- log_det + (-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices
dV <- matrix(0, nrow(predictors), nrow(predictors))
V <- matrix(0, nrow(predictors), nrow(predictors))
## Get correlation and derivative for each correlation_id
for(clust in unique(correlation_id)) {
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Compute distances
diffs <- outer(spacetime[inds,1], spacetime[inds,1], function(x,y) (x-y)^2)
if(ncol(spacetime) > 1) {
diffs <- diffs + Reduce("+", lapply(2:ncol(spacetime),
function(v) outer(spacetime[inds, v], spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
## Correlation parameters
range <- log(1 + exp(par))
drange <- exp(par) / (1 + exp(par)) # Derivative of softplus
## Initialize with identity matrix
V_block <- diag(block_size)
dV_block <- matrix(0, block_size, block_size)
## Fill in with spherical correlation and its derivative
for(i in 1:block_size) {
for(j in 1:block_size) {
if(i != j) {
d_val <- diffs[i,j]
if(d_val <= range) {
# Correlation function: 1 - 1.5(d/r) + 0.5(d/r)^3
h <- d_val/range
V_block[i,j] <- 1 - 1.5 * h + 0.5 * h^3
# Derivative with respect to range: 1.5(d/r^2) - 1.5(d^3/r^4)
dV_block[i,j] <- (1.5 * d_val/range^2 - 1.5 * d_val^3/range^4) * drange
}
}
}
}
## Assign to full matrices
V[inds, inds] <- V_block
dV[inds, inds] <- dV_block
}
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)) {
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Return derivative
as.numeric(quad_term + trace_term + XVinvX_term) / nrow(predictors)
}
}
if(correlation_structure %in% c('matern', 'Matern')){
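## The Matérn structure uses two softplus-transformed parameters, a length
# scale l and a smoothness nu, with, for d > 0,
#   rho(d) = (2^(1-nu)/gamma(nu)) * (sqrt(2*nu)*d/l)^nu * besselK(sqrt(2*nu)*d/l, nu)
# and rho(0) = 1. For reference (not used by the code), nu = 0.5 recovers the
# exponential kernel and nu -> Inf approaches the squared exponential.
# Standalone sketch for d > 0 (not executed here):
#   matern <- function(d, l, nu){ s <- sqrt(2*nu)*d/l
#     (2^(1-nu)/gamma(nu)) * s^nu * besselK(s, nu) }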
## VhalfInv_fxn to construct Matérn correlation
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract parameters: length scale and smoothness
length_scale <- log(1 + exp(par[1])) # ensure positive length scale
nu <- log(1 + exp(par[2])) # ensure positive smoothness
# Compute scaled distances
scaled_diffs <- sqrt(2*nu) * diffs/length_scale
# Matérn correlation using modified Bessel function
V <- matrix(1, block_size, block_size)
nonzero <- which(scaled_diffs != 0, arr.ind=TRUE)
if(length(nonzero) > 0){
V[nonzero] <- (2^(1-nu)/gamma(nu)) *
(scaled_diffs[nonzero])^nu *
besselK(scaled_diffs[nonzero], nu)
}
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
length_scale <- log(1 + exp(par[1]))
nu <- log(1 + exp(par[2]))
scaled_diffs <- sqrt(2*nu) * diffs/length_scale
V <- matrix(1, block_size, block_size)
nonzero <- which(scaled_diffs != 0, arr.ind=TRUE)
if(length(nonzero) > 0){
V[nonzero] <- (2^(1-nu)/gamma(nu)) *
(scaled_diffs[nonzero])^nu *
besselK(scaled_diffs[nonzero], nu)
}
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
length_scale <- log(1 + exp(par[1]))
nu <- log(1 + exp(par[2]))
scaled_diffs <- sqrt(2*nu) * diffs/length_scale
V <- matrix(1, block_size, block_size)
nonzero <- which(scaled_diffs != 0, arr.ind=TRUE)
if(length(nonzero) > 0){
V[nonzero] <- (2^(1-nu)/gamma(nu)) *
(scaled_diffs[nonzero])^nu *
besselK(scaled_diffs[nonzero], nu)
}
log_det <- log_det +
(-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## Let optimizer use finite-difference approximation
REML_grad <- NULL
## Matérn needs two starting values (length scale and smoothness)
if(length(VhalfInv_par_init) == 0){
VhalfInv_par_init <- c(0, 0)
}
}
if(correlation_structure %in% c('gaussian-cosine',
'gaussiancosine',
'GaussianCosine')){
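## The gaussian-cosine structure multiplies a squared-exponential decay by a
# cosine of the averaged distance d,
#   rho(d) = exp(-d^2 / (2*l^2)) * cos(omega * d),
# with l = softplus(par[1]) and omega = par[2] left unconstrained, giving
# damped oscillatory (e.g., seasonal) correlation; off-diagonal values are
# clipped to [-1, 1] below as a numerical safeguard.
# Standalone sketch (not executed here):
#   gcos_se <- function(d, l, omega) exp(-d^2/(2*l^2)) * cos(omega*d)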
## VhalfInv_fxn to construct Gaussian-cosine correlation structure
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute Euclidean distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract gaussian parameters
length_scale <- log(1 + exp(par[1])) # ℓ = length scale parameter (always positive)
omega <- par[2] # ω = oscillation frequency
# Compute gaussian-cosine correlation matrix
V <- matrix(1, block_size, block_size) # Diagonal is 1
# For non-diagonal elements
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Compute values for each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Squared exponential with cosine modulation
gaussian_part <- exp(-(d_val^2) / (2 * length_scale^2))
cos_part <- cos(omega * d_val)
corr_val <- gaussian_part * cos_part
# Ensure valid correlation values (between -1 and 1)
V[row, col] <- pmin(pmax(corr_val, -1), 1)
}
}
# Compute inverse square root
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract gaussian parameters
length_scale <- log(1 + exp(par[1]))
omega <- par[2]
# Compute gaussian-cosine correlation matrix
V <- matrix(1, block_size, block_size)
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Compute values for each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Squared exponential with cosine modulation
gaussian_part <- exp(-(d_val^2) / (2 * length_scale^2))
cos_part <- cos(omega * d_val)
corr_val <- gaussian_part * cos_part
# Ensure valid correlation values (between -1 and 1)
V[row, col] <- pmin(pmax(corr_val, -1), 1)
}
}
# Compute square root
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract gaussian parameters
length_scale <- log(1 + exp(par[1]))
omega <- par[2]
# Compute gaussian-cosine correlation matrix
V <- matrix(1, block_size, block_size)
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Compute values for each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Squared exponential with cosine modulation
gaussian_part <- exp(-(d_val^2) / (2 * length_scale^2))
cos_part <- cos(omega * d_val)
corr_val <- gaussian_part * cos_part
# Ensure valid correlation values (between -1 and 1)
V[row, col] <- pmin(pmax(corr_val, -1), 1)
}
}
# Compute log determinant
log_det <- log_det +
(-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices for both parameters
dV1 <- matrix(0, nrow(predictors), nrow(predictors)) # For length_scale
dV2 <- matrix(0, nrow(predictors), nrow(predictors)) # For omega
V <- matrix(0, nrow(predictors), nrow(predictors)) # Current V
## Get correlation and derivatives for each correlation_id
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
## Extract and transform parameters
length_scale <- log(1 + exp(par[1]))
omega <- par[2]
dlength_scale <- exp(par[1]) / (1 + exp(par[1])) # Chain rule: d/dpar[softplus(par[1])]
## Block matrices for correlation
V_block <- matrix(1, block_size, block_size)
dV1_block <- matrix(0, block_size, block_size) # Derivative wrt length_scale
dV2_block <- matrix(0, block_size, block_size) # Derivative wrt omega
## Compute correlation and derivatives for non-diagonal elements
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Compute values and derivatives for each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Gaussian component and cosine component
gaussian_part <- exp(-(d_val^2) / (2 * length_scale^2))
cos_part <- cos(omega * d_val)
# Full correlation
corr_val <- gaussian_part * cos_part
corr_val <- pmin(pmax(corr_val, -1), 1)
V_block[row, col] <- corr_val
# For length_scale: d/dlength_scale[gaussian_part * cos_part]
dgaussian_ls <- gaussian_part * (d_val^2 / length_scale^3)
dV1_block[row, col] <- dlength_scale * dgaussian_ls * cos_part
# For omega: d/domega[gaussian_part * cos_part]
dcos_omega <- -d_val * sin(omega * d_val)
dV2_block[row, col] <- gaussian_part * dcos_omega
}
}
## Assign to full matrices
V[inds, inds] <- V_block
dV1[inds, inds] <- dV1_block
dV2[inds, inds] <- dV2_block
}
## Compute gradient components for both parameters
gradient <- numeric(2)
## Process derivatives one at a time
for(p in 1:2) {
dV <- if(p == 1) dV1 else dV2
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) !=
(model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Store gradient component
gradient[p] <- as.numeric(quad_term + trace_term + XVinvX_term)
}
return(gradient / nrow(predictors))
}
## Set default starting values
if(length(VhalfInv_par_init) == 0){
VhalfInv_par_init <- c(0, 0) # softplus(0) = log(2) for length_scale, 0 for omega
}
}
if(correlation_structure %in% c('gamma-cosine',
'gammacosine',
'GammaCosine')){
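## The gamma-cosine structure modulates a normalized gamma kernel by a cosine,
# with shape = softplus(par[1]), rate = softplus(par[2]), omega = par[3]:
#   rho(d) = [ d^(shape-1) * exp(-rate*d) / (gamma(shape)/rate^shape) ] * cos(omega*d)
# for d > 0, the diagonal fixed to 1, and off-diagonal values clipped to [-1, 1].
# Standalone sketch (not executed here):
#   gcos_gamma <- function(d, shape, rate, omega)
#     (d^(shape-1) * exp(-rate*d)) / (gamma(shape)/rate^shape) * cos(omega*d)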
## VhalfInv_fxn to construct gamma-cosine correlation structure
VhalfInv_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute Euclidean distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract gamma parameters
shape <- log(1 + exp(par[1])) # θ₁ = shape parameter
rate <- log(1 + exp(par[2])) # θ₂ = rate parameter
omega <- par[3] # ω = oscillation frequency
# Compute gamma-cosine correlation matrix
V <- matrix(1, block_size, block_size) # Diagonal is 1
# For non-diagonal elements
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Normalization constant
norm_const <- gamma(shape) / rate^shape
# Process each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Using normalized gamma function with cosine for correlation
gamma_val <- (d_val^(shape-1) * exp(-rate * d_val)) / norm_const
corr_val <- gamma_val * cos(omega * d_val)
# Ensure valid correlation values (between -1 and 1)
V[row, col] <- pmin(pmax(corr_val, -1), 1)
}
}
# Compute inverse square root
corr[inds, inds] <- matinvsqrt(V)
}
corr
}
## Vhalf_fxn for non-Gaussian responses
if(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity'){
Vhalf_fxn <- function(par) {
corr <- matrix(0, nrow(predictors), nrow(predictors))
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract gamma parameters
shape <- log(1 + exp(par[1]))
rate <- log(1 + exp(par[2]))
omega <- par[3]
# Compute gamma-cosine correlation matrix
V <- matrix(1, block_size, block_size)
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Normalization constant
norm_const <- gamma(shape) / rate^shape
# Process each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Using normalized gamma function with cosine for correlation
gamma_val <- (d_val^(shape-1) * exp(-rate * d_val)) / norm_const
corr_val <- gamma_val * cos(omega * d_val)
# Ensure valid correlation values (between -1 and 1)
V[row, col] <- pmin(pmax(corr_val, -1), 1)
}
}
# Compute square root
corr[inds, inds] <- matsqrt(V)
}
corr
}
} else {
Vhalf_fxn <- NULL
}
## Efficient determinant computation
VhalfInv_logdet <- function(par) {
log_det <- 0
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
# Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
# Extract gamma parameters
shape <- log(1 + exp(par[1]))
rate <- log(1 + exp(par[2]))
omega <- par[3]
# Compute gamma-cosine correlation matrix
V <- matrix(1, block_size, block_size)
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Normalization constant
norm_const <- gamma(shape) / rate^shape
# Process each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Using normalized gamma function with cosine for correlation
gamma_val <- (d_val^(shape-1) * exp(-rate * d_val)) / norm_const
corr_val <- gamma_val * cos(omega * d_val)
# Ensure valid correlation values (between -1 and 1)
V[row, col] <- pmin(pmax(corr_val, -1), 1)
}
}
# Compute log determinant
log_det <- log_det +
(-0.5 * determinant(V, logarithm=TRUE)$modulus[1])
}
log_det
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize matrices for all three parameters
dV1 <- matrix(0, nrow(predictors), nrow(predictors)) # For shape
dV2 <- matrix(0, nrow(predictors), nrow(predictors)) # For rate
dV3 <- matrix(0, nrow(predictors), nrow(predictors)) # For omega
V <- matrix(0, nrow(predictors), nrow(predictors)) # Current V
## Get correlation and derivatives for each correlation_id
for(clust in unique(correlation_id)){
inds <- which(correlation_id == clust)
block_size <- length(inds)
## Compute distances
diffs <- outer(spacetime[inds,1],
spacetime[inds,1],
function(x,y){
(x-y)^2
})
if(ncol(spacetime) > 1){
diffs <- diffs +
Reduce("+",
lapply(2:ncol(spacetime),
function(v)outer(spacetime[inds, v],
spacetime[inds, v],
function(x,y) (x - y)^2)))
}
diffs <- sqrt(diffs / ncol(spacetime))
## Extract and transform parameters
shape <- log(1 + exp(par[1]))
rate <- log(1 + exp(par[2]))
omega <- par[3]
dshape <- exp(par[1]) / (1 + exp(par[1])) # Chain rule: d/dpar[softplus(par[1])]
drate <- exp(par[2]) / (1 + exp(par[2])) # Chain rule: d/dpar[softplus(par[2])]
## Block matrices for correlation
V_block <- matrix(1, block_size, block_size)
dV1_block <- matrix(0, block_size, block_size) # Derivative wrt shape
dV2_block <- matrix(0, block_size, block_size) # Derivative wrt rate
dV3_block <- matrix(0, block_size, block_size) # Derivative wrt omega
## Compute correlation and derivatives for non-diagonal elements
nondiag <- which(diffs > 0, arr.ind=TRUE)
if(nrow(nondiag) > 0) {
# Extract row and column indices
row_indices <- nondiag[, 1]
col_indices <- nondiag[, 2]
# Normalization constant and derivatives
norm_const <- gamma(shape) / rate^shape
dnorm_const_shape <- norm_const * (digamma(shape) - log(rate))
dnorm_const_rate <- -shape * norm_const / rate
# Process each non-diagonal element
for(i in 1:nrow(nondiag)) {
row <- row_indices[i]
col <- col_indices[i]
d_val <- diffs[row, col]
# Gamma component and cosine component
gamma_part <- (d_val^(shape-1) * exp(-rate * d_val)) / norm_const
cos_part <- cos(omega * d_val)
# Full correlation
corr_val <- gamma_part * cos_part
corr_val <- pmin(pmax(corr_val, -1), 1)
V_block[row, col] <- corr_val
# Calculate derivatives using product rule
# For shape parameter
dgamma_shape <- gamma_part * (
log(d_val) -
dnorm_const_shape / norm_const
)
dV1_block[row, col] <- dshape * dgamma_shape * cos_part
# For rate parameter: d/drate log(gamma_part) = -d + shape/rate;
# note dnorm_const_rate / norm_const = -shape/rate, hence the subtraction
dgamma_rate <- gamma_part * (
-d_val -
dnorm_const_rate / norm_const
)
dV2_block[row, col] <- drate * dgamma_rate * cos_part
# For omega parameter
dcos_omega <- -d_val * sin(omega * d_val)
dV3_block[row, col] <- gamma_part * dcos_omega
}
}
## Assign to full matrices
V[inds, inds] <- V_block
dV1[inds, inds] <- dV1_block
dV2[inds, inds] <- dV2_block
dV3[inds, inds] <- dV3_block
}
## Compute gradient components for all three parameters
gradient <- numeric(3)
## Process derivatives one at a time
for(p in 1:3) {
dV <- if(p == 1) dV1 else if(p == 2) dV2 else dV3
## GLM Weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) /
sqrt(unlist(model_fit$weights)[model_fit$og_order])
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) !=
(model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
## Handle GLM weights
glm_weights <- sqrt(c(glm_weight_function(model_fit$ytilde,
model_fit$y,
1:model_fit$N,
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))) *
sqrt(unlist(model_fit$weights)[model_fit$og_order])
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Store gradient component
gradient[p] <- as.numeric(quad_term + trace_term + XVinvX_term)
}
return(gradient / nrow(predictors))
}
## Set default starting values
if(length(VhalfInv_par_init) == 0){
VhalfInv_par_init <- c(0, 0, 0) # softplus(0) = log(2) for shape and rate, 0 for omega
}
}
}
## Default starting values
if(length(VhalfInv_par_init) == 0){
VhalfInv_par_init <- 0
}
}
## If fitting correlation-structure components, do so here:
if(!is.null(VhalfInv_fxn) & length(VhalfInv_par_init) > 0){
## Use the custom BFGS only when an analytic gradient is supplied; otherwise,
# stats::optim's BFGS handles finite-difference gradient approximation more robustly
if(is.null(REML_grad)){
efficient_bfgs <- function(par_in, fn){
res <- stats::optim(
par_in,
function(par)fn(c(par))[[1]],
method = 'BFGS',
hessian = TRUE
)
res$vcov <- invert(methods::as(res$hessian, 'matrix'))
return(res)
}
}
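## The fallback above wraps stats::optim's BFGS with finite-difference
# gradients and keeps the inverted Hessian as an approximate variance-
# covariance matrix for the correlation parameters. The same idea on a toy
# objective (a sketch only, not executed here):
#   res <- stats::optim(c(0), function(par) (par - 1)^2,
#                       method = 'BFGS', hessian = TRUE)
#   vcov <- solve(res$hessian) # the package's invert() plays this role above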
if(length(observation_weights) == 0){
observation_weights <- rep(1, length(y))
}
## Constant used in computing log-likelihood of normal distributions
neghalf_l2pi <- -0.5*log(2*pi)
## Optimize correlation-structure parameters, via VhalfInv
res <-
efficient_bfgs(
c(VhalfInv_par_init),
fn = function(par){
tr <- try({
VhalfInv <- VhalfInv_fxn(par)
if(!is.null(Vhalf_fxn)){
Vhalf <- Vhalf_fxn(par)
} else {
Vhalf <- invert(VhalfInv)
}
})
if(inherits(tr, 'try-error')){
return(list(NaN, NaN))
}
tr <- try({nrow(VhalfInv) == length(y) &
ncol(VhalfInv) == nrow(VhalfInv)}, silent = TRUE)
if(inherits(tr, 'try-error')){
stop('\n \t VhalfInv_fxn does not return a matrix. ',
'Adjust your function. \n')
} else if(!tr){
stop('\n \t VhalfInv_fxn does not return an N x N matrix. ',
'Adjust your function. \n')
}
## Re-fit
model_fit <- try({lgspline.fit(predictors,
y,
standardize_response,
standardize_predictors_for_knots,
standardize_expansions_for_fitting,
family,
glm_weight_function,
shur_correction_function,
need_dispersion_for_estimation,
dispersion_function,
K,
model_fit$knots,
cluster_on_indicators,
model_fit$make_partition_list,
previously_tuned_penalties,
smoothing_spline_penalty,
opt,
use_custom_bfgs,
delta,
tol,
invsoftplus_initial_wiggle,
invsoftplus_initial_flat,
wiggle_penalty,
flat_ridge_penalty,
unique_penalty_per_partition,
unique_penalty_per_predictor,
meta_penalty,
predictor_penalties,
partition_penalties,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
offset,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
exclude_these_expansions,
custom_basis_fxn,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
cl,
chunk_size,
parallel_eigen,
parallel_trace,
parallel_aga,
parallel_matmult,
parallel_unconstrained,
parallel_find_neighbors,
parallel_penalty,
parallel_make_constraint,
unconstrained_fit_fxn,
keep_weighted_Lambda,
iterate_tune,
iterate_final_fit,
blockfit,
qp_score_function,
qp_observations,
qp_Amat,
qp_bvec,
qp_meq,
qp_positive_derivative,
qp_negative_derivative,
qp_monotonic_increase,
qp_monotonic_decrease,
qp_range_upper,
qp_range_lower,
qp_Amat_fxn,
qp_bvec_fxn,
qp_meq_fxn,
constraint_values,
constraint_vectors,
return_G,
return_Ghalf,
TRUE, # return_U
TRUE, # estimate_dispersion
unbias_dispersion, # unbias_dispersion
TRUE, # return_varcovmat
custom_penalty_mat,
cluster_args,
dummy_dividor,
dummy_adder,
verbose,
verbose_tune,
expansions_only,
observation_weights,
do_not_cluster_on_these,
neighbor_tolerance,
no_intercept,
VhalfInv,
Vhalf,
include_warnings,
...)}, silent = TRUE)
if(any(inherits(model_fit, 'try-error'))){
return(list(NaN, NaN))
}
## Use custom loss if available in family list
if(!is.null(custom_VhalfInv_loss)){
return(list(custom_VhalfInv_loss(par,
model_fit,
...),
REML_grad(par,
model_fit,
...)))
## Otherwise use REML assuming Gaussian response with identity link
} else {
if(verbose) cat('\nREML computation\n')
VhalfInv <- t(t(VhalfInv) * sqrt(observation_weights))
## Crude, conservative fallback if the estimate of sigma^2 is degenerate
if(model_fit$sigmasq_tilde <= 0){
degfed <- model_fit$N - model_fit$P + qr(model_fit$A)$rank
model_fit$sigmasq_tilde <-
sum((model_fit$y - model_fit$ytilde)^2/degfed)
}
## REML log-likelihood
# Weighted SSE
W <- c(glm_weight_function(model_fit$ytilde,
model_fit$y,
unlist(model_fit$order_list),
model_fit$family,
model_fit$sigmasq_tilde,
rep(1, model_fit$N),
...))
if(!is.null(model_fit$family$custom_dev.resids)){
raw <- family$custom_dev.resids(model_fit$y,
model_fit$ytilde,
1:model_fit$N,
model_fit$family,
1+0*model_fit$weights,
...)
logloss <- sum((
VhalfInv %**% cbind(sign(raw)*sqrt(abs(
raw
))) / sqrt(c(W))
)^2)
}
else if(is.null(model_fit$family$dev.resids)){
logloss <-
sum(c(VhalfInv %**% cbind(model_fit$y - model_fit$ytilde))^2 /
c(W))
} else {
logloss <- sum(model_fit$family$dev.resids(
c(VhalfInv %**% cbind(model_fit$y)),
c(VhalfInv %**% cbind(model_fit$ytilde)),
wt = 1/W))
}
# -log| V / sigma^2 |
if(!is.null(VhalfInv_logdet)){
logdet_VhalfInv <- VhalfInv_logdet(par)
} else {
logdet_VhalfInv <- determinant(VhalfInv,
logarithm=TRUE)$modulus[1]
}
# Generalized determinant of inverse information matrix
eigvals <- eigen(model_fit$varcovmat,
symmetric = TRUE,
only.values = TRUE)$values
nonzero_eigvals <- eigvals[eigvals > sqrt(.Machine$double.eps)]
logdet_varcovB <- -sum(log(nonzero_eigvals))
## Full negative-REML, generalized to include
# penalties,
# constraints,
# and glm link functions
reml_objective <- (
-logdet_VhalfInv +
0.5 * model_fit$N * log(model_fit$sigmasq_tilde) +
0.5 * logloss / model_fit$sigmasq_tilde +
0.5 * logdet_varcovB
) / model_fit$N
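## In symbols, the objective above is (up to additive constants) the negative
# restricted log-likelihood per observation:
#   [ -log|V^{-1/2}| + (N/2)*log(sigma^2) + deviance/(2*sigma^2)
#     - (1/2)*log(pseudo-determinant of varcov(B)) ] / N
# where the deviance is computed on the whitened (VhalfInv) scale and the
# pseudo-determinant (product of nonzero eigenvalues) accommodates rank
# deficiency induced by the smoothing constraints.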
## Now compute gradient, if function is supplied
if(!is.null(REML_grad)){
reml_gradient <- REML_grad(par,
model_fit,
...)
} else {
reml_gradient <- NULL
}
if(verbose) cat('\nDone REML computation\n')
return(list(reml_objective = reml_objective,
reml_gradient = reml_gradient))
}
}
)
if(verbose){
cat("Done VhalfInv Optimization\n")
}
## Extract variance-covariance matrix of estimates using BFGS approximation
VhalfInv_params_estimates <- res$par
VhalfInv <- VhalfInv_fxn(c(VhalfInv_params_estimates))
if(!is.null(Vhalf_fxn)){
Vhalf <- Vhalf_fxn(VhalfInv_params_estimates)
}
VhalfInv_params_vcov <- res$vcov
res <- NULL
## Re-fit given optimal values for VhalfInv now one final time
model_fit <-
lgspline.fit(predictors,
y,
standardize_response,
standardize_predictors_for_knots,
standardize_expansions_for_fitting,
family,
glm_weight_function,
shur_correction_function,
need_dispersion_for_estimation,
dispersion_function,
K,
model_fit$knots,
cluster_on_indicators,
model_fit$make_partition_list,
previously_tuned_penalties,
smoothing_spline_penalty,
opt,
use_custom_bfgs,
delta,
tol,
invsoftplus_initial_wiggle,
invsoftplus_initial_flat,
wiggle_penalty,
flat_ridge_penalty,
unique_penalty_per_partition,
unique_penalty_per_predictor,
meta_penalty,
predictor_penalties,
partition_penalties,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
offset,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
exclude_these_expansions,
custom_basis_fxn,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
cl,
chunk_size,
parallel_eigen,
parallel_trace,
parallel_aga,
parallel_matmult,
parallel_unconstrained,
parallel_find_neighbors,
parallel_penalty,
parallel_make_constraint,
unconstrained_fit_fxn,
keep_weighted_Lambda,
iterate_tune,
iterate_final_fit,
blockfit,
qp_score_function,
qp_observations,
qp_Amat,
qp_bvec,
qp_meq,
qp_positive_derivative,
qp_negative_derivative,
qp_monotonic_increase,
qp_monotonic_decrease,
qp_range_upper,
qp_range_lower,
qp_Amat_fxn,
qp_bvec_fxn,
qp_meq_fxn,
constraint_values,
constraint_vectors,
return_G,
return_Ghalf,
return_U,
estimate_dispersion,
unbias_dispersion,
return_varcovmat,
custom_penalty_mat,
cluster_args,
dummy_dividor,
dummy_adder,
verbose,
verbose_tune,
expansions_only,
observation_weights,
do_not_cluster_on_these,
neighbor_tolerance,
no_intercept,
VhalfInv,
Vhalf,
include_warnings,
...)
model_fit$VhalfInv_params_vcov <- abs(VhalfInv_params_vcov)
model_fit$VhalfInv_params_estimates <- VhalfInv_params_estimates
model_fit$VhalfInv_fxn <- VhalfInv_fxn
model_fit$Vhalf_fxn <- Vhalf_fxn
model_fit$VhalfInv_logdet <- VhalfInv_logdet
model_fit$REML_grad <- REML_grad
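## The estimated correlation machinery is stored on the fit for reuse.
# A hedged sketch of how the stored pieces relate (assuming `fit` is the
# returned lgspline object; not executed here):
#   # fit$VhalfInv_fxn(fit$VhalfInv_params_estimates) # rebuilds VhalfInv
#   # sqrt(diag(fit$VhalfInv_params_vcov))            # approx. SEs of the
#   #                                                 # correlation parameters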
}
## Rename elements of B_raw and B according to actual column names
if(replace_colnames){
## perform for B and rownames A
og_colnames_match <- cbind(og_cols, paste0('_', 1:ncol(predictors), '_'))
new_names <- sapply(names(model_fit$B[[1]]), function(nm){
for(ii in 1:nrow(og_colnames_match)){
nm <- gsub(og_colnames_match[ii,2], og_colnames_match[ii,1], nm)
}
nm
})
for(k in 1:(model_fit$K+1)){
rownames(model_fit$B[[k]]) <- new_names
names(model_fit$B[[k]]) <- new_names
}
rownames(model_fit$A) <- paste0(rep(paste0('partition',
1:(model_fit$K+1)),
each = model_fit$p),
"_",
new_names)
}
## Inference using Wald:
# score Test/LR Test can be obtained using these components as well
if(return_varcovmat){
## Univariate inference
wald_univariate <- function(scale_vcovmat_by = 1,
cv = critical_value){
return_list <- list(
est = unlist(model_fit$B),
se = sqrt(scale_vcovmat_by * diag(model_fit$varcovmat))
)
return_list$stat <- return_list$est/return_list$se
return_list$interval_lb <- return_list$se*
(return_list$stat - cv)
return_list$interval_ub <- return_list$se*
(return_list$stat + cv)
## If normal errors, use exact t-test
# Otherwise use Wald's N(0,1) approximation
if(!(any(!(paste0(family)[1:4] == paste0(gaussian())[1:4])))){
return_list$pval <- 2*(1-pt(abs(return_list$stat),
df = model_fit$N - model_fit$trace_XUGX))
} else {
return_list$pval <- 2*(1-pnorm(abs(return_list$stat)))
}
return(return_list)
}
} else {
wald_univariate <- function(scale_vcovmat_by = 1,
cv = critical_value){
NULL
}
}
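## wald_univariate() returns, per coefficient, est, se, stat = est/se, and the
# interval est -/+ cv*se (written above as se*(stat -/+ cv)), with exact
# t-based p-values for Gaussian identity-link fits and the N(0,1) Wald
# approximation otherwise; it is attached to the fit just below.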
## Univariate inference
model_fit$wald_univariate <- wald_univariate
## Store the critical value used for interval construction
model_fit$critical_value <- critical_value
## Function for generating draws from posterior/posterior predictive
model_fit$generate_posterior <- function(new_sigmasq_tilde =
model_fit$sigmasq_tilde,
new_predictors = predictors,
theta_1 = 0,
theta_2 = 0,
posterior_predictive_draw =
function(N,
mean,
sqrt_dispersion,
...)rnorm(
N, mean, sqrt_dispersion
),
draw_dispersion = TRUE,
include_posterior_predictive = FALSE,
num_draws = 1,
...){
## Check compatibility, that new_predictors should be a matrix
if(any(!is.null(new_predictors))){
new_predictors <- try(methods::as(cbind(new_predictors), 'matrix'), silent = TRUE)
if(any(inherits(new_predictors, 'try-error'))){
stop('\n\t new_predictors must be coercible to a matrix.\n')
}
}
## Duplicate a single row so matrix operations keep their dimensions;
# the duplicate is undone before results are returned
only_1 <- FALSE
if(nrow(new_predictors) == 1){
only_1 <- TRUE
new_predictors <- rbind(new_predictors, new_predictors)
}
## Helpful components
nc <- model_fit$p # number of cubic expansions (P when K = 0)
K <- model_fit$K # number of partitions - 1
nr <- model_fit$N # number of observations in-sample
res <- lapply(1:num_draws,function(m){
## Draw a dispersion parameter, if applicable, from InvG distribution
if(draw_dispersion){
shape <- theta_1 +
0.5*(model_fit$N - model_fit$unbias_dispersion *
model_fit$trace_XUGX)
rate <- theta_2 +
0.5*(model_fit$N - model_fit$unbias_dispersion *
model_fit$trace_XUGX) *
new_sigmasq_tilde
if(shape <= 0){
stop('\n \t Posterior inverse-gamma shape is <= 0, increase ',
'theta_1 argument to draw a dispersion parameter. \n')
}
if(rate <= 0){
stop('\n \t Posterior inverse-gamma rate is <= 0, increase theta_2 ',
'argument to draw a dispersion parameter. \n')
}
post_draw_sigmasq <-
1/rgamma(1,
shape,
rate)
## If degenerate or infinite, default to the point-estimate provided
# and provide warning
if((is.nan(post_draw_sigmasq) | !is.finite(post_draw_sigmasq)) &
include_warnings){
warning("\n\t Infinite/NaN posterior draw of dispersion detected. \n")
post_draw_sigmasq <- new_sigmasq_tilde
}
} else {
post_draw_sigmasq <- new_sigmasq_tilde
}
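## Note on the inverse-gamma draw above: with effective residual degrees of
# freedom df = N - unbias_dispersion * trace_XUGX, the draw uses
#   shape = theta_1 + df/2,  rate = theta_2 + (df/2) * new_sigmasq_tilde,
# so theta_1 = theta_2 = 0 corresponds to the usual scaled inverse-chi-squared
# posterior centered at the plug-in dispersion estimate.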
## Draw posterior "errors" of beta coefficients under smoothing constraints
# Purpose here is to generate draws on the standardized y-scale,
# using standardized X in the design matrix
# we generate posterior draws of beta on model-scale,
# before backtransforming
# Unscaled by dispersion
# = UG^{1/2}z
post_draw_coefficients_err <-
(1/model_fit$sd_y) * # un-scale the dispersion that was drawn
sqrt(post_draw_sigmasq) * # sqrt-dispersion that was drawn
(model_fit$U %**%
cbind(Reduce("c", lapply(1:(K+1),function(k){
c(model_fit$Ghalf[[k]] %**%
cbind(rnorm(nc)))
})))) # UG^{1/2}z
## Add to B MAPs we've already fit, then backtransform for raw scale
post_draw_coefficients <- lapply(1:(K+1),function(k){
raw_draw <- post_draw_coefficients_err[1:nc +(k-1)*nc] +
model_fit$B_raw[[k]]
## Un-scale, based on centered-and-scaled y
raw_draw <- raw_draw * model_fit$sd_y # multiply by sd of y
## Add mean of y to all intercepts
raw_draw[1] <- raw_draw[1] + model_fit$mean_y
## Backtransform for un-standardized predictors
return(model_fit$backtransform_coefficients(raw_draw))
})
## Return posterior predictive draws
if(include_posterior_predictive){
## Posterior-predictive mean
post_pred_mean <- model_fit$predict(
new_predictors,
B_predict = post_draw_coefficients)
## Posterior-predictive realization
post_pred_draw <- posterior_predictive_draw(length(post_pred_mean),
post_pred_mean,
sqrt(post_draw_sigmasq),
...)
return(list(post_pred_draw = post_pred_draw,
post_draw_coefficients = post_draw_coefficients,
post_draw_sigmasq = post_draw_sigmasq))
## Return posterior coefficient draws, and sigma sq
} else {
return(list(
post_draw_coefficients = post_draw_coefficients,
post_draw_sigmasq = post_draw_sigmasq
))
}
})
if(num_draws == 1){
if(only_1){
res[[1]][[1]] <- res[[1]][[1]][1]
}
return(res[[1]])
}
## Combine results
post_draw_coefficients <- lapply(res, `[[`, "post_draw_coefficients")
post_draw_sigmasq <- lapply(res, `[[`, "post_draw_sigmasq")
if(include_posterior_predictive){
post_pred_draw <-
Reduce("cbind", lapply(res, `[[`, "post_pred_draw"))
if(only_1){
post_pred_draw <- post_pred_draw[1,,drop=FALSE]
}
return(list(post_pred_draw = post_pred_draw,
post_draw_coefficients = post_draw_coefficients,
post_draw_sigmasq = post_draw_sigmasq))
}
return(list(
post_draw_coefficients = post_draw_coefficients,
post_draw_sigmasq = post_draw_sigmasq
))
}
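## Hedged usage sketch for generate_posterior (assuming `fit` is the returned
# lgspline object; not executed here):
#   # one joint draw of coefficients and dispersion
#   # draw <- fit$generate_posterior()
#   # 100 draws, including posterior-predictive realizations of the response
#   # draws <- fit$generate_posterior(num_draws = 100,
#   #                                 include_posterior_predictive = TRUE)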
## Find global maximum/minimum
model_fit$find_extremum <- function(
vars = NULL,
quick_heuristic = TRUE, # only start search once in top-performing partition
initial = NULL, # initial values, useful for fixing binary predictors which aren't optimized
B_predict = NULL, # custom coefficients, if desired
minimize = FALSE, # minimize vs. maximize
stochastic = FALSE, # add noise to candidates proposed by L-BFGS-B
stochastic_draw = function(mu,
sigma,
...){N <- length(mu)
rnorm(
N, mu, sigma
)},
sigmasq_predict = model_fit$sigmasq_tilde, # Variance for stochastic optimization
custom_objective_function = NULL,# custom function for maximizing/minimizing with args mean (mu), std dev (sigma), best-observed (y_best), and ellipses (...)
custom_objective_derivative = NULL, # custom gradient of function for maximizing/minimizing with args mean (mu), std dev (sigma), best observed thus far (y_best), derivative of fitted function (x')^{t}b to pass through, and ellipses (...)
...
){
## Square-root dispersion is a more convenient parameterization in practice
sigma_tilde <- sqrt(sigmasq_predict)
## Sign switch for maximizing vs. minimizing (-1 to maximize, +1 to minimize)
# optim() minimizes by default; the sign is applied manually because several
# steps below (e.g., picking the best partition and fitted value) need it
# directly, not just the optimizer's own control option
min_or_max <- 2*(minimize-0.5)
## Re-assign predictions if B_predict offered
if(any(is.null(B_predict))){
B_predict <- model_fit$B
} else {
model_fit$ytilde <-
model_fit$predict(
predictors,
B_predict = B_predict
)
}
## Use all partitions by default
partitions <- 1:(model_fit$K+1)
## If any NaN, return randomly selected value and predicted performance
if(any(is.nan(model_fit$ytilde))){
dummy_draw <- c(sapply(1:ncol(predictors), function(j){
runif(1, min(predictors[,j]), max(predictors[,j]))
}))
dummy_y <- model_fit$predict(rbind(dummy_draw))
return(list(
t = dummy_draw,
y = dummy_y
))
}
## Only use partition with best fitted, by default
if(quick_heuristic | !is.null(initial)){
best_fitted <- which.max(model_fit$ytilde * (-min_or_max))
partitions <- which(sapply(1:(model_fit$K+1),function(k){
best_fitted %in% model_fit$order_list[[k]]
}))
}
## Go through each partition, optimize the cubic function within
# remove empty partitions first
partitions_keep <- which(sapply(partitions, function(k){
nrow(model_fit$X[[k]])
}) > 0)
## Find variables to optimize over
if(inherits(vars, 'numeric')){
nms <- paste0('_', vars, '_')
beta_inds <- which(sapply(model_fit$raw_expansion_names,
function(nm)any(grepl(nm, nms))))
select_vars_fl <- TRUE
} else if(inherits(vars,'character')){
if(length(og_cols) == 0){
stop('\n\t Do not submit character argument to "vars" unless you have',
' named columns in the predictors you used to fit the model ',
' and the "data" argument was not NULL \n')
}
vars <- unlist(sapply(vars, function(v)which(og_cols == v)))
nms <- paste0('_', vars, '_')
beta_inds <- which(sapply(model_fit$raw_expansion_names,
function(nm)any(grepl(nm, nms))))
select_vars_fl <- TRUE
} else {
beta_inds <- 1:model_fit$p
select_vars_fl <- FALSE
vars <- 1:model_fit$q
}
## Loop through partitions (or only the "best" one)
best_per_partition <- lapply(partitions[partitions_keep], function(k){
if(any(!is.null(initial))){
predictors_vals <- initial
} else {
## Extract best fitted value for initialization
yk <- model_fit$X[[k]] %**% B_predict[[k]]
best <- which.max(-yk*min_or_max)
predictors_vals <- predictors[model_fit$order_list[[k]][best],
, drop=FALSE]
}
## Quasi-newton optimization
opt <- stats::optim(
predictors_vals[vars],
fn = function(par){
## Adjust
if(select_vars_fl){
dummy <- predictors_vals
dummy[vars] <- par
par <- dummy
}
if(!is.null(custom_objective_function)){
## Prediction
pred <- model_fit$predict(new_predictors = rbind(c(par)),
parallel = FALSE,
cl = NULL,
chunk_size = NULL,
num_chunks = NULL,
rem_chunks = NULL,
B_predict = B_predict)
if(stochastic){
## Add random noise if desired
pred <- stochastic_draw(pred, sigma_tilde, ...)
}
## Throw into custom objective if desired
min_or_max*custom_objective_function(pred,
sigma_tilde,
max(-y*min_or_max),
...)
} else {
## Otherwise, no custom objective
pred <- model_fit$predict(new_predictors = rbind(c(par)),
parallel = FALSE,
cl = NULL,
chunk_size = NULL,
num_chunks = NULL,
rem_chunks = NULL,
B_predict = B_predict)
if(stochastic){
pred <- stochastic_draw(pred, sigma_tilde, ...)
}
min_or_max*pred
}
},
gr = function(par){
## Adjust
if(select_vars_fl){
dummy <- predictors_vals
dummy[vars] <- par
par <- dummy
}
if(!is.null(custom_objective_derivative)) {
## Repeat for gradient
pred <- model_fit$predict(new_predictors = rbind(c(par)),
parallel = FALSE,
cl = NULL,
chunk_size = NULL,
num_chunks = NULL,
rem_chunks = NULL,
B_predict = B_predict)
gr <- model_fit$predict(new_predictors = rbind(c(par)),
parallel = FALSE,
cl = NULL,
chunk_size = NULL,
num_chunks = NULL,
rem_chunks = NULL,
B_predict = B_predict,
take_first_derivatives = TRUE)$first_deriv
gr_par <- rep(0, length(par))
gr_raw <- min_or_max*custom_objective_derivative(pred,
sigma_tilde,
max(-y*min_or_max),
gr,
...)
gr_par[model_fit$numerics] <- gr_raw
gr_par[vars]
} else {
gr_par <- rep(0, length(par))
gr_raw <- min_or_max*model_fit$predict(
new_predictors = rbind(c(par)),
parallel = FALSE,
cl = NULL,
chunk_size = NULL,
num_chunks = NULL,
rem_chunks = NULL,
B_predict = B_predict,
take_first_derivatives = TRUE)$first_deriv
gr_par[model_fit$numerics] <- gr_raw
gr_par[vars]
}
},
method = 'L-BFGS-B',
lower = apply(predictors, 2, min),
upper = apply(predictors, 2, max)
)
## Adjust
if(select_vars_fl){
dummy <- predictors_vals
dummy[vars] <- opt$par
par <- dummy
} else {
par <- opt$par
}
return(rbind(c(par)))
})
## Find the global optimum out of all optimal-per-partitions
best_per_partition <- Reduce("rbind", best_per_partition)
preds <- model_fit$predict(new_predictors = best_per_partition,
B_predict = B_predict)
global_max <- which.max(-min_or_max*preds)
## Return the optimized values
extr <- best_per_partition[global_max, ,drop=FALSE]
colnames(extr) <- colnames(predictors)
return(list(
t = extr,
y = preds[global_max]
))
}
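## Hedged usage sketch for find_extremum (assuming `fit` is the returned
# lgspline object; not executed here):
#   # fit$find_extremum()                  # location and value of the maximum
#   # fit$find_extremum(minimize = TRUE)   # ... or of the minimum
#   # fit$find_extremum(stochastic = TRUE) # noisy, acquisition-style search
# The search runs L-BFGS-B within the box defined by the observed predictor
# ranges, starting from the best fitted value per partition.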
## One-dimensional plotting function
plot_lgspline_1d <- function(modfit,
show_formulas,
digits,
legend_pos,
custom_ylab,
custom_predictor_lab,
custom_formula_lab,
custom_title,
text_size_formula,
xlim1d,
ylim1d,
plot_fxn_1d,
legend_args,
color_function,
...) {
## For preventing stack issues
model_fit <- modfit
drop(modfit)
## Linear term and name
if(length(model_fit$power1_cols) > 0){
xvals <- lapply(model_fit$X, function(x) x[,model_fit$power1_cols[1]])
} else {
xvals <- lapply(model_fit$X, function(x) x[,model_fit$nonspline_cols[1]])
}
## For customizing xlab and legend predictor label
v1 <- colnames(model_fit$X[[1]])[c(model_fit$power1_cols,
model_fit$nonspline_cols)[1]]
if(is.null(custom_predictor_lab)){
if(replace_colnames){
custom_predictor_lab <- og_cols[as.numeric(substr(v1, 2, nchar(v1)-1))]
} else {
custom_predictor_lab <- v1
}
}
## Fitted values re-organized into list format
y_fitted <- lapply(1:(model_fit$K + 1), function(k){
model_fit$ytilde[model_fit$order_list[[k]]]
})
## Rainbow gradient
cols = color_function(model_fit$K+1)
## xlab defaults to the actual variable name if submitted as NULL
if(is.null(custom_predictor_lab)){
xlab <- names(model_fit$B[[1]])[2]
} else {
xlab <- custom_predictor_lab
}
## Default xlim/ylim preventing stack issues
if(is.null(ylim1d)){
ylim <- c(min(unlist(y_fitted),
model_fit$y), max(unlist(y_fitted),
model_fit$y))
} else {
ylim <- ylim1d
}
if(is.null(xlim1d)){
xlim <- c(min(unlist(xvals)), max(unlist(xvals)))
} else {
xlim <- xlim1d
}
## Basic plot
# plot_fxn_1d can be plot() or points()
plot_fxn_1d(xvals[[1]],
y_fitted[[1]],
ylim = ylim,
xlim = xlim,
xlab = xlab,
ylab = custom_ylab,
col = cols[1],
main = custom_title,
...)
## Add in other partitions
if(model_fit$K >= 1){
for(k in 2:(model_fit$K + 1)){
points(xvals[[k]],
y_fitted[[k]],
xlab = xlab,
ylab = custom_ylab,
col = cols[k],
main = custom_title,
...)
}
}
## Add formulas if requested - using existing names
if(show_formulas) {
formulas <- sapply(1:(model_fit$K+1), function(k) {
coefs <- round(model_fit$B[[k]], digits)
names(coefs) <- rownames(coefs)
names(coefs) <- gsub(
rownames(model_fit$B[[k]])[2],
xlab,
names(coefs)
)
names(coefs) <- gsub(v1, custom_predictor_lab, names(coefs))
paste0(custom_formula_lab, " = ", paste(coefs, names(coefs),
collapse = " + "))
})
formulas <- gsub('intercept', '', formulas)
formulas <- gsub('  ', ' ', formulas) # collapse double spaces left by removals
## Create base legend arguments
legend_base_args <- list(
x = legend_pos,
legend = formulas,
col = cols,
lwd = 2,
cex = text_size_formula
)
## Merge with user-supplied legend arguments if any
if(length(legend_args) > 0) {
## If legend_args is a named list, use it directly
if(is.list(legend_args)) {
legend_final_args <- utils::modifyList(legend_base_args, legend_args)
}
## If it is not a list, try to convert it first
else {
legend_args_list <- as.list(legend_args)
if(!is.null(names(legend_args))) {
names(legend_args_list) <- names(legend_args)
legend_final_args <- utils::modifyList(legend_base_args,
legend_args_list)
} else {
legend_final_args <- legend_base_args
}
}
} else {
legend_final_args <- legend_base_args
}
## Call legend with final arguments
do.call(graphics::legend, legend_final_args)
}
}
## Two-dimensional plotting function
plot_lgspline_2d <- function(modfit,
show_formulas,
digits,
custom_zlab,
custom_formula_lab,
custom_predictor_lab1,
custom_predictor_lab2,
custom_title,
text_size_formula,
color_function,
...) {
model_fit <- modfit
## Modification such that when plotting a categorical + spline effect,
# we do not plot spline effect vs. spline effect^2, based on how
# the polynomial expansions are arranged
if(length(model_fit$nonspline_cols) > 0){
if(length(model_fit$nonspline_cols) == 2){
xvals1 <-
lapply(model_fit$X, function(x) x[,model_fit$nonspline_cols[1]])
v1 <- colnames(model_fit$X[[1]])[model_fit$nonspline_cols[1]]
xvals2 <-
lapply(model_fit$X, function(x) x[,model_fit$nonspline_cols[2]])
v2 <- colnames(model_fit$X[[1]])[model_fit$nonspline_cols[2]]
} else {
xvals1 <-
lapply(model_fit$X, function(x) x[,model_fit$power1_cols[1]])
v1 <- colnames(model_fit$X[[1]])[model_fit$power1_cols[1]]
xvals2 <-
lapply(model_fit$X, function(x) x[,model_fit$nonspline_cols[1]])
v2 <- colnames(model_fit$X[[1]])[model_fit$nonspline_cols[1]]
}
} else {
xvals1 <-
lapply(model_fit$X, function(x) x[,model_fit$power1_cols[1]])
v1 <- colnames(model_fit$X[[1]])[model_fit$power1_cols[1]]
xvals2 <-
lapply(model_fit$X, function(x) x[,model_fit$power1_cols[2]])
v2 <- colnames(model_fit$X[[1]])[model_fit$power1_cols[2]]
}
## For swapping out custom labels from formulas
if(is.null(og_cols)){
og_cols <- model_fit$raw_expansion_names[c(model_fit$power1_cols,
model_fit$nonspline_cols)]
}
## For customizing formula and xlab names
if(is.null(custom_predictor_lab1)){
if(replace_colnames){
custom_predictor_lab1 <- og_cols[as.numeric(substr(v1, 2, nchar(v1)-1))]
} else {
custom_predictor_lab1 <- v1
}
}
if(is.null(custom_predictor_lab2)){
if(replace_colnames){
custom_predictor_lab2 <- og_cols[as.numeric(substr(v2, 2, nchar(v2)-1))]
} else {
custom_predictor_lab2 <- v2
}
}
## Fitted values in block-diagonal order
y_fitted <- lapply(1:(model_fit$K+1), function(k) {
model_fit$ytilde[model_fit$order_list[[k]]]
})
## Combine data for plotting
plot_data <- data.frame(
x = unlist(xvals1),
y = unlist(xvals2),
z = unlist(y_fitted),
partition = factor(rep(1:(model_fit$K+1), sapply(xvals1, length)))
)
## Create formulas for hover text if requested
if(show_formulas) {
formulas <- sapply(1:(model_fit$K+1), function(k) {
coefs <- round(model_fit$B[[k]], digits)
names(coefs) <- rownames(coefs)
names(coefs) <- gsub("\\^2", "<sup>2</sup>", names(coefs))
names(coefs) <- gsub("\\^3", "<sup>3</sup>", names(coefs))
names(coefs) <- gsub("\\^4", "<sup>4</sup>", names(coefs))
names(coefs) <- gsub(og_cols[as.numeric(substr(v1, 2, nchar(v1)-1))],
custom_predictor_lab1, names(coefs))
names(coefs) <- gsub(og_cols[as.numeric(substr(v2, 2, nchar(v2)-1))],
custom_predictor_lab2, names(coefs))
paste0(custom_formula_lab, " = ", paste(coefs, names(coefs),
collapse = " + "))
})
formulas <- gsub('intercept', '', formulas)
formulas <- gsub('  ', ' ', formulas) # collapse double spaces left by removals
plot_data$formula <- rep(formulas, sapply(xvals1, length))
}
## Show formulas or not
if(show_formulas){
text <- ~formula
} else {
text <- NULL
}
## Create plotly plot
p <- plotly::layout(
plotly::plot_ly(plot_data,
x = ~x,
y = ~y,
z = ~z,
color = ~partition,
colors = color_function(model_fit$K+1),
type = "scatter3d",
mode = "markers",
text = text,
connectgaps = TRUE,
hoverinfo = if(show_formulas) "text" else "x+y+z+name",
hoverlabel = list(font = list(size = text_size_formula)),
...
),
scene = list(
xaxis = list(title = custom_predictor_lab1),
yaxis = list(title = custom_predictor_lab2),
zaxis = list(title = custom_zlab)
),
title = custom_title
)
return(p)
}
## Wrapper
model_fit$plot <- function(model_fit_in = model_fit,
show_formulas = FALSE,
digits = 4,
legend_pos = "topright",
custom_response_lab = "y",
custom_predictor_lab = NULL,
custom_predictor_lab1 = NULL,
custom_predictor_lab2 = NULL,
custom_formula_lab = NULL,
custom_title = "Fitted Function",
text_size_formula = NULL,
legend_args = list(),
new_predictors = NULL,
xlim = NULL,
ylim = NULL,
color_function = NULL,
add = FALSE,
vars = c(),
...){
## add = TRUE has the effect of overlaying the plot over an existing one
# only for 1D
if(add){
plot_fxn_1d = graphics::points
} else {
plot_fxn_1d = graphics::plot
}
## Check compatibility, that new_predictors should be a matrix
if(any(!is.null(new_predictors))){
new_predictors <- try(methods::as(cbind(new_predictors), 'matrix'), silent = TRUE)
if(any(inherits(new_predictors, 'try-error'))){
stop('\n \t new_predictors should be coercible to a matrix. \n')
}
}
## Default text_size_formula depends on q
if(is.null(text_size_formula)){
text_size_formula <- ifelse(model_fit_in$q == 1 | length(vars) == 1,
0.8,
8)
}
## Default custom_formula_lab = g(E[y]) for g, a link function
if(is.null(custom_formula_lab)){
if(paste0(model_fit_in$family)[2] == 'identity' &
paste0(model_fit_in$family)[1] == 'gaussian'){
custom_formula_lab <- custom_response_lab
} else {
custom_formula_lab <- paste0(model_fit_in$family$link,
'(E[',
custom_response_lab,
'])')
}
}
## Reset model-fit components for new predictors
if(any(!is.null(new_predictors))){
## Get basis and knot expansions
prep <- model_fit_in$predict(new_predictors = new_predictors,
expansions_only = TRUE)
model_fit_in$X <- prep$expansions
## Get order of y by partition
model_fit_in$order_list <- model_fit_in$knot_expand_function(
prep$partition_codes,
prep$partition_bounds,
nrow(new_predictors),
cbind(1:nrow(new_predictors)),
model_fit_in$K)
## Make new prediction
model_fit_in$ytilde <-
model_fit_in$predict(new_predictors = new_predictors)
}
## 1-D plotting
if(model_fit_in$q == 1 | length(vars) == 1){
## Color function takes a single argument (K+1) and returns the colors to use
if(is.null(color_function)){
color_function <- grDevices::rainbow
}
if(length(vars) == 1){
## Isolate variables of interest
if(inherits(vars, 'numeric')){
cols <- paste0('_', vars, '_')
} else if(!any(is.null(og_cols))){
inds <- which(og_cols %in% vars)
if(length(inds) != 1){
stop('\n\t vars is not an original predictor name in the data')
}
cols <- paste0('_', inds, '_')
} else {
stop('\n\tInput predictors have no names, use column indices for vars')
}
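## e.g. vars = 2 (or the second original predictor name) yields cols = '_2_',
# so the intercept plus every expansion whose raw name contains '_2_' is kept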
keeps <- unlist(c(1, sapply(cols, function(col)grep(col,
model_fit_in$raw_expansion_names))))
if(length(keeps) < 2){
stop('\n\t Column indices provided are not present in data\n')
}
for(k in 1:(model_fit_in$K + 1)){
model_fit_in$B[[k]] <-
model_fit_in$B[[k]][keeps,,drop=FALSE]
model_fit_in$B_raw[[k]] <-
model_fit_in$B_raw[[k]][keeps,,drop=FALSE]
}
if(length(model_fit_in$power1_cols) > 0){
model_fit_in$power1_cols <- model_fit_in$power1_cols[
model_fit_in$power1_cols %in% keeps
]
}
if(length(model_fit_in$nonspline_cols) > 0){
model_fit_in$nonspline_cols <- model_fit_in$nonspline_cols[
model_fit_in$nonspline_cols %in% keeps
]
}
}
plot_lgspline_1d(model_fit_in,
show_formulas,
digits,
legend_pos,
custom_response_lab,
custom_predictor_lab,
custom_formula_lab,
custom_title,
text_size_formula,
xlim,
ylim,
plot_fxn_1d,
legend_args,
color_function,
...)
## 2-D plotting
} else if(model_fit_in$q == 2 | length(vars) == 2){
## Color function takes a single argument (K+1) and returns the colors to use
if(is.null(color_function)){
color_function <- grDevices::colorRampPalette(
RColorBrewer::brewer.pal(8, "Spectral"))
}
## Isolate variables of interest
if(length(vars) == 2){
if(inherits(vars, 'numeric')){
cols <- paste0('_', vars, '_')
} else if(!is.null(og_cols)){
inds <- which(og_cols %in% vars)
if(length(inds) != 2){
stop('\n\tOne or both vars are not original predictor names of data')
}
cols <- paste0('_', inds, '_')
} else {
stop('\n\t Original predictor names not present, try numeric indices',
' for vars\n')
}
keeps <- unlist(c(1, sapply(cols, function(col)grep(col,
model_fit_in$raw_expansion_names))))
if(length(keeps) < 3){
stop('\n\t Column indices provided are not present in data\n')
}
for(k in 1:(model_fit_in$K+1)){
model_fit_in$B[[k]] <-
model_fit_in$B[[k]][keeps,,drop=FALSE]
model_fit_in$B_raw[[k]] <-
model_fit_in$B_raw[[k]][keeps,,drop=FALSE]
}
if(length(model_fit_in$power1_cols) > 0){
model_fit_in$power1_cols <- model_fit_in$power1_cols[
model_fit_in$power1_cols %in% keeps
]
}
if(length(model_fit_in$nonspline_cols) > 0){
model_fit_in$nonspline_cols <- model_fit_in$nonspline_cols[
model_fit_in$nonspline_cols %in% keeps
]
}
}
plot_lgspline_2d(model_fit_in,
show_formulas,
digits,
custom_response_lab,
custom_formula_lab,
custom_predictor_lab1,
custom_predictor_lab2,
custom_title,
text_size_formula,
color_function,
...)
} else if(include_warnings){
warning("\n \t No default plotting functions implemented for q > 2 \n")
}
}
## Set S3 class
class(model_fit) <- "lgspline"
return(model_fit)
}
#' Low-Level Fitting for Lagrangian Smoothing Splines
#'
#' @description
#' The core function for fitting Lagrangian smoothing splines, with fewer
#' user-facing conveniences and less input validation than \code{\link{lgspline}}.
#'
#' @return A list containing the fitted model components, forming the core
#' structure used internally by \code{\link{lgspline}} and its associated methods.
#' This function is primarily intended for internal use or advanced users needing
#' direct access to fitting components. The returned list contains numerous elements,
#' typically including:
#' \describe{
#' \item{y}{The original response vector provided.}
#' \item{ytilde}{The fitted values on the original response scale.}
#' \item{X}{A list, with each element the design matrix (\eqn{\textbf{X}_k}) for partition k.}
#' \item{A}{The constraint matrix (\eqn{\textbf{A}}) encoding smoothness and any other linear equality constraints.}
#' \item{B}{A list of the final fitted coefficient vectors (\eqn{\boldsymbol{\beta}_k}) for each partition k, on the original predictor/response scale.}
#' \item{B_raw}{A list of fitted coefficient vectors on the internally standardized scale used during fitting.}
#' \item{K, p, q, P, N}{Key dimensions: number of internal knots (K), basis functions per partition (p), original predictors (q), total coefficients (P), and sample size (N).}
#' \item{penalties}{A list containing the final penalty components used (e.g., \code{Lambda}, \code{L1}, \code{L2}, \code{L_predictor_list}, \code{L_partition_list}). See \code{\link{compute_Lambda}}.}
#' \item{knot_scale_transf, knot_scale_inv_transf}{Functions to transform predictors to/from the scale used for knot placement.}
#' \item{knots}{Matrix or vector of knot locations on the original predictor scale (NULL if K=0 or q > 1).}
#' \item{partition_codes}{Vector assigning each original observation to a partition.}
#' \item{partition_bounds}{Internal representation of partition boundaries.}
#' \item{make_partition_list}{List containing centers, knot midpoints, neighbor info, and assignment function from partitioning (NULL if K=0 or 1D). See \code{\link{make_partitions}}.}
#' \item{knot_expand_function, assign_partition}{Internal functions for partitioning data. See \code{\link{knot_expand_list}}.}
#' \item{predict}{The primary function embedded in the object for generating predictions on new data. See \code{\link{predict.lgspline}}.}
#' \item{family}{The \code{\link[stats]{family}} object or custom list used.}
#' \item{estimate_dispersion, unbias_dispersion}{Logical flags related to dispersion estimation settings.}
#' \item{sigmasq_tilde}{The estimated (or fixed, if \code{estimate_dispersion=FALSE}) dispersion parameter \eqn{\tilde{\sigma}^2}.}
#' \item{backtransform_coefficients, forwtransform_coefficients}{Functions to convert coefficients between standardized and original scales.}
#' \item{mean_y, sd_y}{Mean and standard deviation used for standardizing the response.}
#' \item{og_order, order_list}{Information mapping original data order to partitioned order.}
#' \item{constraint_values, constraint_vectors}{User-supplied additional linear equality constraints.}
#' \item{expansion_scales}{Scaling factors applied to basis expansions during fitting (if \code{standardize_expansions_for_fitting=TRUE}).}
#' \item{take_derivative, take_interaction_2ndderivative, get_all_derivatives_insample}{Functions related to computing derivatives of the fitted spline. See \code{\link{take_derivative}}, \code{\link{take_interaction_2ndderivative}}, \code{\link{make_derivative_matrix}}.}
#' \item{numerics, power1_cols, ..., nonspline_cols}{Integer vectors storing column indices identifying different types of terms in the basis expansion.}
#' \item{return_varcovmat}{Logical indicating if variance matrix calculation was requested.}
#' \item{raw_expansion_names}{Original generated names for basis expansion columns (before potential renaming if input predictors had names).}
#' \item{std_X, unstd_X}{Functions to standardize/unstandardize design matrices according to \code{expansion_scales}.}
#' \item{parallel_cluster_supplied}{Logical indicating if a parallel cluster was used.}
#' \item{weights}{The original observation weights provided (potentially reformatted).}
#' \item{VhalfInv}{The fixed \eqn{\mathbf{V}^{-1/2}} matrix if supplied for GEEs.}
#' \item{quadprog_list}{List containing components related to quadratic programming constraints, if used.}
#' \item{G, Ghalf, U}{Matrices related to the variance-covariance structure (\eqn{\mathbf{G}}, \eqn{\mathbf{G}^{1/2}}, \eqn{\mathbf{U}}), returned if requested via corresponding arguments. See \code{\link{compute_G_eigen}} and \code{\link{get_U}}.}
#' \item{trace_XUGX}{The trace term \eqn{\text{trace}(\mathbf{X}\mathbf{U}\mathbf{G}\mathbf{X}^{T})}, used for effective degrees of freedom. See \code{\link{compute_trace_UGXX_wrapper}}.}
#' \item{varcovmat}{The final variance-covariance matrix of the estimated coefficients, \eqn{\sigma^2 \mathbf{U}\mathbf{G}}, returned if \code{return_varcovmat = TRUE}.}
#' }
#' Note that the exact components returned depend heavily on the function
#' arguments (e.g., values of \code{return_G}, \code{return_varcovmat}, etc.).
#' If \code{expansions_only = TRUE}, a much smaller list is returned containing
#' only pre-fitting components needed for inspection or setup (see \code{\link{lgspline}}).
#'
#' @usage
#' lgspline.fit(predictors, y = NULL, standardize_response = TRUE,
#' standardize_predictors_for_knots = TRUE,
#' standardize_expansions_for_fitting = TRUE, family = gaussian(),
#' glm_weight_function = function(mu, y, order_indices, family,
#' dispersion, observation_weights,
#' ...) {
#' if(any(!is.null(observation_weights))){
#' family$variance(mu) * observation_weights
#' } else {
#' family$variance(mu)
#' }
#' },
#' shur_correction_function = function(X, y, B, dispersion, order_list,
#' K, family, observation_weights,
#' ...) {
#' lapply(1:(K+1), function(k) 0)
#' },
#' need_dispersion_for_estimation = FALSE,
#' dispersion_function = function(mu, y, order_indices, family,
#' observation_weights, ...) { 1 },
#' K = NULL, custom_knots = NULL, cluster_on_indicators = FALSE,
#' make_partition_list = NULL, previously_tuned_penalties = NULL,
#' smoothing_spline_penalty = NULL, opt = TRUE, use_custom_bfgs = TRUE,
#' delta = NULL, tol = 10*sqrt(.Machine$double.eps),
#' invsoftplus_initial_wiggle = c(-25, 20, -15, -10, -5),
#' invsoftplus_initial_flat = c(-14, -7), wiggle_penalty = 2e-07,
#' flat_ridge_penalty = 0.5, unique_penalty_per_partition = TRUE,
#' unique_penalty_per_predictor = TRUE, meta_penalty = 1e-08,
#' predictor_penalties = NULL, partition_penalties = NULL,
#' include_quadratic_terms = TRUE, include_cubic_terms = TRUE,
#' include_quartic_terms = FALSE, include_2way_interactions = TRUE,
#' include_3way_interactions = TRUE,
#' include_quadratic_interactions = FALSE,
#' offset = c(), just_linear_with_interactions = NULL,
#' just_linear_without_interactions = NULL,
#' exclude_interactions_for = NULL,
#' exclude_these_expansions = NULL, custom_basis_fxn = NULL,
#' include_constrain_fitted = TRUE,
#' include_constrain_first_deriv = TRUE,
#' include_constrain_second_deriv = TRUE,
#' include_constrain_interactions = TRUE, cl = NULL, chunk_size = NULL,
#' parallel_eigen = TRUE, parallel_trace = FALSE, parallel_aga = FALSE,
#' parallel_matmult = FALSE, parallel_unconstrained = FALSE,
#' parallel_find_neighbors = FALSE, parallel_penalty = FALSE,
#' parallel_make_constraint = FALSE,
#' unconstrained_fit_fxn = unconstrained_fit_default,
#' keep_weighted_Lambda = FALSE, iterate_tune = TRUE,
#' iterate_final_fit = TRUE, blockfit = FALSE,
#' qp_score_function = function(X, y, mu, order_list, dispersion,
#' VhalfInv, observation_weights, ...) {
#' if(!is.null(observation_weights)) {
#' crossprod(X, cbind((y - mu)*observation_weights))
#' } else {
#' crossprod(X, cbind(y - mu))
#' }
#' },
#' qp_observations = NULL, qp_Amat = NULL, qp_bvec = NULL, qp_meq = 0,
#' qp_positive_derivative = FALSE, qp_negative_derivative = FALSE,
#' qp_monotonic_increase = FALSE, qp_monotonic_decrease = FALSE,
#' qp_range_upper = NULL, qp_range_lower = NULL, qp_Amat_fxn = NULL,
#' qp_bvec_fxn = NULL, qp_meq_fxn = NULL, constraint_values = cbind(),
#' constraint_vectors = cbind(), return_G = TRUE, return_Ghalf = TRUE,
#' return_U = TRUE, estimate_dispersion = TRUE,
#' unbias_dispersion = TRUE,
#' return_varcovmat = TRUE, custom_penalty_mat = NULL,
#' cluster_args = c(custom_centers = NA, nstart = 10),
#' dummy_dividor = 1.2345672152894e-22,
#' dummy_adder = 2.234567210529e-18, verbose = FALSE,
#' verbose_tune = FALSE, expansions_only = FALSE,
#' observation_weights = NULL, do_not_cluster_on_these = c(),
#' neighbor_tolerance = 1 + 1e-16, no_intercept = FALSE,
#' VhalfInv = NULL, Vhalf = NULL, include_warnings = TRUE, ...)
#'
#' @inheritParams lgspline
#'
#' @keywords internal
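#'
#' @examples
#' ## Minimal illustrative sketch on simulated 1-D Gaussian data; lgspline()
#' ## is the recommended user-facing interface, and the settings below are
#' ## assumptions for demonstration rather than recommended defaults.
#' \dontrun{
#' set.seed(1)
#' x <- seq(-1, 1, length.out = 100)
#' y <- sin(2*pi*x) + rnorm(100, sd = 0.25)
#' fit <- lgspline.fit(cbind(x), y, K = 4, return_varcovmat = FALSE)
#' str(fit$B)        # per-partition coefficient vectors
#' head(fit$ytilde)  # fitted values on the original response scale
#' }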
#' @export
lgspline.fit <- function(predictors,
y = NULL,
standardize_response = TRUE,
standardize_predictors_for_knots = TRUE,
standardize_expansions_for_fitting = TRUE,
family = gaussian(),
glm_weight_function = function(mu,
y,
order_indices,
family,
dispersion,
observation_weights,
...){
if(any(!is.null(observation_weights))){
family$variance(mu) * observation_weights
} else {
family$variance(mu)
}
},
shur_correction_function = function(X,
y,
B,
dispersion,
order_list,
K,
family,
observation_weights,
...){
lapply(1:(K+1), function(k)0)
},
need_dispersion_for_estimation = FALSE,
dispersion_function = function(mu,
y,
order_indices,
family,
observation_weights,
...) { 1 },
K = NULL,
custom_knots = NULL,
cluster_on_indicators = FALSE,
make_partition_list = NULL,
previously_tuned_penalties = NULL,
smoothing_spline_penalty = NULL,
opt = TRUE,
use_custom_bfgs = TRUE,
delta = NULL,
tol = 10*sqrt(.Machine$double.eps),
invsoftplus_initial_wiggle = c(-25, 20, -15, -10, -5),
invsoftplus_initial_flat = c(-14, -7),
wiggle_penalty = 2e-7,
flat_ridge_penalty = 0.5,
unique_penalty_per_partition = TRUE,
unique_penalty_per_predictor = TRUE,
meta_penalty = 1e-8,
predictor_penalties = NULL,
partition_penalties = NULL,
include_quadratic_terms = TRUE,
include_cubic_terms = TRUE,
include_quartic_terms = FALSE,
include_2way_interactions = TRUE,
include_3way_interactions = TRUE,
include_quadratic_interactions = FALSE,
offset = c(),
just_linear_with_interactions = NULL,
just_linear_without_interactions = NULL,
exclude_interactions_for = NULL,
exclude_these_expansions = NULL,
custom_basis_fxn = NULL,
include_constrain_fitted = TRUE,
include_constrain_first_deriv = TRUE,
include_constrain_second_deriv = TRUE,
include_constrain_interactions = TRUE,
cl = NULL,
chunk_size = NULL,
parallel_eigen = TRUE,
parallel_trace = FALSE,
parallel_aga = FALSE,
parallel_matmult = FALSE,
parallel_unconstrained = FALSE,
parallel_find_neighbors = FALSE,
parallel_penalty = FALSE,
parallel_make_constraint = FALSE,
unconstrained_fit_fxn = unconstrained_fit_default,
keep_weighted_Lambda = FALSE,
iterate_tune = TRUE,
iterate_final_fit = TRUE,
blockfit = FALSE,
qp_score_function = function(X, y, mu, order_list, dispersion, VhalfInv, observation_weights, ...) {
if(!is.null(observation_weights)) {
crossprod(X, cbind((y - mu)*observation_weights))
} else {
crossprod(X, cbind(y - mu))
}
},
qp_observations = NULL,
qp_Amat = NULL,
qp_bvec = NULL,
qp_meq = 0,
qp_positive_derivative = FALSE,
qp_negative_derivative = FALSE,
qp_monotonic_increase = FALSE,
qp_monotonic_decrease = FALSE,
qp_range_upper = NULL,
qp_range_lower = NULL,
qp_Amat_fxn = NULL,
qp_bvec_fxn = NULL,
qp_meq_fxn = NULL,
constraint_values = cbind(),
constraint_vectors = cbind(),
return_G = TRUE,
return_Ghalf = TRUE,
return_U = TRUE,
estimate_dispersion = TRUE,
unbias_dispersion = TRUE,
return_varcovmat = TRUE,
custom_penalty_mat = NULL,
cluster_args = c(custom_centers = NA, nstart = 10),
dummy_dividor = 1.2345672152894e-22,
dummy_adder = 2.234567210529e-18,
verbose = FALSE,
verbose_tune = FALSE,
expansions_only = FALSE,
observation_weights = NULL,
do_not_cluster_on_these = c(),
neighbor_tolerance = 1 + 1e-16,
no_intercept = FALSE,
VhalfInv = NULL,
Vhalf = NULL,
include_warnings = TRUE,
...){
if(verbose){
cat("Starting\n")
}
## Do not cluster on these should include all linear terms
if(!is.null(just_linear_with_interactions)){
do_not_cluster_on_these <- unique(c(do_not_cluster_on_these,
just_linear_with_interactions))
}
if(!is.null(just_linear_without_interactions)){
do_not_cluster_on_these <- unique(c(do_not_cluster_on_these,
just_linear_without_interactions))
}
## Accept raw predictors (the T matrix) and get dimensions
predictors <- methods::as(predictors,'matrix')
qcols <- ncol(predictors)
nr <- nrow(predictors)
## Return error message if any terms are > q
vecdummy <- c(1,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for)
if(any(
c(1,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for) > qcols
)){
print(c(1,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for))
stop('\n \t Elements in just_linear_with_interactions, ',
'just_linear_without_interactions, and/or exclude_interactions_for are',
' greater than the number of columns of predictors matrix. \n')
}
## Original y vector of response
y_og <- y
## Initialize all variables as numeric by default
numerics <- 1:qcols
## Separate some variables based on desired polynomial expansions
if(any(is.null(just_linear_with_interactions))){
just_linear_with_interactions <- c()
}
if(any(is.null(just_linear_without_interactions))){
just_linear_without_interactions <- c()
}
if(any(is.null(exclude_interactions_for))){
exclude_interactions_for <- c()
}
numerics <- numerics[!(numerics %in% c(just_linear_with_interactions,
just_linear_without_interactions))]
intercept <- 1
## If at most one variable is eligible for interactions, disable all
# interaction terms
if(length(exclude_interactions_for) >= (qcols - 1)){
include_2way_interactions <- FALSE
include_3way_interactions <- FALSE
include_quadratic_interactions <- FALSE
}
## With only two terms available for interactions, exclude 3-ways
if(length(exclude_interactions_for) >= (qcols - 2)){
include_3way_interactions = FALSE
}
if(verbose){
cat("Polynomial Expansions\n")
}
## Get cubic expansions for design matrix predictors
C <- get_polynomial_expansions(predictors,
numerics,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
exclude_these_expansions,
custom_basis_fxn,
...)
## Number of cubic expansions per-partition (little p = nc)
nc <- ncol(C)
## In 1-D, set K = number of constraints given by custom knots
# if not null
if(any(!(is.null(custom_knots))) & qcols == 1){
if(is.null(K)){
K <- nrow(custom_knots)
}
}
## Default K
orig_null <- FALSE
if(is.null(K)){
orig_null <- TRUE
K <- round(max(min(24/(1 +
1*(qcols > 1) +
1*(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity')),
nr/nc),
0)/(1 +
1*(qcols > 1) +
1*(paste0(family)[1] != 'gaussian' |
paste0(family)[2] != 'identity')))
}
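## e.g. (illustrative): nr = 400 observations, nc = 5 expansions per
# partition, and one Gaussian-identity predictor gives
# K = round(max(min(24, 80), 0)/1) = 24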
if(K == 0){
unique_penalty_per_partition <- FALSE
}
## Catch the error where clustering is needed but no variables are allowed
# to be clustered on
if(length(do_not_cluster_on_these) == qcols &
qcols > 1 &
K > 0){
stop('\n \t Must include at least 1 variable to cluster on if multiple',
' variables are present. \n')
}
## K can't be greater than max of number of observations or q
# for kmeans clustering purposes
if(K >= max(c(nr, qcols))) {
if(include_warnings){
warning('\n \t Max(N, q) too small for K. K = max(N, q) - 2 will be',
' used. \n')
}
K <- max(max(c(nr, qcols)) - 2, 0)
}
## Detect if parallel, and K > 0
if(any(!(is.null(cl))) & K > 0){
if(inherits(cl, 'cluster')){
parallel <- TRUE
ncores <- length(cl)
## if K was not supplied as an argument, scale the default by the number of cores
if(orig_null){
K <- K*ncores
}
## compute the chunk size, number of chunks, and leftover (remainder) chunks
if(is.null(chunk_size)){
chunk_size <- max(1, ceiling((K + 1) / (4 * ncores)))
}
num_chunks <- (K+1) %/% chunk_size
rem_chunks <- (K+1) %% chunk_size
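## e.g. (illustrative): K + 1 = 24 partitions on 4 cores gives
# chunk_size = max(1, ceiling(24/16)) = 2, num_chunks = 12, rem_chunks = 0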
} else {
parallel <- FALSE
}
} else{
parallel <- FALSE
}
if(verbose){
cat("Standardization\n")
}
## Standardize outcome for identity link (agnostic to distribution)
if(paste0(family)[2] == 'identity' &
#paste0(family)[1] == 'gaussian' &
length(unique(y)) > 1 &
standardize_response){
mean_y <- mean(y)
sd_y <- try(sd(y),silent = TRUE)
if(any(inherits(sd_y, 'try-error'))){
sd_y <- 1
}
y <- (y - mean_y)/sd_y
} else {
sd_y <- 1
mean_y <- 0
}
## For cardinal knot placement,
# scale to (0, 1) in one dimension, or standardize to ~N(0, 1) in higher dimensions
if(standardize_predictors_for_knots){
if(length(numerics) == 1){
minns <- apply(predictors, 2, min)
maxxs <- apply(predictors, 2, max)
for (j in 1:qcols) {
predictors[, j] <-
(predictors[, j] - minns[j] + dummy_adder) /
(maxxs[j] - minns[j] + dummy_dividor)
}
} else {
means <- apply(predictors, 2, mean)
sds <- apply(predictors, 2, function(x)tryCatch(sd(x),
error = function(err)1))
for (j in 1:qcols) {
predictors[, j] <-
(predictors[, j] - means[j] + dummy_adder) /
(sds[j] + dummy_dividor)
}
}
} else {
if(length(numerics) == 1){
maxxs <- rep(1, qcols)
minns <- rep(0, qcols)
dummy_adder <- 0
dummy_dividor <- 0
} else {
means <- rep(0, qcols)
sds <- rep(1, qcols)
dummy_adder <- 0
dummy_dividor <- 0
}
}
## Transform function for cardinal knot placement
transf <- function(X) {
if(length(numerics) == 1){
for (j in 1:ncol(X)) {
X[, j] <-
(X[, j] - minns[j] + dummy_adder) /
(maxxs[j] - minns[j] + dummy_dividor)
}
} else {
for (j in 1:ncol(X)) {
X[, j] <-
(X[, j] - means[j] + dummy_adder) /
(sds[j] + dummy_dividor)
}
}
X
}
## Inverse transform function for cardinal knot placement
inv_transf <- function(Xsc) {
if(length(numerics) == 1){
for (j in 1:ncol(Xsc)) {
Xsc[, j] <-
(Xsc[, j] *
(maxxs[j] - minns[j] + dummy_dividor) + minns[j] - dummy_adder)
}
} else {
for (j in 1:ncol(Xsc)) {
Xsc[, j] <-
(Xsc[, j] *
(sds[j] + dummy_dividor) + means[j] - dummy_adder)
}
}
Xsc
}
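## Note: inv_transf(transf(X)) recovers X up to the tiny dummy_adder /
# dummy_dividor perturbations used to avoid division by zero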
if(verbose){
cat("Get Knots\n")
}
if(length(numerics) == 1 & qcols == 1){
partitions <- NULL
## Needed for determining knot locations,
# compute row means of the standardized predictor (used to place knots)
partition_codes <- rowMeans(predictors)
if(any(!(is.null(custom_knots)))){
## Transform custom knots to the (0, 1) scale used for knot placement
knot_values <- transf(custom_knots)
partition_bounds <- sort(rowMeans(knot_values))
kvb <- partition_bounds
} else if(K > 0){
## Knot values at partition_bounds
kvb <- seq(0,1,length.out = K + 2)[-c(1, K + 2)]
knot_values <- cbind(kvb)[,rep(1, qcols),drop=FALSE]
## Mapped to a single value, since all quantiles are equal
partition_bounds <- kvb
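## e.g. K = 3 places interior knots at 0.25, 0.50, 0.75 on the (0, 1) scale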
}
## Compatibility with knot expand function when K = 0
if(K == 0){
partition_bounds <- c()
}
} else {
## If a partition list was previously made, reuse it
if(!any(is.null(make_partition_list))){
partitions <- make_partition_list
} else {
## Get partitions based on kmeans clustering
partitions <- make_partitions(predictors,
cluster_args,
cluster_on_indicators,
K,
parallel & parallel_find_neighbors,
cl,
do_not_cluster_on_these,
neighbor_tolerance)
}
knot_values <- rbind(partitions$knots)
## For compatibility when no knots
if(K == 0){
partition_codes <- c()
partition_bounds <- c()
## Code the partitions as an arbitrary monotonic transform
} else {
if(verbose){
cat("Assign partitions\n")
}
partition_codes <- partitions$assign_partition(predictors)
partition_bounds <- 1:nrow(partitions$centers)
}
}
if(verbose){
cat("Expansion Standardize\n")
}
## Back transform to raw scale,
# now that knots have been established on standardized scale
predictors <- inv_transf(predictors)
## Index columns of C by variable type for penalization purposes later
intercept_col <- 1
colnm_expansions <- colnames(C)
power1_cols <- 2:(length(numerics) + 1)
if(length(numerics) == 0){
power2_cols <- c()
power1_cols <- c()
include_constrain_second_deriv <- FALSE
} else {
power2_cols <- which(substr(colnm_expansions, nchar(colnm_expansions)-1,
nchar(colnm_expansions)) == '^2')
}
power3_cols <- which(substr(colnm_expansions, nchar(colnm_expansions)-1,
nchar(colnm_expansions)) == '^3')
power4_cols <- which(substr(colnm_expansions, nchar(colnm_expansions)-1,
nchar(colnm_expansions)) == '^4')
interaction_cols <- grep("_x_", colnm_expansions)
if(length(numerics) > 2 & length(interaction_cols) > 0){
triplet_cols <- interaction_cols[
which(sapply(colnm_expansions[interaction_cols], function(col){
grepl('_x_',substr(col, regexpr('_x_',col)[[1]]+3,nchar(col)))
}))]
} else {
triplet_cols <- c()
}
quad_cols <- which(substr(colnm_expansions, nchar(colnm_expansions)-1, nchar(colnm_expansions)) == "^2")
interaction_quad_cols <- intersect(
interaction_cols,quad_cols
)
interaction_single_cols <- interaction_cols[!(interaction_cols %in% c(
triplet_cols, interaction_quad_cols
))]
## Append non-spline terms
nonspline_cols <- c(
which(colnm_expansions %in% c(paste0("_", just_linear_with_interactions, "_"),
paste0("_", just_linear_without_interactions, "_")))
)
nonspline_cols <- nonspline_cols[!(nonspline_cols %in%
c(power1_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols))]
## Standardize columns of C by dividing each expansion by its 0.69-0.31
# interquantile range; expansion_scales is a (p-1)-length vector (no intercept)
expansion_scales <- apply(C[,-intercept_col,drop=FALSE], 2, function(x){
if(length(unique(x)) >= 2){
## Near sigma for a normal distribution
# (i.e. this is close to 1 for N(0,1))
abs(quantile(x, 0.69) - quantile(x, 0.31))
} else {
1
}
})
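## e.g. for x ~ N(0, sigma^2), quantile(x, 0.69) - quantile(x, 0.31) is
# approximately 0.99 * sigma, so this roughly standardizes to unit variance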
## For offsets, keep the linear term unscaled
if(length(offset) > 0){
offset_ind <- which(colnm_expansions %in% paste0(
'_', offset, '_'
))
expansion_scales[offset_ind-1] <- 1
}
expansion_scales[expansion_scales == 0] <- 1
names(expansion_scales) <- colnm_expansions[-intercept_col, drop=FALSE]
# Set back to 1 if not desired
if(!standardize_expansions_for_fitting){
expansion_scales <- 0*expansion_scales + 1
}
## Function to standardize columns of C (divide by expansion_scales)
std_X <- function(unstd_X_in){
sweep(unstd_X_in, 2, c(1, expansion_scales), "/")
}
max_C <- apply(C, 2, max)
min_C <- apply(C, 2, min)
max_min_C <- rbind(c(max_C), c(min_C))
C <- std_X(C)
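## Function to un-standardize columns of C (multiply back by expansion_scales)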
unstd_X <- function(std_X_in){
sweep(std_X_in, 2, c(1, expansion_scales), "*")
}
## If no intercept is desired, add equality constraints forcing each partition intercept to 0
if(no_intercept & length(constraint_vectors) < 1){
constr <- sapply(1:(K+1), function(k){
vec <- rep(0, nc*(K+1))
vec[nc*(k-1) + 1] <- 1
vec
})
constraint_vectors <- cbind(constr)
constraint_values <- 0*constraint_vectors
} else if(no_intercept){
constr <- sapply(1:(K+1), function(k){
vec <- rep(0, nc*(K+1))
vec[nc*(k-1) + 1] <- 1
vec
})
constraint_vectors <- cbind(constraint_vectors,
constr)
constraint_values <- cbind(rowSums(cbind(constraint_values,
0*constr
)))
}
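## e.g. (illustrative) with K = 1 and nc = 3, constr has columns
# c(1,0,0, 0,0,0) and c(0,0,0, 1,0,0), so t(constr) %*% beta = 0 forces
# both partition intercepts to 0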
## Repeat analogously for offsets if present
if(length(offset) > 0 & length(constraint_vectors) < 1){
offset_ind <- which(colnm_expansions %in% paste0(
'_', offset, '_'
))
constr <- Reduce('cbind', lapply(1:length(offset_ind), function(o){
rbind(sapply(1:(K+1), function(k){
vec <- rep(0, nc*(K+1))
vec[nc*(k-1) + offset_ind[o]] <- 1
vec
}))
}))
constraint_vectors <- constr
constraint_values <- cbind(rowSums(constr))
} else if(length(offset) > 0){
offset_ind <- which(colnm_expansions %in% paste0(
'_', offset, '_'
))
constr <- Reduce('cbind', lapply(1:length(offset_ind), function(o){
rbind(sapply(1:(K+1), function(k){
vec <- rep(0, nc*(K+1))
vec[nc*(k-1) + offset_ind[o]] <- 1
vec
}))
}))
constraint_vectors <- cbind(constraint_vectors,
constr)
constraint_values <- cbind(rowSums(cbind(constraint_values,
constr)))
}
## Adjust coefficients after un-standardizing
backtransform_coefficients <- function(coef) {
# Extract intercept and slope coefficients
intercept <- coef[intercept_col]
slopes <- coef[-intercept_col]
# Back-transform slope coefficients
backtransformed_slopes <- slopes / expansion_scales
# Combine intercept and back-transformed slopes
cbind(c(intercept, backtransformed_slopes))
}
## Adjust coefficients for future standardizing
forwtransform_coefficients <- function(coef) {
# Extract intercept and slope coefficients
intercept <- coef[intercept_col]
slopes <- coef[-intercept_col]
# Forward-transform slope coefficients
forwtransformed_slopes <- slopes * expansion_scales
# Combine intercept and forward-transformed slopes
cbind(c(intercept, forwtransformed_slopes))
}
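## e.g. (illustrative): with expansion_scales = c(2, 5),
# backtransform_coefficients(c(1, 4, 10)) returns cbind(c(1, 2, 2)) and
# forwtransform_coefficients(c(1, 2, 2)) recovers cbind(c(1, 4, 10))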
if(verbose){
cat("Knot Expand\n")
}
## Get knot expansions
X <- knot_expand_list(partition_codes,
partition_bounds,
nr,
C,
K)
## Assign y to their partitions (y_og saves original y unstandardized)
y <- knot_expand_list(partition_codes,
partition_bounds,
nr,
cbind(y),
K)
## If custom variance-covariance structure specified
if(!is.null(VhalfInv) & !expansions_only){
VhalfInv <- try(methods::as(VhalfInv,'matrix'), silent = TRUE)
if(any(inherits(VhalfInv, 'try-error'))){
if(include_warnings){
warning('\n \t VhalfInv cannot be converted to a N by N matrix, it ',
'will not be considered here. \n')
}
VhalfInv <- NULL
} else if(any(unique(dim(VhalfInv)) != nr)){
if(include_warnings){
warning('\n \t VhalfInv is not an N by N matrix; it will not be',
' considered here. \n')
}
VhalfInv <- NULL
} else {
if(verbose){
cat("Applying Whitening Transform\n")
}
if((paste0(family)[[1]] != 'gaussian' |
paste0(family)[[2]] != 'identity') &
!is.null(VhalfInv)){
## GEEs require this, if not provided
if(is.null(Vhalf)){
Vhalf <- invert(VhalfInv)
}
}
## Overwrite dev.resids (if present) to match normal approximation
# May not ever be called, if custom loss functions for
# optimizing covariance structure and tuning penalties are used
# (like with Weibull AFT) - but does require variance(mu) to be avail.
if(!is.null(family$dev.resids)){
family$dev.resids <- function(y, mu, wt){
((y-mu)^2)*wt
}
family$linkfun <- function(mu)mu
}
## Expand V^{-1/2}y
y_expand_og <- y # save original
y <- knot_expand_list(partition_codes,
partition_bounds,
nr,
VhalfInv %**% cbind((y_og - mean_y)/sd_y),
K)
## Expand V^{-1/2}X_0, where X_0 = X when K = 0
X_expand_og <- X # save original
tempVC <- VhalfInv %**% C
colnames(tempVC) <- colnm_expansions
X <- knot_expand_list(partition_codes,
partition_bounds,
nr,
tempVC,
K)
tempVC <- NULL
}
}
## Get observation weight expansions
if(any(!is.null(observation_weights))){
## Observation weights must form an N x 1 vector
if(nrow(cbind(observation_weights)) != nr |
ncol(cbind(observation_weights)) != 1){
stop('\n \t Observation weights must be an N x 1 vector. \n')
}
observation_weights_og <- observation_weights
homogenous_weights <- (length(unique(observation_weights_og)) == 1)
observation_weights <-
knot_expand_list(partition_codes,
partition_bounds,
nr,
cbind(observation_weights),
K)
} else {
observation_weights_og <- rep(1, nr)
observation_weights <- lapply(1:(K+1), function(k)cbind(rep(1,
length(y[[k]]))))
homogenous_weights <- TRUE
}
## Save the original ordering to each partition
order_list <- knot_expand_list(partition_codes,
partition_bounds,
nr,
cbind(1:nr),
K)
og_order <- order(unlist(order_list))
## Return derivatives per-partition of an expanded matrix
if(!is.null(VhalfInv) & !expansions_only){
X_expand_new <- X
y_expand_new <- y
X <- X_expand_og
y <- y_expand_og
}
all_derivatives <- function(X,
just_first_derivatives = FALSE,
just_spline_effects = TRUE){
lapply(X, function(C){
make_derivative_matrix(
nc,
C,
power1_cols,
power2_cols,
nonspline_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
K,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions,
expansion_scales,
just_first_derivatives,
just_spline_effects
)
})
}
if(!is.null(VhalfInv) & !expansions_only){
X <- X_expand_new
y <- y_expand_new
X_expand_new <- NULL
y_expand_new <- NULL
}
if(verbose){
cat("2nd Derivative Penalty\n")
}
## Compute integrated squared second derivative of fitted function
# evaluated over bounds of the support
# can be replaced with arbitrary p by p matrix if desired
if(any(is.null(smoothing_spline_penalty))){
## Compute the gram matrix for the squared integrated second derivative
# using the same standardization applied to C
max_min_C <- std_X(max_min_C)
smoothing_spline_penalty <-
get_2ndDerivPenalty_wrapper(K,
colnm_expansions,
max_min_C,
power1_cols,
power2_cols,
power3_cols,
power4_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
nonspline_cols,
nc,
parallel & parallel_penalty,
cl)
colnames(smoothing_spline_penalty) <- colnames(C)
}
if(verbose){
cat("Constraint Matrix\n")
}
## Making a constraint matrix
A <- 0
if((K > 0 & length(numerics) == 1 & length(nonspline_cols) == 0)){
## Knot basis expansions
CKnots <- get_polynomial_expansions(inv_transf(cbind(knot_values)),
numerics,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
exclude_these_expansions,
custom_basis_fxn,
...)
if(K == 1){
CKnots <- rbind(CKnots)
}
if(nrow(CKnots) < K){
CKnots <- rbind(CKnots, matrix(0, K - nrow(CKnots), ncol = ncol(CKnots)))
}
## Constraint matrix A
A <- make_constraint_matrix(nc,
CKnots,
power1_cols,
power2_cols,
nonspline_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
K,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions,
expansion_scales)
## apply standardization to the rows of A,
# once constraints un-standardized are derived
if(length(constraint_vectors) > 0 & length(constraint_values) > 0){
if(length(offset) > 0){
## Remove offset from existing constraints, these are set to 1
offset_ind <- which(colnm_expansions %in% paste0(
'_', offset, '_'
))
offset_inds <- unlist(lapply(1:(K+1), function(k)nc*(k-1)+offset_ind))
A[offset_inds,] <- 0
}
if(no_intercept){
## Remove intercepts from constraints, if no_intercept is TRUE
A[unlist(lapply(1:(K+1),function(k)nc*(k-1)+1)),] <- 0
}
A <- cbind(A, constraint_vectors)
}
A <- sweep(A, 1, rep(c(1, expansion_scales), K+1), "/")
if(any(!is.finite(A))) stop(paste0('\n \t A is not finite \n', expansion_scales))
## Otherwise, if we do have knots.....
} else if(K > 0){
## how many chunks/individual matrices will A be composed of
chunk <- nrow(knot_values) %/% K
rem <- nrow(knot_values) %% K # don't forget straggling rows
## copy knot values (processed in chunks below)
knot_values_perm <- knot_values[1:nrow(knot_values),,drop=FALSE]
if(parallel & parallel_make_constraint){
A <- Reduce("cbind",
parallel::parLapply(cl,
1:chunk,
function(i){
## Select the knot values in the chunk
knot_values_chunk <- knot_values_perm[1:K + (i-1)*K,,drop=FALSE]
## Get polynomial expansions of knot quantile values
CKnots_chunk <- rbind(get_polynomial_expansions(
inv_transf(knot_values_chunk),
numerics,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
exclude_these_expansions,
custom_basis_fxn,
...))
rownames(CKnots_chunk) <- rownames(knot_values_chunk)
## Constraint matrix A
make_constraint_matrix(nc,
CKnots_chunk,
power1_cols,
power2_cols,
nonspline_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
K,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions,
expansion_scales)
}))
} else {
for(i in 1:chunk){
## Select the knot values for this chunk
knot_values_chunk <- knot_values_perm[1:K + (i-1)*K,,drop=FALSE]
## Get polynomial expansions of knot quantile values
CKnots_chunk <- rbind(
get_polynomial_expansions(inv_transf(knot_values_chunk),
numerics,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
exclude_these_expansions,
custom_basis_fxn,
...))
rownames(CKnots_chunk) <- rownames(knot_values_chunk)
## Constraint matrix A
A <- cbind(A, make_constraint_matrix(nc,
CKnots_chunk,
power1_cols,
power2_cols,
nonspline_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
K,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions,
expansion_scales))
if(i == 1){
## Remove 0 column
A <- A[,-1,drop=FALSE]
}
}
}
if(rem > 0){
## Select the remaining (leftover) knot values
knot_values_chunk <-
knot_values_perm[rev(c(nrow(knot_values_perm):1)[1:rem]),,drop=FALSE]
## Get polynomial expansions of knot quantile values
temp_dat <- inv_transf(knot_values_chunk)
only_1 <- FALSE
if(nrow(temp_dat) == 1){
only_1 <- TRUE
temp_dat <- rbind(temp_dat, temp_dat)
}
CKnots_chunk <- rbind(
get_polynomial_expansions(temp_dat,
numerics,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
exclude_these_expansions,
custom_basis_fxn,
...))
if(only_1){
CKnots_chunk <- CKnots_chunk[1,,drop=FALSE]
}
rownames(CKnots_chunk) <- rownames(knot_values_chunk)
dummy <- matrix(0, nrow = K - rem, ncol = ncol(CKnots_chunk))
rownames(dummy) <- paste0(sample(1:nrow(dummy)), '_', 2:(nrow(dummy)+1))
CKnots_chunk <- rbind(CKnots_chunk, dummy)
## Constraint matrix A
A <- cbind(A, make_constraint_matrix(nc,
CKnots_chunk,
power1_cols,
power2_cols,
nonspline_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
K,
include_constrain_fitted,
include_constrain_first_deriv,
include_constrain_second_deriv,
include_constrain_interactions,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions,
expansion_scales))
}
## Remove all 0 columns
A <- A[,which(apply(abs(A), 2, sum) > 1e-16),drop=FALSE]
## Bind other constraints, standardize
if(length(constraint_vectors) > 0){
if(length(offset) > 0){
## Remove offset from existing constraints, these are set to 1
offset_ind <- which(colnm_expansions %in% paste0(
'_', offset, '_'
))
offset_inds <- unlist(lapply(1:(K+1), function(k)nc*(k-1)+offset_ind))
A[offset_inds,] <- 0
}
if(no_intercept){
## Remove intercepts from constraints, if no_intercept is TRUE
A[unlist(lapply(1:(K+1),function(k)nc*(k-1)+1)),] <- 0
}
A <- cbind(A, constraint_vectors)
}
A <- sweep(A, 1, rep(c(1, expansion_scales), K+1), "/")
if(any(!is.finite(A))) stop(paste0('\n \t A is not finite \n', expansion_scales))
if(any(is.na(A))) stop(paste0('\n \t A is NA somewhere ',
'(any(is.na(A)) == TRUE) \n', expansion_scales))
} else {
## If missing constraints, apply custom constraints if desired only
## or do not include A at all
if(length(constraint_vectors) > 0){
A <- cbind(constraint_vectors)
A <- sweep(A, 1, rep(c(1, expansion_scales), K+1), "/")
} else {
A <- NULL
}
}
if(!(any(is.null(A)))){
nca <- ncol(A)
}
## Convert non-null constraint values to a (K+1)-length list over partitions
# Adjust for intercept being shifted by mean y
if(length(constraint_values) > 0){
constraint_values <- lapply(1:(K+1),function(k){
vec <- cbind(constraint_values)[1:nc + (k-1)*nc,,drop=FALSE]
vec[1,] <- (vec[1,] - mean_y)
vec * c(1, expansion_scales) / sd_y
})
}
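## e.g. (illustrative): a constraint value of 10 on a partition intercept
# becomes (10 - mean_y)/sd_y on the standardized scale used for fitting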
## With only one predictor, we really only need one penalty
if(ncol(predictors) == 1){
unique_penalty_per_predictor <- FALSE
}
if(verbose){
cat("Predictor-and-Partiiton Penalty Setup\n")
}
## Getting unique penalties for predictors/partitions, if not specified
invsoftplus_penalty_vec <- c()
if(unique_penalty_per_predictor & any(is.null(predictor_penalties))){
## Initialize
predictor_penalties <- sapply(colnm_expansions[c(power1_cols,
nonspline_cols)],
function(j)rnorm(1, 0, 0.00001))
names(predictor_penalties) <- paste0('predictor',
colnm_expansions[c(power1_cols,
nonspline_cols)])
invsoftplus_penalty_vec <- c(invsoftplus_penalty_vec, predictor_penalties)
} else if(unique_penalty_per_predictor){
if(length(predictor_penalties) !=
length(c(power1_cols, nonspline_cols))){
stop('\n \t Custom predictor_penalties is not the same length as number ',
'of predictors in model. The number of penalties should coincide with ',
'the number of predictors, if supplied. \n')
}
if(any(predictor_penalties <= 0)){
stop('\n \t All predictor_penalties must be > 0 if supplied. You can set',
' unique_penalty_per_predictor = FALSE to remove predictor penalties. \n')
}
names(predictor_penalties) <- paste0('predictor',
colnm_expansions[c(power1_cols,
nonspline_cols)])
invsoftplus_penalty_vec <- c(invsoftplus_penalty_vec,
log(exp(predictor_penalties)-1))
}
if(unique_penalty_per_partition & any(is.null(partition_penalties))){
## Initialize
partition_penalties <- sapply(1:(K+1),
function(j)rnorm(1, 0, 0.00001))
names(partition_penalties) <- paste0('partition', 1:(K+1))
invsoftplus_penalty_vec <- c(invsoftplus_penalty_vec, partition_penalties)
} else if(unique_penalty_per_partition){
if(length(partition_penalties) !=
(K+1)){
stop('\n \t Custom partition_penalties is not the same length as number ',
'of partitions in model. Try setting K manually, and ensuring that ',
'the length of partition_penalties = K + 1. \n')
}
if(any(partition_penalties <= 0)){
stop('\n \t All partition_penalties must be > 0 if supplied. You can set',
' unique_penalty_per_partition = FALSE to remove partition penalties. \n')
}
names(partition_penalties) <- paste0('partition', 1:(K+1))
invsoftplus_penalty_vec <- c(invsoftplus_penalty_vec,
log(exp(partition_penalties)-1))
}
if(verbose){
cat("Parallel and Weighting Setup\n")
}
## Export components for parallel processing
if(parallel && !is.null(cl)) {
## Create shared environment LOCALLY
shared_env <- new.env(parent = emptyenv())
## Assign key variables to shared environment
shared_vars <- list(
A = A,
nca = ncol(A),
K = K,
nc = nc,
nr = nr,
chunk_size = chunk_size,
num_chunks = num_chunks,
rem_chunks = rem_chunks,
invsoftplus_penalty_vec = invsoftplus_penalty_vec,
unique_penalty_per_partition = unique_penalty_per_partition,
keep_weighted_Lambda = keep_weighted_Lambda,
custom_penalty_mat = custom_penalty_mat,
glm_weight_function = glm_weight_function,
shur_correction_function = shur_correction_function,
unconstrained_fit_fxn = unconstrained_fit_fxn,
observation_weights = observation_weights
)
## Assign variables to the local shared_env
for(nm in names(shared_vars)) {
assign(nm, shared_vars[[nm]], envir = shared_env)
}
## Export the locally defined shared environment
tryCatch({
parallel::clusterExport(cl, "shared_env", envir = environment())
}, error = function(e) {
stop("Failed to export 'shared_env' to cluster: ",
e$message, call. = FALSE)
})
## Export efficient matrix multiplication function
parallel::clusterExport(cl, "efficient_matrix_mult", envir = environment())
## Setup each cluster node with necessary functions/variables
parallel::clusterEvalQ(cl, {
`%**%` <- efficient_matrix_mult
## Load variables from the exported shared environment into global env
if (exists("shared_env")) {
list2env(as.list(shared_env), envir = .GlobalEnv)
} else {
warning("shared_env not found on worker node.")
}
})
}
## X^{T}WX
# Account for weights
if(( (paste0(family)[1] == 'gaussian' &
paste0(family)[2] == 'identity')) &
!homogenous_weights){
X <- lapply(1:(K+1), function(k){
X[[k]] * c(sqrt(observation_weights[[k]]))
})
}
X_gram <- compute_gram_block_diagonal(X,
parallel & parallel_matmult,
cl,
chunk_size,
num_chunks,
rem_chunks)
## Switch back after
if(( (paste0(family)[1] == 'gaussian' &
paste0(family)[2] == 'identity')) &
!homogenous_weights){
X <- lapply(1:(K+1), function(k){
X[[k]] / c(sqrt(observation_weights[[k]]))
})
}
if(verbose){
cat("SQP Setup\n")
}
## Set the quadprog flag if any relevant constraint arguments were supplied
if(qp_negative_derivative | qp_monotonic_decrease |
qp_positive_derivative | qp_monotonic_increase |
any(!(is.null(qp_range_upper))) |
any(!(is.null(qp_range_lower))) |
(any(!is.null(qp_Amat_fxn)) &
any(!is.null(qp_bvec_fxn)) &
any(!is.null(qp_meq_fxn))) |
(any(!is.null(qp_Amat)) &
any(!is.null(qp_bvec)) &
any(!is.null(qp_meq)))){
quadprog <- TRUE
} else {
quadprog <- FALSE
}
## Quadprog setup
if(quadprog){
## Initialize empty constraint lists
qp_Amat_list <- list()
qp_bvec_list <- list()
qp_meq_list <- list()
## Build the full block-diagonal design matrix (memory intensive)
X_block <- Reduce("rbind", lapply(1:(K+1), function(k){
Reduce("cbind",lapply(1:(K+1),function(j){
if(nrow(X[[k]]) == 0){
return(X[[k]])
} else if(j == k) X[[k]] else 0*X[[k]]
}))
}))
## Get observations to apply qp constraints to
if(any(!is.null(qp_observations))){
## The order of observations has changed following partitioning,
# so the code below accounts for this
qp_observations <- try(c(qp_observations), silent = TRUE)
if(any(inherits(qp_observations, 'try-error'))){
stop('\n \t qp_observations must be coercible to a numeric vector \n')
}
X_block <- X_block[c(unlist(order_list)) %in% qp_observations,,drop=FALSE]
}
## Constraints on range of fitted values
if(!(any(is.null(qp_range_upper))) | !any(is.null(qp_range_lower))){
## Account for link-function transforms
if(paste0(family)[2] != 'identity'){
if(any(!is.null(qp_range_upper))){
qp_range_upper <- family$linkfun(qp_range_upper)
}
if(any(!is.null(qp_range_lower))){
qp_range_lower <- family$linkfun(qp_range_lower)
}
if(any(is.na(c(qp_range_upper, qp_range_lower))) |
any(!is.finite(c(qp_range_upper,
qp_range_lower)))){
stop('\n\tQuadratic programming upper/lower range constraints ',
'are NA or not finite after link function transformation. ',
'Supply bounds on raw-response scale, which are in-range of ',
'link function transformations.\n')
}
}
## Both upper and lower
if(!(any(is.null(qp_range_upper))) & !any(is.null(qp_range_lower))){
qp_Amat <- cbind(t(X_block), -t(X_block))
if(length(qp_range_lower) == 1){
## If only single (scalar) bounds are given,
# deduplicate the columns of the qp constraint matrix
if(length(qp_range_upper) == 1){
qp_Amat <- t(unique(t(qp_Amat)))
}
qp_bvec <- rep(qp_range_lower, ncol(qp_Amat)/2)
} else {
qp_bvec <- qp_range_lower
}
## Don't forget y is standardized
qp_bvec_lower <- (qp_bvec - mean_y) / sd_y
if(length(qp_range_upper) == 1){
qp_bvec <- rep(qp_range_upper, ncol(qp_Amat)/2)
} else {
qp_bvec <- qp_range_upper
}
## Don't forget y is standardized
qp_bvec_upper <- -(qp_bvec - mean_y) / sd_y
## Combine
qp_bvec <- c(qp_bvec_lower, qp_bvec_upper)
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
## Just upper
} else if(!(any(is.null(qp_range_upper)))){
qp_Amat <- -t(X_block)
if(length(qp_range_upper) == 1){
qp_Amat <- t(unique(t(qp_Amat)))
qp_bvec <- rep(qp_range_upper, ncol(qp_Amat))
} else {
qp_bvec <- qp_range_upper
}
## Don't forget y is standardized
qp_bvec <- -(qp_bvec - mean_y) / sd_y
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
## Just lower
} else if(!any(is.null(qp_range_lower))){
qp_Amat <- t(X_block)
if(length(qp_range_lower) == 1){
qp_Amat <- t(unique(t(qp_Amat)))
qp_bvec <- rep(qp_range_lower, ncol(qp_Amat))
} else {
qp_bvec <- qp_range_lower
}
## Don't forget y is standardized
qp_bvec <- (qp_bvec - mean_y) / sd_y
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
}
}
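## In quadprog's convention t(Amat) %*% beta >= bvec, the columns built above
# encode X %*% beta >= (lower - mean_y)/sd_y and -X %*% beta >= -(upper - mean_y)/sd_y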
## Derivative constraints
if(qp_positive_derivative){
## Ensure first derivatives are positive
derivs <- make_derivative_matrix(
nc, # number of columns
X_block, # design matrix
power1_cols, # linear term columns
power2_cols, # quadratic term columns
nonspline_cols, # non-spline effects
interaction_single_cols, # single interaction columns
interaction_quad_cols, # quadratic interaction columns
triplet_cols, # triplet interaction columns
K, # number of knots
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions, # column names
expansion_scales, # scaling
just_first_derivatives = TRUE,
just_spline_effects = FALSE
)
## Extract first derivatives for each variable
first_derivative_constraints <- Reduce("rbind",
lapply(derivs$first_derivative, function(deriv_matrix) {
## Ensure non-negative first derivatives for monotonic increasing
t(Reduce('rbind', lapply(1:nrow(deriv_matrix), function(i) {
matrix(c(deriv_matrix[i,]), nrow = 1) # enforce non-negativity
})))
})
)
## Construct constraint matrix for quadprog
qp_Amat <- cbind(first_derivative_constraints)
qp_Amat <- t(unique(t(qp_Amat)))
qp_bvec <- rep(0, ncol(qp_Amat))
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
} else if(qp_negative_derivative){
## Compute first derivative matrix
derivs <- make_derivative_matrix(
nc, # number of columns
X_block, # design matrix
power1_cols, # linear term columns
power2_cols, # quadratic term columns
nonspline_cols, # non-spline effects
interaction_single_cols, # single interaction columns
interaction_quad_cols, # quadratic interaction columns
triplet_cols, # triplet interaction columns
K, # number of knots
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions, # column names
expansion_scales, # scaling
just_first_derivatives = TRUE,
just_spline_effects = FALSE
)
## Extract first derivatives for each variable
first_derivative_constraints <- Reduce("rbind",
lapply(derivs$first_derivative, function(deriv_matrix) {
## Ensure non-positive first derivatives for monotonic decreasing
t(Reduce('rbind', lapply(1:nrow(deriv_matrix), function(i) {
-matrix(c(deriv_matrix[i,]), nrow = 1) # enforce non-positive
})))
})
)
## Construct constraint matrices for quadprog
qp_Amat <- cbind(first_derivative_constraints)
qp_Amat <- t(unique(t(qp_Amat)))
qp_bvec <- rep(0, ncol(qp_Amat))
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
}
## Monotonic increasing constraint
if(qp_monotonic_increase){
## First, create constraints for fitted values
value_constraints <- t(Reduce('rbind', lapply(2:nrow(X_block),
function(i) {
matrix(c(X_block[i,] - X_block[i-1,]), nrow = 1)
})))
## Construct constraint matrix for quadprog
qp_Amat <- cbind(value_constraints)
qp_Amat <- t(unique(t(qp_Amat)))
qp_bvec <- rep(0, ncol(qp_Amat))
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
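## i.e. with eta = X_block %*% beta, these columns require
# eta[i] >= eta[i-1] for i = 2, ..., nrow(X_block)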
## Monotonic decreasing constraint
} else if(qp_monotonic_decrease){
## First, create constraints for fitted values
value_constraints <- -t(Reduce('rbind', lapply(2:nrow(X_block),
function(i) {
matrix(c(X_block[i,] - X_block[i-1,]), nrow = 1)
})))
## Construct constraint matrices for quadprog
qp_Amat <- cbind(value_constraints)
qp_Amat <- t(unique(t(qp_Amat)))
qp_bvec <- rep(0, ncol(qp_Amat))
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- 0
}
## Custom constraints
if(!is.null(qp_Amat_fxn) &
!is.null(qp_bvec_fxn) &
!is.null(qp_meq_fxn)){
qp_Amat <- qp_Amat_fxn(nr,
nc,
K,
X_block,
colnm_expansions,
expansion_scales,
all_derivatives,
...)
qp_bvec <- qp_bvec_fxn(qp_Amat,
nr,
nc,
K,
X_block,
colnm_expansions,
expansion_scales,
all_derivatives,
...)
qp_meq <- qp_meq_fxn(qp_Amat,
nr,
nc,
K,
X_block,
colnm_expansions,
expansion_scales,
all_derivatives,
...)
## Append
qp_Amat_list[[length(qp_Amat_list) + 1]] <- qp_Amat
qp_bvec_list[[length(qp_bvec_list) + 1]] <- qp_bvec
qp_meq_list[[length(qp_meq_list) + 1]] <- qp_meq
}
## Combine and overwrite constraint matrices/vectors
qp_Amat <- do.call(cbind, qp_Amat_list)
qp_bvec <- do.call(c, qp_bvec_list)
qp_meq <- sum(unlist(qp_meq_list))
## Free memory (X_block is no longer needed)
X_block <- NULL
}
## Return basis-expansions only and partitioned y without model fitting
# and associated components
if(expansions_only){
if(verbose) cat('Expansions Only Output Prep\n')
if(K == 0){
knots <- NULL
} else if(qcols == 1) {
knots <- inv_transf(knot_values)
} else {
knots <- NULL
}
if(quadprog){
quadprog_list <- list(
qp_Amat = qp_Amat,
qp_bvec = qp_bvec,
qp_meq = qp_meq
)
} else {
quadprog_list <- list(NA)
}
return(list(
X = X,
y = y,
A = A,
penalties = compute_Lambda(custom_penalty_mat,
smoothing_spline_penalty,
wiggle_penalty,
flat_ridge_penalty,
K,
nc,
unique_penalty_per_predictor,
unique_penalty_per_partition,
log(1+exp(invsoftplus_penalty_vec)),
colnm_expansions,
just_Lambda = FALSE),
order_list = order_list,
og_order = og_order,
expansion_scales = expansion_scales,
colnm_expansions = colnm_expansions,
K = K,
knots = knots,
make_partition_list = partitions,
partition_codes = partition_codes,
partition_bounds = partition_bounds,
constraint_vectors = constraint_vectors,
constraint_values = constraint_values,
quadprog_list = quadprog_list
))
}
if(verbose){
cat("Tune Smoothing Spline Penalty\n")
}
## This is to incorporate weights efficiently for linear regression outcomes
# Remember to back-transform after fitting B for the final time
if(( (paste0(family)[1] == 'gaussian' &
paste0(family)[2] == 'identity')) &
!homogenous_weights){
X <- lapply(1:(K+1), function(k){
X[[k]] * c(sqrt(observation_weights[[k]]))
})
}
if(((paste0(family)[1] == 'gaussian' &
paste0(family)[2] == 'identity')) &
!homogenous_weights){
y <- lapply(1:(K+1), function(k){
y[[k]] * c(sqrt(observation_weights[[k]]))
})
}
## Model components
if(any(is.null(previously_tuned_penalties))){
## Prior precision Lambda
tL <- try({
tune_Lambda(
y,
X,
X_gram,
smoothing_spline_penalty,
A,
K,
nc,
nr,
opt,
use_custom_bfgs,
C,
colnm_expansions,
wiggle_penalty,
flat_ridge_penalty,
invsoftplus_initial_wiggle,
invsoftplus_initial_flat,
unique_penalty_per_predictor,
unique_penalty_per_partition,
invsoftplus_penalty_vec,
meta_penalty,
family,
unconstrained_fit_fxn,
keep_weighted_Lambda,
iterate_tune,
qp_score_function,
quadprog,
qp_Amat,
qp_bvec,
qp_meq,
tol,
sd_y,
delta,
constraint_values,
parallel,
parallel_eigen,
parallel_trace,
parallel_aga,
parallel_matmult,
parallel_unconstrained,
cl,
chunk_size,
num_chunks,
rem_chunks,
shared_env,
custom_penalty_mat,
order_list,
glm_weight_function,
shur_correction_function,
need_dispersion_for_estimation,
dispersion_function,
observation_weights,
homogenous_weights,
blockfit,
just_linear_without_interactions,
Vhalf,
VhalfInv,
verbose_tune,
include_warnings,
...)}, silent = TRUE)
if(inherits(tL, 'try-error')){
if(include_warnings) print(tL)
return(tL)
}
} else {
## Use previously-submitted Lambda
tL <- previously_tuned_penalties
previously_tuned_penalties <- NULL
}
flat_ridge_penalty <- tL$flat_ridge_penalty
wiggle_penalty <- tL$wiggle_penalty
if(verbose){
cat("Prep for final fitting\n")
}
## Final fit
if(K == 0){
## ensuring compatibility with no A
if(any(is.null(A))){
## for compatibility, albeit inefficient
A <- cbind(rep(0, (K+1)*nc))
A <- cbind(A, A)
nca <- 2
}
}
Xy <- vectorproduct_block_diagonal(X, y, K)
shur_corrections <- lapply(1:(K+1), function(k)0)
G_list <-
compute_G_eigen(X_gram,
tL$Lambda,
K,
parallel & parallel_eigen,
cl,
chunk_size,
num_chunks,
rem_chunks,
family,
unique_penalty_per_partition,
tL$L_partition_list,
keep_G = (return_G |
return_U |
estimate_dispersion |
return_varcovmat),
shur_corrections)
if(verbose){
cat('Last fit\n')
}
## Get coefficient and correlation matrix estimates
return_G_getB <- TRUE
B_list <-
try({get_B(
X,
X_gram,
tL$Lambda,
keep_weighted_Lambda,
unique_penalty_per_partition,
tL$L_partition_list,
A,
Xy,
y,
K,
nc,
nca,
G_list$Ghalf,
G_list$GhalfInv,
parallel & parallel_eigen,
parallel & parallel_aga,
parallel & parallel_matmult,
parallel & parallel_unconstrained,
cl,
chunk_size,
num_chunks,
rem_chunks,
family,
unconstrained_fit_fxn,
iterate_final_fit,
qp_score_function,
quadprog,
qp_Amat,
qp_bvec,
qp_meq,
prevB = NULL,
prevUnconB = NULL,
iter_count = 0,
prev_diff = Inf,
tol,
constraint_values,
order_list,
glm_weight_function,
shur_correction_function,
need_dispersion_for_estimation,
dispersion_function,
observation_weights,
homogenous_weights,
return_G_getB,
blockfit,
just_linear_without_interactions,
Vhalf,
VhalfInv,
...)}, silent = TRUE)
if(any(inherits(B_list, 'try-error'))){
if(include_warnings) print(B_list)
stop('\n \t Failure in fitting final model \n')
}
B <- B_list$B
G_list <- B_list$G_list
## Back-transform the sqrt-weight scaling applied earlier,
# for the weighted Gaussian-identity (linear regression) case
if((paste0(family)[1] == 'gaussian' &
paste0(family)[2] == 'identity') &
!homogenous_weights){
X <- lapply(1:(K+1), function(k){
X[[k]] / c(sqrt(observation_weights[[k]]))
})
y <- lapply(1:(K+1), function(k){
y[[k]] / c(sqrt(observation_weights[[k]]))
})
}
## Get original design matrix now, and original y expansions, after fitting
# with VhalfInv NOT involved
if(!is.null(VhalfInv)){
X <- X_expand_og
y <- y_expand_og
X_expand_og <- NULL
y_expand_og <- NULL
}
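## (When a correlation structure is supplied, fitting is carried out on
# VhalfInv-premultiplied, effectively whitened, expansions; the untransformed
# expansions are restored here for reporting and post-processing.)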
if(verbose){
cat("After Fitting Processing \n")
}
## For assigning out of data clusters
if(length(c(numerics, nonspline_cols)) > 1 & K > 0){
assign_partition <- partitions$assign_partition
} else if(length(numerics) == 1 & K > 0){
assign_partition <- function(x)rowMeans(cbind(x))
} else {
assign_partition <- function(x)0.5
}
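# Three cases: with multiple spline/non-spline predictors and K > 0, reuse the
# partition assignment learned during fitting; with a single numeric predictor,
# partition directly on its (standardized) value; otherwise every observation
# falls in a single partition (constant code).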
## Raw (internal-scale) coefficients, useful for incorporation into Bayesian techniques
# i.e. posterior draws are generated on this scale, since G and U are constructed
# on it, and are then back-transformed in the function generate_posterior()
B_raw <- B
## Un-scale, based on centered-and-scaled y
B <- lapply(B, function(b)b * sd_y) # multiply all by sd of y
## Then add mean of y to all intercepts
B <- lapply(1:(K+1), function(k){
b <- B[[k]]
b[1] <-
b[1] + mean_y
b
})
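# (Since the model was fit on (y - mean_y)/sd_y, multiplying every coefficient by
# sd_y and adding mean_y to each partition's intercept returns fitted values on
# the original response scale.)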
## Rename B coefficients for interpretability,
# adjust for unstandardized predictors
B <- lapply(1:(K+1),function(k){
B[[k]] <- backtransform_coefficients(B[[k]])
names(B[[k]]) <- colnm_expansions
B[[k]]
})
names(B) <- paste0('partition',1:(K+1))
## Predict function for new data
predict_function <- function(new_predictors = predictors,
parallel = FALSE,
cl = NULL,
chunk_size = NULL,
num_chunks = NULL,
rem_chunks = NULL,
B_predict = B,
take_first_derivatives = FALSE,
take_second_derivatives = FALSE,
expansions_only = FALSE){
## Check compatibility, that new_predictors should be a matrix
if(any(!is.null(new_predictors))){
new_predictors <- try(methods::as(cbind(new_predictors), 'matrix'), silent = TRUE)
if(any(inherits(new_predictors, 'try-error'))){
stop('\n \t New predictors should be able to be coerced into matrix form. \n')
}
}
## Avoid R rbind issue with 1 row only for certain internal functions
if(nrow(new_predictors) == 1){
new_predictors <- rbind(new_predictors, new_predictors)
only_1 <- TRUE
} else {
only_1 <- FALSE
}
## Accept predictors as matrix
new_predictors <- transf(methods::as(new_predictors, 'matrix'))
## Assign partition codes for the new observations,
# computed on the standardized (transformed) predictor scale
partition_codes_new <- assign_partition(new_predictors)
## Back-transform predictors now that partition codes have been assigned
new_predictors <- inv_transf(new_predictors)
## Cubic/polynomial expansions
C_new <- get_polynomial_expansions(new_predictors,
numerics,
just_linear_with_interactions,
just_linear_without_interactions,
exclude_interactions_for,
include_quadratic_terms,
include_cubic_terms,
include_quartic_terms,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
exclude_these_expansions,
custom_basis_fxn,
...)
## Knot expansions
X_new <- knot_expand_list(
partition_codes_new,
partition_bounds,
length(partition_codes_new),
C_new,
K
)
## If just the expansions are desired
if(expansions_only){
if(only_1){
partition_codes_new <- partition_codes_new[1]
C_new <- C_new[1, , drop=FALSE]
X_new <- lapply(X_new,function(x){
if(!any(is.null(x))){
if(nrow(x) == 2){
return(x[1,,drop=FALSE])
} else {
x
}
} else{
x
}
})
}
return(list("expansions" = X_new,
"partition_codes" = partition_codes_new,
"partition_bounds" = partition_bounds))
}
## Track original row indices so predictions can be re-ordered after block-wise computation
order_list <- knot_expand_list(
partition_codes_new,
partition_bounds,
length(partition_codes_new),
cbind(1:nrow(C_new)),
K)
## Only use relevant blocks
keep_blocks <- which(sapply(1:(K+1),function(k){
nrow(X_new[[k]]) > 0
}))
order_list <- order_list[keep_blocks]
## Predictions
preds <-
unlist(
matmult_block_diagonal(
X_new[keep_blocks],
B_predict[keep_blocks],
length(keep_blocks) - 1,
parallel,
cl,
chunk_size,
num_chunks,
rem_chunks))[order(unlist(order_list))]
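# (Each partition's block is multiplied by its own coefficient vector; the
# concatenated result is then re-ordered back to the original row order of the
# new data via order_list.)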
if(only_1){
preds <- preds[1]
}
final_preds <- family$linkinv(preds)
## If returning derivatives
if(take_first_derivatives | take_second_derivatives){
derivs <- make_derivative_matrix(
nc,
C_new,
power1_cols,
power2_cols,
nonspline_cols,
interaction_single_cols,
interaction_quad_cols,
triplet_cols,
K,
include_2way_interactions,
include_3way_interactions,
include_quadratic_interactions,
colnm_expansions,
expansion_scales,
!take_second_derivatives)
if(only_1){
partition_codes_new <- partition_codes_new[1]
}
## Account for derivatives of link transform
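# Convention used here: linkinvderiv(mu) is the first derivative of the inverse
# link, d(mu)/d(eta), and linkinvderiv2(mu) is the second derivative,
# d^2(mu)/d(eta)^2, both written as functions of mu = linkinv(eta) so they can
# be evaluated at the fitted means.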
if (is.null(family$linkinvderiv) || is.null(family$linkinvderiv2)) {
if (family$link == 'inverse') {
family$linkinvderiv <- function(mu) -mu^2 # First derivative (mu = 1/eta)
family$linkinvderiv2 <- function(mu) 2 * mu^3 # Second derivative
} else if (family$link == 'logit') {
family$linkinvderiv <- function(mu) mu * (1 - mu) # First derivative
family$linkinvderiv2 <- function(mu) mu * (1 - mu) * (1 - 2 * mu) # Second derivative
} else if (family$link == 'log') {
family$linkinvderiv <- function(mu) mu # First derivative
family$linkinvderiv2 <- function(mu) mu # Second derivative
} else if (family$link == 'identity') {
family$linkinvderiv <- function(mu) 1 # First derivative
family$linkinvderiv2 <- function(mu) 0 # Second derivative
} else if (family$link == 'probit') {
family$linkinvderiv <- function(mu) dnorm(qnorm(mu)) # First derivative
family$linkinvderiv2 <- function(mu) -dnorm(qnorm(mu)) * qnorm(mu) # Second derivative
} else if (family$link == 'sqrt') {
family$linkinvderiv <- function(mu) 2 * sqrt(mu) # First derivative
family$linkinvderiv2 <- function(mu) 2 # Second derivative (constant, since mu = eta^2)
} else if (family$link == 'inverse.sqrt') {
family$linkinvderiv <- function(mu) -2 * mu^(3/2) # First derivative (assuming the link is eta = mu^(-1/2))
family$linkinvderiv2 <- function(mu) 6 * mu^2 # Second derivative
} else if (family$link == 'cloglog') {
family$linkinvderiv <- function(mu) -(1 - mu) * log(1 - mu) # First derivative
family$linkinvderiv2 <- function(mu) -(1 - mu) * log(1 - mu) * (1 + log(1 - mu)) # Second derivative
} else if (family$link == 'cauchit') {
family$linkinvderiv <- function(mu) dcauchy(qcauchy(mu)) # First derivative
family$linkinvderiv2 <- function(mu) -2 * pi * qcauchy(mu) * dcauchy(qcauchy(mu))^2 # Second derivative
} else if (family$link == 'log1p') {
family$linkinvderiv <- function(mu) 1 + mu # First derivative (assuming the link is eta = log(1 + mu))
family$linkinvderiv2 <- function(mu) 1 + mu # Second derivative
} else {
if (include_warnings) {
warning('\n\t',
'Link function not recognized: supply custom "linkinvderiv" and ',
'"linkinvderiv2" functions in your custom "family" object to ',
'properly compute derivatives accounting for the link function ',
'transform in GLMs. Otherwise, the derivatives will be returned ',
'on the link-transformed scale.\n'
)
}
family$linkinvderiv <- function(mu) 1 # Default first derivative
family$linkinvderiv2 <- function(mu) 0 # Default second derivative
}
}
if(take_first_derivatives | take_second_derivatives){
Cprime_new <- Reduce("rbind",
lapply(1:length(derivs$first_derivative),
function(var){
d <- derivs$first_derivative[[var]]
if(only_1){
return(d[1,,drop=FALSE])
} else{
return(d)
}
}))
## Knot expansions
Xprime_new <- knot_expand_list(
partition_codes_new,
partition_bounds,
length(partition_codes_new),
Cprime_new,
K
)
## Derivative of predictions
preds_prime <-
unlist(
matmult_block_diagonal(
Xprime_new[keep_blocks],
B_predict[keep_blocks],
length(keep_blocks) - 1,
parallel,
cl,
chunk_size,
num_chunks,
rem_chunks))
## Return g'(f(t))f'(t)
final_preds_prime <- family$linkinvderiv(final_preds) * preds_prime
} else {
final_preds_prime <- NULL
}
## If returning second derivatives
if(take_second_derivatives){
Cdprime_new <- Reduce("rbind",
lapply(1:length(derivs$second_derivative),
function(var){
d <- derivs$second_derivative[[var]]
if(only_1){
return(d[1,,drop=FALSE])
} else{
return(d)
}
}))
## Knot expansions of second derivatives
Xdprime_new <- knot_expand_list(
partition_codes_new,
partition_bounds,
length(partition_codes_new),
Cdprime_new,
K
)
## Second derivative of predictions
preds_dprime <-
unlist(
matmult_block_diagonal(
Xdprime_new[keep_blocks],
B_predict[keep_blocks],
length(keep_blocks) - 1,
parallel,
cl,
chunk_size,
num_chunks,
rem_chunks))
## Return g''(f(t))*f'(t)^2 + g'(f(t))*f''(t)
final_preds_dprime <-
family$linkinvderiv2(final_preds)*preds_prime^2 +
family$linkinvderiv(final_preds)*preds_dprime
} else {
final_preds_dprime <- NULL
}
return(list(
preds = final_preds,
first_deriv = final_preds_prime,
second_deriv = final_preds_dprime
))
} else {
return(final_preds)
}
}
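## Illustrative usage of the returned predict function (not executed here);
# 'fit' and 'newX' are hypothetical names for an lgspline fit and a matrix of
# new predictors with the same columns used in fitting:
#   fit$predict(new_predictors = newX)                  # fitted means
#   fit$predict(new_predictors = newX,
#               take_first_derivatives = TRUE,
#               take_second_derivatives = TRUE)         # list(preds, first_deriv, second_deriv)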
## Get fitted values
ytilde <- predict_function()
## Clean knots, back transform to raw-scale
if(K == 0){
knots <- NULL
} else {
knots <- inv_transf(knot_values)
if(length(numerics) == 1 & length(nonspline_cols) == 0){
rownames(knots) <- paste0(1:K, '_', 2:(K+1))
}
}
## For compatibility without knots
if(K == 0){
knots <- NULL
} else if(qcols == 1) {
knots <- inv_transf(knot_values)
}
## For saving quadratic programming components
if(quadprog){
quadprog_list <- list(
qp_Amat = qp_Amat,
qp_bvec = qp_bvec,
qp_meq = qp_meq
)
} else {
quadprog_list <- list(NA)
}
qp_Amat <- NULL
qp_bvec <- NULL
qp_meq <- NULL
## List of items to return
return_list <- list("y" = y_og,
"ytilde" = ytilde,
"X" = X,
"A" = A,
"B" = B,
"B_raw" = B_raw,
"K" = K,
"p" = nc,
"q" = ncol(predictors),
"P" = (K+1)*nc,
"N" = nr,
"penalties" = tL,
"knot_scale_transf" = transf,
"knot_scale_inv_transf" = inv_transf,
"knots" = knots,
"partition_codes" = partition_codes,
"knot_expand_function" = knot_expand_list,
"predict" = predict_function,
"assign_partition" = assign_partition,
"family" = family,
"estimate_dispersion" = estimate_dispersion,
"unbias_dispersion" = unbias_dispersion,
"backtransform_coefficients" = backtransform_coefficients,
"forwtransform_coefficients" = forwtransform_coefficients,
"mean_y" = mean_y,
"sd_y" = sd_y,
"og_order" = og_order,
"order_list" = order_list,
"constraint_values" = constraint_values,
"constraint_vectors" = constraint_vectors,
"make_partition_list" = partitions,
"expansion_scales" = expansion_scales,
"take_derivative" = take_derivative,
"take_interaction_2ndderivative" =
take_interaction_2ndderivative,
"get_all_derivatives_insample" = function(expansions){
all_derivatives(expansions)},
"numerics" = numerics,
"power1_cols" = power1_cols,
"power2_cols" = power2_cols,
"power3_cols" = power3_cols,
"power4_cols" = power4_cols,
"quad_cols" = quad_cols,
"interaction_single_cols" = interaction_single_cols,
"interaction_quad_cols" = interaction_quad_cols,
"triplet_cols" = triplet_cols,
"nonspline_cols" = nonspline_cols,
"return_varcovmat" = return_varcovmat,
"raw_expansion_names" = colnm_expansions,
"std_X" = std_X,
"unstd_X" = unstd_X,
"parallel_cluster_supplied" = parallel,
"weights" = observation_weights_og,
"VhalfInv" = VhalfInv,
"quadprog_list" = quadprog_list)
if(verbose){
cat("Optional Components\n")
}
## We need U and sigma^2 to compute sigma^2*UG
if(return_varcovmat){
return_U <- TRUE
estimate_dispersion <- TRUE
}
## Option is offered to not return these matrices to save memory/time
## Return scaled variance-covariance matrix components of coefficients
# Note: these are on centered-and-scaled y, standardized-X scale
# Backtransforms are needed to get the varcov on raw scale
# An option provided below
if(return_G){
return_list$G <- G_list$G
}
if(return_Ghalf){
return_list$Ghalf <- G_list$Ghalf
}
if(return_U){
if(verbose){
cat("U\n")
}
if(K == 0 & length(constraint_values) == 0){
return_list$U <- diag(nc*(K+1))
## ensuring compatibility with no A
if(any(is.null(A))){
## for compatibility, albeit inefficient
A <- cbind(rep(0, (K+1)*nc))
A <- cbind(A, A)
nca <- 2
}
} else {
return_list$U <- get_U(
G_list$G,
A,
K,
nc,
nca
)
}
}
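## (U encodes the effect of the smoothness constraints on the unconstrained
# solution; with no constraints it reduces to the identity, and the
# variance-covariance matrix below is assembled as sigma^2 * U G U^T before
# rescaling.)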
## Estimate sigma^2
if(estimate_dispersion){
if(verbose){
cat("Variance Est \n")
}
## Compute trace of XUGX^{T} = trace of UGX^{T}X
if(K == 0){
trace_XUGX <- sum(unlist(sapply(
matmult_block_diagonal(
G_list$G,
X_gram,
K,
parallel =
FALSE,
cl = NULL,
chunk_size,
num_chunks,
rem_chunks),
diag)))
} else {
## An equivalent computation using U directly is left commented out for reference
# if(!is.null(return_list$U)){
# UG <- matmult_U(
# return_list$U,
# G_list$G,
# nc,
# K
# )
# UGXX <- matmult_U(UG,
# X_gram,
# nc,
# K)
# trace_XUGX <- sum(diag(UGXX))
# UGXX <- NULL
# } else {
## Compute trace
trace_XUGX <- compute_trace_UGXX_wrapper(
G_list$G,
A,
# GX^{T}X
matmult_block_diagonal(G_list$G,
X_gram,
K,
parallel = parallel & parallel_matmult,
cl = cl,
chunk_size,
num_chunks,
rem_chunks),
# (A^{T}GA)^{-1}
invert(AGAmult_wrapper(G_list$G,
A,
K,
nc,
nca,
parallel = parallel & parallel_aga,
cl = cl,
chunk_size,
num_chunks,
rem_chunks)),
nc,
nca,
K,
parallel = FALSE,
cl = cl,
chunk_size,
num_chunks,
rem_chunks)
}
#}
if(trace_XUGX < 0 & include_warnings){
warning('\n \t Trace of XUGX^{T} is < 0, which most often indicates a',
' failure of convergence when fitting (i.e. the constrained ',
'maximum likelihood estimate was not found). Try re-fitting, ',
'different knot locations, greater penalties, or a less ',
'complicated model. Alternatively, try to recompute the trace ',
'manually using XUGUX^{T} instead. \n')
}
## Determines a scaling effect on dispersion estimate
if(unbias_dispersion){
scale_by <- nr/(nr - trace_XUGX)
} else {
scale_by <- 1
}
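# (So, for example, the Gaussian-identity estimate below is
#   sigmasq_tilde = scale_by * mean(observation_weights_og * (y_og - ytilde)^2),
# with scale_by = N / (N - trace(XUGX^T)) when unbias_dispersion is TRUE and
# scale_by = 1 otherwise.)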
## Estimating exponential dispersion or variance
if(paste0(family)[1] == 'gaussian' & paste0(family)[2] == 'identity'){
if(!is.null(VhalfInv)){
## Dispersion estimate (variance for Gaussian family) with correlation
return_list$sigmasq_tilde <-
mean((observation_weights_og *
(VhalfInv %**% cbind(y_og - ytilde)))^2) *
scale_by
} else {
## Dispersion estimate (variance for Gaussian family) no correlation
return_list$sigmasq_tilde <-
mean(observation_weights_og * (y_og - ytilde)^2) *
scale_by
}
} else {
if(!is.null(VhalfInv)){
## Dispersion estimate (using custom function)
return_list$sigmasq_tilde <- dispersion_function(
VhalfInv %**% cbind(ytilde),
VhalfInv %**% cbind(y_og),
1:length(y_og), # this is original order!
family,
observation_weights_og,
...
) * scale_by
} else {
## Dispersion estimate (using custom function)
return_list$sigmasq_tilde <- dispersion_function(
ytilde,
y_og,
1:length(y_og), # this is original order!
family,
observation_weights_og,
...
) * scale_by
}
}
## Under penalization, this trace serves as the effective degrees of freedom
return_list$trace_XUGX <- trace_XUGX
} else {
## Otherwise, return 1 for dispersion
return_list$sigmasq_tilde <- 1
}
if(return_varcovmat){
if(verbose){
cat("VarCov Mat \n")
}
## Use UGU^{T} parameterization rather than just UG for numeric stability
# and ensuring symmetry/positive-definiteness
return_list$varcovmat <-
matmult_U(return_list$U, G_list$G, nc, K) %**%
t(return_list$U)
## Un-standardize
d <- rep(c(1, 1/expansion_scales), each = K + 1)
return_list$varcovmat <-
return_list$sigmasq_tilde *
t(t(return_list$varcovmat * d) * d)
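# (t(t(V * d) * d) forms diag(d) %*% V %*% diag(d) without building the diagonal
# matrix explicitly, rescaling rows and columns by the expansion scales.)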
## Replace < 0 diagonals with 0
if(any(diag(return_list$varcovmat) < 0) & include_warnings){
warning("\n \t Variance-covariance matrix has diagonal elements < 0,",
" model most likely did not converge when fitting. Try ",
"re-fitting, a simpler model, changing knot locations, or ",
"increasing the penalties. \n")
for(ij in 1:nrow(return_list$varcovmat)){
return_list$varcovmat[ij,ij] <- max(0,
return_list$varcovmat[ij, ij])
}
}
}
## Afterwards, update X to be unstandardized
return_list$X <- lapply(return_list$X,
unstd_X)
return(return_list)
}