R/monotonic.R

Defines functions smooth.construct.moi.smooth.spec

Documented in smooth.construct.moi.smooth.spec

#' Monotonic splines in \pkg{mvgam} models
#'
#' Uses constructors from package \pkg{splines2} to build monotonically increasing
#' or decreasing splines. Details also in Wang & Yan (2021).
#'
#' @inheritParams mgcv::smooth.construct.bs.smooth.spec
#' @param object A smooth specification object, usually generated by a term
#' `s(x, bs = "moi", ...)` or `s(x, bs = "mod", ...)`
#'
#' @details The constructor is not normally called directly,
#' but is rather used internally by [mvgam]. If they are not supplied then the
#' knots of the spline are placed evenly throughout the covariate values to
#' which the term refers: For example, if fitting 101 data with an 11
#' knot spline of x then there would be a knot at every 10th (ordered) x value.
#' The spline is an implementation of the closed-form I-spline basis based
#' on the recursion formula given by Ramsay (1988), in which the basis coefficients
#' must be constrained to either be non-negative (for monotonically increasing
#' functions) or non-positive (monotonically decreasing)
#' \cr
#' \cr
#' Take note that when using either monotonic basis, the number of basis functions
#' `k` must be supplied as an even integer due to the manner in
#' which monotonic basis functions are constructed
#'
#' @return An object of class `"moi.smooth"` or `"mod.smooth"`. In addition to
#' the usual elements of a smooth class documented under \code{\link[mgcv]{smooth.construct}},
#' this object will contain a slot called `boundary` that defines the endpoints beyond
#' which the spline will begin extrapolating (extrapolation is flat due to the first
#' order penalty placed on the smooth function)
#'
#' @note This constructor will result in a valid smooth if using a call to
#' \code{\link[mgcv]{gam}} or \code{\link[mgcv]{bam}}, however the resulting
#' functions will not be guaranteed to be monotonic because constraints on
#' basis coefficients will not be enforced
#'
#' @references
#' Wang, Wenjie, and Jun Yan. "Shape-Restricted Regression Splines with R Package splines2."
#' Journal of Data Science 19.3 (2021).
#' \cr
#' \cr
#' Ramsay, J. O. (1988). Monotone regression splines in action. Statistical Science, 3(4), 425--441.
#' @importFrom mgcv smooth.construct
#' @export
#' @author Nicholas J Clark
#' @name monotonic
#' @examples
#' \donttest{
#' # Simulate data from a monotonically increasing function
#' set.seed(123123)
#' x <- runif(80) * 4 - 1
#' x <- sort(x)
#' f <- exp(4 * x) / (1 + exp(4 * x))
#' y <- f + rnorm(80) * 0.1
#' plot(x, y)
#'
#' # A standard TRPS smooth doesn't capture monotonicity
#' library(mgcv)
#' mod_data <- data.frame(y = y, x = x)
#' mod <- gam(y ~ s(x, k = 16),
#'            data = mod_data,
#'            family = gaussian())
#'
#' library(marginaleffects)
#' plot_predictions(mod,
#'                  by = 'x',
#'                  newdata = data.frame(x = seq(min(x) - 0.5,
#'                                               max(x) + 0.5,
#'                                               length.out = 100)),
#'                  points = 0.5)
#'
#' # Using the 'moi' basis in mvgam rectifies this
#' mod_data$time <- 1:NROW(mod_data)
#' mod2 <- mvgam(y ~ s(x, bs = 'moi', k = 18),
#'              data = mod_data,
#'              family = gaussian(),
#'              chains = 2,
#'              silent = 2)
#'
#' plot_predictions(mod2,
#'                  by = 'x',
#'                  newdata = data.frame(x = seq(min(x) - 0.5,
#'                                               max(x) + 0.5,
#'                                               length.out = 100)),
#'                  points = 0.5)
#'
#' plot(mod2, type = 'smooth', realisations = TRUE)
#'
#' # 'by' terms that produce a different smooth for each level of the 'by'
#' # factor are also allowed
#' set.seed(123123)
#' x <- runif(80) * 4 - 1
#' x <- sort(x)
#'
#' # Two different monotonic smooths, one for each factor level
#' f <- exp(4 * x) / (1 + exp(4 * x))
#' f2 <- exp(3.5 * x) / (1 + exp(3 * x))
#' fac <- c(rep('a', 80), rep('b', 80))
#' y <- c(f + rnorm(80) * 0.1,
#'        f2 + rnorm(80) * 0.2)
#' plot(x, y[1:80])
#' plot(x, y[81:160])
#'
#' # Gather all data into a data.frame, including the factor 'by' variable
#' mod_data <- data.frame(y, x, fac = as.factor(fac))
#' mod_data$time <- 1:NROW(mod_data)
#'
#' # Fit a model with different smooths per factor level
#' mod <- mvgam(y ~ s(x, bs = 'moi', by = fac, k = 8),
#'              data = mod_data,
#'              family = gaussian(),
#'              chains = 2,
#'              silent = 2)
#'
#' # Visualise the different monotonic functions
#' plot_predictions(mod, condition = c('x', 'fac', 'fac'),
#'                  points = 0.5)
#' plot(mod, type = 'smooth', realisations = TRUE)
#'
#' # First derivatives (on the link scale) should never be
#' # negative for either factor level
#' (derivs <- slopes(mod, variables = 'x',
#'                  by = c('x', 'fac'),
#'                  type = 'link'))
#' all(derivs$estimate > 0)
#' }
smooth.construct.moi.smooth.spec <- function(object, data, knots) {
  insight::check_if_installed("splines2")

  # Check arguments
  object$p.order <- 1
  if (object$bs.dim < 0) object$bs.dim <- 10
  `k(bs = 'moi')` <- object$bs.dim
  if (`k(bs = 'moi')` <= 1) stop("Basis dimension is too small", call. = FALSE)
  validate_pos_integer(`k(bs = 'moi')`)
  validate_even(`k(bs = 'moi')`)

  # Number of knots must be k / 2
  nk <- object$bs.dim / 2L

  if (!is.null(object$id))
    stop("Monotonic splines don't work with ids", call. = FALSE)

  # Check basis dimension
  if (length(object$term) != 1)
    stop("Monotonic basis only handles 1D smooths", call. = FALSE)

  # Find the data and specified knots
  x <- data[[object$term]]
  k <- knots[[object$term]]
  if (length(unique(x)) < nk)
    warning("basis dimension is larger than number of unique covariates")

  # Checks on knots
  if (is.null(k)) {
    xl <- min(x)
    xu <- max(x)
  } else {
    xl <- min(k)
    xu <- max(k)
    if (xl > min(x) || xu < max(x))
      stop("knot range does not include data", call. = FALSE)
    if (length(k) != nk)
      stop(paste("there should be ", nk - 1, " supplied knots"), call. = FALSE)
  }

  if (!is.null(k)) {
    if (sum(colSums(object$X) == 0) > 0)
      warning("there is *no* information about some basis coefficients")
  }

  if (is.null(k)) {
    # Generate knots if missing
    k <- seq(xl, xu, length.out = nk + 2)[2:nk + 1]
  }

  # Set anchor points beyond which extrapolation will occur
  xr <- xu - xl
  boundary <- c(xl - xr * 0.01, xu + xr * 0.01)

  # Generate basis functions
  i_spline_basis <- splines2::iSpline(
    x,
    knots = k,
    degree = nk,
    Boundary.knots = boundary,
    intercept = TRUE
  )

  nbasis <- dim(i_spline_basis)[2]

  # Fill in object
  object$boundary <- boundary
  object$bs.dim <- nbasis
  object$knots <- k
  class(object) <- c("moi.smooth")
  object$X <- i_spline_basis
  if (!is.null(object$xt$S))
    stop(
      'Cannot accept supplied penalty matrices for monotonic splines',
      call. = FALSE
    )
  object$S <- list(diag(object$bs.dim))
  object$rank <- object$bs.dim
  object$null.space.dim <- 0
  object$C <- matrix(0, 0, ncol(object$X))
  object$random <- TRUE
  return(object)
}

#' @export
#' @author Nicholas J Clark
#' @rdname monotonic
smooth.construct.mod.smooth.spec <- function(object, data, knots) {
  insight::check_if_installed("splines2")

  # Check arguments
  object$p.order <- 1
  if (object$bs.dim < 0) object$bs.dim <- 10
  `k(bs = 'moi')` <- object$bs.dim
  if (`k(bs = 'moi')` <= 1) stop("Basis dimension is too small", call. = FALSE)
  validate_pos_integer(`k(bs = 'moi')`)
  validate_even(`k(bs = 'moi')`)

  # Number of knots must be k / 2
  nk <- object$bs.dim / 2L

  if (!is.null(object$id))
    stop("Monotonic splines don't work with ids", call. = FALSE)

  # Check basis dimension
  if (length(object$term) != 1)
    stop("Monotonic basis only handles 1D smooths", call. = FALSE)

  # Find the data and specified knots
  x <- data[[object$term]]
  k <- knots[[object$term]]
  if (length(unique(x)) < nk)
    warning("basis dimension is larger than number of unique covariates")

  # Checks on knots
  if (is.null(k)) {
    xl <- min(x)
    xu <- max(x)
  } else {
    xl <- min(k)
    xu <- max(k)
    if (xl > min(x) || xu < max(x))
      stop("knot range does not include data", call. = FALSE)
    if (length(k) != nk)
      stop(paste("there should be ", nk - 1, " supplied knots"), call. = FALSE)
  }

  if (!is.null(k)) {
    if (sum(colSums(object$X) == 0) > 0)
      warning("there is *no* information about some basis coefficients")
  }

  if (is.null(k)) {
    # Generate knots if missing
    k <- seq(xl, xu, length.out = nk + 2)[2:nk + 1]
  }

  # Set anchor points beyond which extrapolation will occur
  xr <- xu - xl
  boundary <- c(xl - xr * 0.01, xu + xr * 0.01)

  # Generate basis functions
  i_spline_basis <- splines2::iSpline(
    x,
    knots = k,
    degree = nk,
    Boundary.knots = boundary,
    intercept = TRUE
  )

  nbasis <- dim(i_spline_basis)[2]

  # Fill in object
  object$boundary <- boundary
  object$bs.dim <- nbasis
  object$knots <- k
  class(object) <- c("mod.smooth")
  object$X <- i_spline_basis
  if (!is.null(object$xt$S))
    stop(
      'Cannot accept supplied penalty matrices for monotonic splines',
      call. = FALSE
    )
  object$S <- list(diag(object$bs.dim))
  object$rank <- object$bs.dim
  object$null.space.dim <- 0
  object$C <- matrix(0, 0, ncol(object$X))
  object$random <- TRUE
  return(object)
}

# Prediction function for the `moi' smooth class
#' @rdname monotonic
#' @importFrom mgcv Predict.matrix
#' @export
Predict.matrix.moi.smooth <- function(object, data) {
  insight::check_if_installed("splines2")

  # Ensure extrapolation is flat (1st degree penalty behaviour)
  x <- data[[object$term]]
  boundary <- object$boundary
  x[x > max(boundary)] <- max(boundary)
  x[x < min(boundary)] <- min(boundary)
  Xp <- suppressWarnings(splines2::iSpline(
    x,
    knots = object$knots,
    degree = object$bs.dim / 2,
    Boundary.knots = boundary,
    intercept = TRUE
  ))
  return(as.matrix(Xp))
}

# Prediction function for the `mod' smooth class
#' @rdname monotonic
#' @importFrom mgcv Predict.matrix
#' @export
Predict.matrix.mod.smooth <- function(object, data) {
  insight::check_if_installed("splines2")

  # Ensure extrapolation is flat (1st degree penalty behaviour)
  x <- data[[object$term]]
  boundary <- object$boundary
  x[x > max(boundary)] <- max(boundary)
  x[x < min(boundary)] <- min(boundary)
  Xp <- suppressWarnings(splines2::iSpline(
    x,
    knots = object$knots,
    degree = object$bs.dim / 2,
    Boundary.knots = boundary,
    intercept = TRUE
  ))
  return(as.matrix(Xp))
}

add_mono_model_file = function(model_file, model_data, mgcv_model) {
  # Which smooths are monotonic?
  smooth_labs <- do.call(
    rbind,
    lapply(seq_along(mgcv_model$smooth), function(x) {
      data.frame(
        label = mgcv_model$smooth[[x]]$label,
        class = class(mgcv_model$smooth[[x]])[1]
      )
    })
  )

  # Clean labels for inclusion in Stan code
  mono_names <- smooth_labs$label[which(
    smooth_labs$class %in% c('moi.smooth', 'mod.smooth')
  )]
  mono_names_clean <- clean_gpnames(mono_names)

  # What directions are the constraints?
  mono_directions <- smooth_labs %>%
    dplyr::filter(class %in% c('moi.smooth', 'mod.smooth')) %>%
    dplyr::mutate(
      direction = dplyr::case_when(
        class == 'moi.smooth' ~ 1,
        class == 'mod.smooth' ~ -1,
        TRUE ~ -1
      )
    ) %>%
    dplyr::pull(direction)

  # Update model file to include constrained coefficients
  b_line <- max(grep('b[', model_file, fixed = TRUE))
  b_edits <- paste0(
    'b[b_idx_',
    mono_names_clean,
    '] = abs(b_raw[b_idx_',
    mono_names_clean,
    ']) * ',
    mono_directions,
    ';',
    collapse = '\n'
  )
  model_file[b_line] <- paste0(model_file[b_line], '\n', b_edits)
  model_file <- readLines(textConnection(model_file), n = -1)

  # Add the necessary indices to the model_data and data block
  mono_stan_lines <- ''
  for (covariate in seq_along(mono_names)) {
    coef_indices <- which(
      grepl(mono_names[covariate], names(coef(mgcv_model)), fixed = TRUE) &
        !grepl(
          paste0(mono_names[covariate], ':'),
          names(coef(mgcv_model)),
          fixed = TRUE
        ) ==
          TRUE
    )
    mono_stan_lines <- paste0(
      mono_stan_lines,
      paste0(
        'array[',
        length(coef_indices),
        '] int b_idx_',
        mono_names_clean[covariate],
        '; // monotonic basis coefficient indices\n'
      )
    )
    mono_idx_data <- list(coef_indices)
    names(mono_idx_data) <- paste0('b_idx_', mono_names_clean[covariate])
    model_data <- append(model_data, mono_idx_data)
  }

  model_file[grep(
    'int<lower=0> ytimes[n, n_series];',
    model_file,
    fixed = TRUE
  )] <-
    paste0(
      model_file[grep(
        'int<lower=0> ytimes[n, n_series];',
        model_file,
        fixed = TRUE
      )],
      '\n',
      mono_stan_lines
    )
  model_file <- readLines(textConnection(model_file), n = -1)

  return(list(model_file = model_file, model_data = model_data))
}
nicholasjclark/mvgam documentation built on April 17, 2025, 9:39 p.m.