R/FDM.R
In vital: Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data

Documented in FDM forecast.FDM report.FDM

#' Functional data model
#'
#' Functional data model of mortality or fertility rates as a function of age.
#' `FDM()` returns a functional data model applied to the formula's response
#' variable as a function of age.
#'
#' @aliases report.FDM
#'
#' @param formula Model specification.
#' @param order Number of principal components to fit.
#' @param ts_model_fn Univariate time series modelling function for the coefficients. Any
#' model that works with the fable package is ok. Default is [fable::ARIMA()].
#' @param coherent If TRUE, fitted models are stationary, other than for the case of
#' a key variable taking the value `geometric_mean`. This is designed to work with
#' vitals produced using \code{\link{make_pr}()}. Default is FALSE. It only works
#' when `ts_model_fn` is \code{\link[fable]{ARIMA}()}.
#' @param ... Not used.
#'
#' @references Hyndman, R. J., and Ullah, S. (2007) Robust forecasting of
#' mortality and fertility rates: a functional data approach.
#' *Computational Statistics & Data Analysis*, 5, 4942-4956.
#' <https://robjhyndman.com/publications/funcfor/>
#' Hyndman, R. J., Booth, H., & Yasmeen, F. (2013). Coherent mortality
#' forecasting: the product-ratio method with functional time series models.
#' *Demography*, 50(1), 261-283.
#' <https://robjhyndman.com/publications/coherentfdm/>
#' @author Rob J Hyndman
#' @return A model specification.
#'
#' @examples
#' hu <- norway_mortality |>
#'   dplyr::filter(Sex == "Female", Year > 2010) |>
#'   smooth_mortality(Mortality) |>
#'   model(hyndman_ullah = FDM(log(.smooth)))
#' report(hu)
#' autoplot(hu)
#' @export
FDM <- function(formula, order = 6, ts_model_fn = fable::ARIMA,
                coherent = FALSE, ...) {
  if(coherent & !identical(ts_model_fn, fable::ARIMA)) {
    stop("coherent = TRUE only works with ts_model_fn = fable::ARIMA")
  }
  if(!coherent) {
    coherent <- NULL
  }
  fd_model <- new_model_class("fdm", train = train_fdm)
  new_model_definition(fd_model, !!enquo(formula),
    order = order, ts_model_fn = ts_model_fn, coherent = coherent, ...)
}

train_fdm <- function(.data, specials, order, ts_model_fn, coherent, ...) {
  indexvar <- index_var(.data)
  vvar <- vital_var_list(.data)
  agevar <- vvar$age
  measures <- measured_vars(.data)
  measures <- measures[!(measures %in% c(agevar, vvar$population))]
  measures <- measures[1]
  out <- fdm(.data, order = order, ts_model_fn = ts_model_fn, coherent = coherent)

  fitted <- out$data |>
    mutate(
      .innov = .data[[measures]] - .fitted,
      .innov = if_else(.innov < -1e20, NA, .innov),
    ) |>
    select(all_of(c(indexvar, agevar, ".fitted", ".innov")))

  ts_models <- out$ts_models
  out$data <- out$ts_models <- NULL
  structure(
    list(
      model = out,
      fitted = fitted,
      ts_models = ts_models,
      nobs = sum(!is.na(.data[[measures]]))
    ),
    class = "FDM"
  )
}

#' @rdname forecast
#' @export

forecast.FDM <- function(object, new_data = NULL, h = NULL, point_forecast = list(.mean = mean),
  simulate = FALSE, bootstrap = FALSE, times = 5000, ...) {

# simulation/bootstrap not actually used here as forecast.mdl_vtl_ts
# handles this using generate() and forecast.FDM is never called.
# The arguments are included to avoid a warning message.

  # Forecast all beta series using stored models
  h <- length(unique(new_data[[index_var(new_data)]]))
  fc <- purrr::map(object$ts_model, function(x) {
      forecast(x, h = h) |>
      select(-.mean, -.model) |>
      as_tibble()
    }
  )
  indexvar <- index_var(object$model$by_t)
  fc <- purrr::reduce(fc, left_join, by=indexvar)

  # Create forecasts of response series
  agevar <- colnames(object$model$by_x)[1]
  fc <- new_data |>
    left_join(object$model$by_x, by = agevar) |>
    left_join(fc, by = indexvar)
  fc$out <- fc$mean
  for(i in seq_along(object$ts_model)) {
    fc$out <- fc$out + fc[[paste0("beta", i)]] * fc[[paste0("phi",i)]]
  }
  fc |>
    pull(out)
}

#' @export
generate.FDM <- function(x, new_data = NULL, h = NULL,
  bootstrap = FALSE, times = 1,
  forecast_fn, ...) {
  agevar <- age_var(new_data)
  indexvar <- index_var(new_data)
  if(times != length(unique(new_data$.rep)))
    stop("We have a problem")

  # Simulate all beta series using stored models
  h <- length(unique(new_data[[index_var(new_data)]]))
  fc <- purrr::map(x$ts_models, function(x) {
      out <- generate(x, h = h, bootstrap = bootstrap, times = times) |>
        as_tibble()
      out$.model <- out$.innov <- NULL
      return(out)
    }
  )
  fc <- purrr::reduce(fc, left_join, by=c(indexvar, ".rep"))
  names(fc)[-(1:2)] <- names(x$ts_models)

  # Create simulations of response series
  fc <- new_data |>
    left_join(x$model$by_x, by = agevar) |>
    left_join(fc, by = c(indexvar, ".rep"))
  fc$out <- fc$mean
  for(i in seq_along(x$ts_models)) {
    fc$out <- fc$out + fc[[paste0("beta", i)]] * fc[[paste0("phi",i)]]
  }
  fc |>
    select(.rep, sym(indexvar), !!agevar, .sim=out) |>
    transmute(group_by_key(new_data), .sim)
}

#' @export
glance.FDM <- function(x, ...) {
  tibble(
    nobs = x$nobs,
    varprop = sum(x$model$varprop)
  )
}

#' @export
tidy.FDM <- function(x, ...) {
  return(NULL)
}

#' @export
report.FDM <- function(object, ...) {
  cat("\n")
  cat("Basis functions\n")
  print(object$model$by_x, n=5)
  cat("\nCoefficients\n")
  print(object$model$by_t, n=5)
  cat("\nTime series models\n")
  models <- names(object$ts_models)
  for(i in seq_along(object$ts_models)) {
    cat("  ", models[i], ": ")
    cat(model_sum(object$ts_model[[i]]$fit[[1]]), "\n")
  }
  cat("\nVariance explained\n  ")
  cat(paste(round(object$model$varprop*100, 2), collapse=" + "))
  cat(paste0(" = ", round(sum(object$model$varprop)*100, 2), "%\n"))
}

#' @export
model_sum.FDM <- function(x) {
  paste0("FDM")
}


#' @export
time_components.FDM <- function(object, ...) {
  modelname <- attributes(object)$model
  object <- object |>
    mutate(out = purrr::map(object[[modelname]], function(x){x$fit$model})) |>
    as_tibble()
  object[[modelname]] <- NULL
  index <- index_var(object$out[[1]]$by_t)
  keys <- head(colnames(object), -1)
  object$out <- lapply(object$out, function(x) as_tibble(x$by_t))
  object |>
    tidyr::unnest("out") |>
    as_tsibble(index = index, key=all_of(keys))
}

#' @export
age_components.FDM <- function(object, ...) {
  modelname <- attributes(object)$model
  object <- object |>
    mutate(out = purrr::map(object[[modelname]], function(x){x$fit$model})) |>
    as_tibble()
  object[[modelname]] <- NULL
  object$out  <- lapply(object$out, function(x) as_tibble(x$by_x))
  object |> tidyr::unnest("out")
}

#' @export
autoplot.FDM <- function(object, show_order = 2, ...) {
  obj_time <- time_components(object)
  obj_x <- age_components(object)

  meanvar <- "mean"
  tmp <- colnames(obj_time)
  timevar <- tmp[grepl("beta", tmp)]
  tmp <- colnames(obj_x)
  agevar <- tmp[grepl("phi", tmp)]
  index <- index_var(obj_time)
  keys <- head(colnames(object), -1)

  # Set up list of plots
  p <- list()
  p[[1]] <- age_plot(obj_x, meanvar, keys) + ggplot2::ylab(meanvar)
  for (i in seq(show_order)) {
    p[[i + 1]] <- age_plot(obj_x, agevar[i], keys)
  }
  p[[show_order + 2]] <- patchwork::guide_area()
  for (i in seq(show_order)) {
    p[[i + 2 + show_order]] <- time_plot(obj_time, timevar[i], keys)
  }
  patchwork::wrap_plots(p) +
    patchwork::plot_layout(ncol = show_order + 1, nrow = 2, guides = "collect")
}

# Function based on demography::fdm() and ftsa::ftsm()
# But assumes transformation already done

fdm <- function(data, order = 6, ts_model_fn = fable::ARIMA, coherent = NULL) {
  if(is.null(coherent)) {
    coherent <- FALSE
  }
  # Grab variable names
  indexvar <- index_var(data)
  vvar <- vital_var_list(data)
  agevar <- vvar$age
  measures <- measured_vars(data)
  measures <- measures[!(measures %in% c(agevar, vvar$population))]
  measures <- measures[1]

  # Create rates matrix
  year <- sort(unique(data[[indexvar]]))
  ages <- sort(unique(data[[agevar]]))
  mx <- data |>
    as_tibble() |>
    dplyr::select(all_of(c(indexvar, agevar, measures))) |>
    tidyr::pivot_wider(values_from = all_of(measures), names_from = all_of(agevar))
  mx[[indexvar]] <- NULL
  mx <- as.matrix(mx)
  mx[mx == -Inf] <- NA
  # PC decomposition
  y.pca <- fdpca(mx, order = order)

  # Compute fitted values
  fits <- as.data.frame(y.pca$basis %*% t(y.pca$coeff))
  colnames(fits) <- year
  fits <- fits |>
    dplyr::mutate(Age = ages) |>
    tidyr::pivot_longer(-Age, names_to = "Year", values_to = ".fitted") |>
    dplyr::mutate(Year = as.integer(Year))

  # Add fitted values and residuals to original data
  output <- data |>
    as_tibble() |>
    dplyr::left_join(fits, by = c("Year", "Age")) |>
    as_vital(index = sym(indexvar), key = sym(agevar), .age = agevar)

  by_x <- as_tibble(y.pca$basis)
  by_x[[agevar]] <- sort(unique(data[[agevar]]))
  by_x <- by_x |>
    select(sym(agevar), everything())
  by_t <- as_tibble(y.pca$coeff)
  by_t[[indexvar]] <- sort(unique(data[[indexvar]]))
  by_t <- as_tsibble(by_t, index = sym(indexvar)) |>
    select(sym(indexvar), everything())

  # Fit ts_models to coefficients
  ts_coefs <- names(by_t)
  ts_coefs <- ts_coefs[grepl("beta", ts_coefs)]
  fits <- purrr::map(ts_coefs, function(x) {
    if(coherent) {
      mod <- by_t |>
        fabletools::model(fit = ts_model_fn(!!sym(x),
          order_constraint = (p + q + P + Q <= 6) & (d + D == 0))
        )
    } else {
      mod <- by_t |>
        fabletools::model(fit = ts_model_fn(!!sym(x)))
    }
    return(mod)
  })
  names(fits) <- ts_coefs

  # Return object
  list(
    data = output,
    by_x = by_x,
    by_t = by_t,
    ts_models = fits,
    varprop = y.pca$varprop
  )
}


# Functional PCA

fdpca <- function(X, order = 2, ngrid = 500) {
  y <- t(X)
  x <- seq(NCOL(X))
  n <- NCOL(y)
  m <- length(x)
  if (order < 0) {
    stop("Order must be at least 0")
  }
  if (ngrid < n) {
    stop("Grid should be larger than number of observations per time period.")
  }
  # Interpolate data onto grid using interpolating splines
  yy <- matrix(NA, nrow = ngrid, ncol = n)
  for (i in seq(n)) {
    miss <- is.na(y[, i])
    yy[, i] <- stats::spline(x[!miss], y[!miss, i], n = ngrid)$y
  }
  xx <- seq(min(x), max(x), l = ngrid)
  # Compute smooth means
  ax <- rowMeans(yy, na.rm = TRUE)
  # Centre data
  yy <- sweep(yy, 1, ax)
  # Standard error in mean estimate
  axse <- stats::approx(xx, sqrt(apply(yy, 1, stats::var) / n), xout = x)$y
  # Set up coeff and basis for order 0
  coeff <- matrix(1, nrow=n, ncol=1)
  basis <- matrix(stats::approx(xx, ax, xout = x)$y, ncol=1)
  colnames(coeff)[1] <- colnames(basis)[1] <- "mean"
  if (order == 0) {
    return(list(
      basis = basis, coeff = coeff,
      v = rep(1, n), mean.se = axse
    ))
  }
  # Compute SVD
  s <- La.svd(t(yy))
  # Eigenvectors and eigenvalues
  Phi <- as.matrix(t(s$vt)[, seq(order)])
  eigen_value <- varprop <- s$d^2
  varprop <- varprop / sum(s$d^2)
  # Normalize eigenvectors
  Phinorm <- matrix(NA, length(x), order)
  Phinormngrid <- matrix(NA, ngrid, order)
  delta <- xx[2] - xx[1]
  for (i in seq(order)) {
    Phinorm[, i] <- stats::approx(xx, Phi[, i], xout = x)$y / delta /
      (sqrt(sum((stats::approx(xx, Phi[, i], xout = x)$y / delta)^2)))
    Phinormngrid[, i] <- stats::approx(x, Phinorm[, i], xout = xx)$y
  }
  # Extract coeff and basis matrices
  B <- t(yy) %*% Phinormngrid
  v <- colSums((yy - Phinormngrid %*% t(B))^2) * delta
  colnames(B) <- paste0("beta", seq(order))
  coeffdummy <- B * delta
  colmeanrm <- matrix(colMeans(coeffdummy), dim(B)[2], 1)
  coeff <- cbind(coeff, sweep(coeffdummy, 2, colmeanrm))
  m <- ncol(basis)
  basis <- basis + Phinorm %*% colmeanrm
  colnames(basis) <- "mean"
  for (i in seq(order)) {
    basis <- cbind(basis, Phinorm[, i])
    if (sum(basis[, i + m]) < 0) {
      basis[, i + m] <- -basis[, i + m]
      coeff[, i + m] <- -coeff[, i + m]
    }
  }
  colnames(basis)[m + seq(order)] <- paste0("phi", seq(order))

  # Return results
  return(list(
    basis = basis, coeff = coeff,
    varprop = varprop[seq(order)], eigen_value = eigen_value,
    v = v, mean.se = axse
  ))
}



utils::globalVariables(c(".model", "out", "object", ".fitted", ".rep"))
utils::globalVariables(c("p", "P", "d", "D", "q", "Q", "constant"))

Any scripts or data that you put into this service are public.

vital documentation built on June 22, 2024, 9:56 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

vital
Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data

R/FDM.R
In vital: Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data

Defines functions fdpca fdm autoplot.FDM age_components.FDM time_components.FDM model_sum.FDM report.FDM tidy.FDM glance.FDM generate.FDM forecast.FDM train_fdm FDM

Documented in FDM forecast.FDM report.FDM

Try the vital package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

vital Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data

R/FDM.R In vital: Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data

Defines functions fdpca fdm autoplot.FDM age_components.FDM time_components.FDM model_sum.FDM report.FDM tidy.FDM glance.FDM generate.FDM forecast.FDM train_fdm FDM

Documented in FDM forecast.FDM report.FDM

Try the vital package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

vital
Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data

R/FDM.R
In vital: Tidy Analysis Tools for Mortality, Fertility, Migration and Population Data