Nothing
# --------------------------------------------------- #
# Author: Marius D. Pascariu
# License: MIT
# Last update: Thu Nov 07 11:21:49 2019
# --------------------------------------------------- #
#' Univariate Penalized Composite Link Model (PCLM)
#'
#' Fit univariate penalized composite link model (PCLM) to ungroup binned
#' count data, e.g. age-at-death distributions grouped in age classes.
#'
#' @details The PCLM method is based on the composite link model, which extends
#' standard generalized linear models. It implements the idea that the observed
#' counts, interpreted as realizations from Poisson distributions, are indirect
#' observations of a finer (ungrouped) but latent sequence. This latent sequence
#' represents the distribution of expected means on a fine resolution and has
#' to be estimated from the aggregated data. Estimates are obtained by
#' maximizing a penalized likelihood. This maximization is performed efficiently
#' by a version of the iteratively reweighted least-squares algorithm. Optimal
#' values of the smoothing parameter are chosen by minimizing Bayesian or
#' Akaike's Information Criterion.
#' @param x Vector containing the starting values of the input intervals/bins.
#' For example: if we have 3 bins \code{[0,5), [5,10) and [10, 15)},
#' \code{x} will be defined by the vector: \code{c(0, 5, 10)}.
#' @param y Vector with counts to be ungrouped. It must have the same dimension
#' as \code{x}.
#' @param nlast Length of the last interval. In the example above \code{nlast}
#' would be 5.
#' @param offset Optional offset term to calculate smooth mortality rates.
#' A vector of the same length as x and y. See
#' \insertCite{rizzi2015;textual}{ungroup} for further details.
#' @param out.step Length of estimated intervals in output.
#' Values between 0.1 and 1 are accepted. Default: 1.
#' @param ci.level Level of significance for computing confidence intervals.
#' Default: \code{95}.
#' @param verbose Logical value. Indicates whether a progress bar should be
#' shown or not.
#' Default: \code{FALSE}.
#' @param control List with additional parameters: \itemize{
#' \item{\code{lambda}} -- Smoothing parameter to be used in pclm estimation.
#' If \code{lambda = NA} an algorithm will find the optimal values.
#' \item{kr} -- Knot ratio. Number of internal intervals used for defining
#' 1 knot in B-spline basis construction. See \code{\link{MortSmooth_bbase}}.
#' \item{\code{deg}} -- Degree of the splines needed to create equally-spaced
#' B-splines basis over an abscissa of data.
#' \item{\code{int.lambda}} -- If \code{lambda} is optimized an interval to be
#' searched needs to be specified. Format: vector containing the end-points.
#' \item{\code{diff}} -- An integer indicating the order of differences of the
#' components of PCLM coefficients.
#' \item{\code{opt.method}} -- Selection criterion of the model.
#' Possible values are \code{"AIC"} and \code{"BIC"}.
#' \item{\code{max.iter}} -- Maximal number of iterations used in fitting
#' procedure.
#' \item{\code{tol}} -- Relative tolerance in PCLM fitting procedure.}
#'
#' @return The output is a list with the following components:
#' \item{input}{ A list with arguments provided in input. Saved for
#' convenience.}
#' \item{fitted}{ The fitted values of the PCLM model.}
#' \item{ci}{ Confidence intervals around fitted values.}
#' \item{goodness.of.fit}{ A list containing goodness of fit measures:
#' standard errors, AIC and BIC.}
#' \item{smoothPar}{ Estimated smoothing parameters: \code{lambda, kr}
#' and \code{deg}.}
#' \item{bins.definition}{ Additional values to identify the bins limits and
#' location in input and output objects.}
#' \item{deep}{ A list of objects created in the fitting process. Useful
#' in diagnosis of possible issues.}
#' \item{call}{ An unevaluated function call, that is, an unevaluated
#' expression which consists of the named function applied to the given
#' arguments.}
#' @seealso
#' \code{\link{control.pclm}}
#' \code{\link{plot.pclm}}
#' @references \insertAllCited{}
#' @examples
#' # Data
#' x <- c(0, 1, seq(5, 85, by = 5))
#' y <- c(294, 66, 32, 44, 170, 284, 287, 293, 361, 600, 998,
#' 1572, 2529, 4637, 6161, 7369, 10481, 15293, 39016)
#' offset <- c(114, 440, 509, 492, 628, 618, 576, 580, 634, 657,
#' 631, 584, 573, 619, 530, 384, 303, 245, 249) * 1000
#' nlast <- 26 # the size of the last interval
#'
#' # Example 1 ----------------------
#' M1 <- pclm(x, y, nlast)
#' ls(M1)
#' summary(M1)
#' fitted(M1)
#' plot(M1)
#'
#' # Example 2 ----------------------
#' # ungroup even in smaller intervals
#' M2 <- pclm(x, y, nlast, out.step = 0.5)
#' head(fitted(M1))
#' plot(M1, type = "s")
#' # Note, in example 1 we are estimating intervals of length 1. In example 2
#' # we are estimating intervals of length 0.5 using the same aggregate data.
#'
#' # Example 3 ----------------------
#' # Do not optimise smoothing parameters; choose your own. Faster.
#' M3 <- pclm(x, y, nlast, out.step = 0.5,
#' control = list(lambda = 100, kr = 10, deg = 10))
#' plot(M3)
#'
#' summary(M2)
#' summary(M3) # not the smallest BIC here, but sometimes is not important.
#'
#' # Example 4 -----------------------
#' # Grouped x & grouped offset (estimate death rates)
#' M4 <- pclm(x, y, nlast, offset)
#' plot(M4, type = "s")
#'
#' # Example 5 -----------------------
#' # Grouped x & ungrouped offset (estimate death rates)
#'
#' ungroupped_Ex <- pclm(x, y = offset, nlast, offset = NULL)$fitted # ungroupped offset data
#'
#' M5 <- pclm(x, y, nlast, offset = ungroupped_Ex)
#' @export
pclm <- function(x, y, nlast,
offset = NULL,
out.step = 1,
ci.level = 95,
verbose = FALSE,
control = list()){
# Check input
y <- as.numeric(y)
control <- do.call("control.pclm", control)
input <- I <- as.list(environment()) # save all the input for later use
I$nlast <- validate.nlast(x, nlast, out.step)
type <- "1D"
pclm.input.check(input, type)
# Preliminary; start the clock
if (verbose) {pb <- startpb(0, 100); on.exit(closepb(pb)); setpb(pb, 1)}
I[c("x", "y", "nlast", "offset")] <- create.artificial.bin(I) # ***
# Deal with offset term
if (!is.null(offset)) {
if (length(offset) == length(y)) {
if (verbose) { setpb(pb, 5); cat(" Ungrouping offset")}
I$offset <- pclm(x = I$x,
y = I$offset,
nlast = I$nlast,
offset = NULL,
out.step = out.step,
ci.level = ci.level,
verbose = FALSE,
control = control)$fitted
}
}
# Find lambda
L <- I$control$lambda
if (any(is.na(L))) L <- optimize_par(I, type)
# solve the PCLM
M <- with(control, pclm.fit(x = I$x,
y = I$y,
nlast = I$nlast,
offset = I$offset,
out.step = out.step,
verbose = verbose,
lambda = L,
kr = kr,
deg = deg,
diff = diff,
max.iter = max.iter,
tol = tol,
type = type))
SE <- with(M, compute_standard_errors(B, QmQ, QmQP))
R <- with(M, pclm.confidence(fit, out.step, y, SE, ci.level, type, offset))
R <- delete.artificial.bin(R) # ***
G <- map.bins(x, nlast, out.step)
dn <- G$output$names
names(R$fit) <- names(R$lower) <- names(R$upper) <- names(R$SE) <- dn
# Output
Fcall <- match.call()
Par <- with(control, c(lambda = L, kr = kr, deg = deg))
gof <- list(AIC = AIC.pclm(M),
BIC = BIC.pclm(M),
standard.errors = R$SE)
ci <- list(upper = R$upper,
lower = R$lower)
# exit
out <- list(input = input,
fitted = R$fit,
ci = ci,
goodness.of.fit = gof,
smoothPar = Par,
bin.definition = G,
deep = M,
call = Fcall)
out <- structure(class = "pclm", out)
if (verbose) setpb(pb, 100)
return(out)
}
# ----------------------------------------------
#' Extract PCLM Deviance Residuals
#'
#' @inherit stats::residuals params return
#' @examples
#' x <- c(0, 1, seq(5, 85, by = 5))
#' y <- c(294, 66, 32, 44, 170, 284, 287, 293, 361, 600, 998,
#' 1572, 2529, 4637, 6161, 7369, 10481, 15293, 39016)
#' M1 <- pclm(x, y, nlast = 26)
#'
#' residuals(M1)
#' @export
residuals.pclm <- function(object, ...) {
if (!is.null(object$input$offset)) {
stop("residuals method not implemented for hazard rates", call. = FALSE)
}
C <- object$deep$C
C <- C[-nrow(C), -ncol(C)]
y.obs <- object$input$y
y.hat <- as.numeric(C %*% fitted(object))
res <- y.obs - y.hat
names(res) <- object$bin.definition$input$names
return(res)
}
#' Print for pclm method
#' @param x An object of class \code{"pclm"}
#' @inheritParams base::print
#' @keywords internal
#' @export
print.pclm <- function(x, ...){
cat("\nPenalized Composite Link Model (PCLM)")
cat("\nPCLM Type : Univariate")
cat("\nNumber of input groups :", length(x$input$x))
cat("\nNumber of fitted values :", length(x$fitted))
cat("\nLength of estimate bins :", x$input$out.step, "\n")
cat("\n")
}
#' Summary for pclm method
#' @inheritParams base::summary
#' @keywords internal
#' @export
summary.pclm <- function(object, ...) {
cl <- object$call
AIC <- round(object$goodness.of.fit$AIC, 2)
BIC <- round(object$goodness.of.fit$BIC, 2)
sPar <- round(object$smoothPar)
dim.y <- length(object$input$y)
dim.f <- length(fitted(object))
out.step <- object$input$out.step
out <- structure(class = "summary.pclm", as.list(environment()))
return(out)
}
#' Print for summary.pclm method
#' @param x An object of class \code{"summary.pclm"}
#' @inheritParams print.pclm
#' @keywords internal
#' @export
print.summary.pclm <- function(x, ...) {
with(x, {
cat("\nPenalized Composite Link Model (PCLM)")
cat("\n\nCall:\n"); print(cl)
cat("\nPCLM Type : Univariate")
cat("\nNumber of input groups :", dim.y)
cat("\nNumber of fitted values :", dim.f)
cat("\nLength of estimate bins :", out.step)
cat("\nSmoothing parameter lambda :", sPar[1])
cat("\nB-splines intervals/knot (kr):", sPar[2])
cat("\nB-splines degree (deg) :", sPar[3])
cat("\nAIC :", AIC)
cat("\nBIC :", BIC)
cat("\n")
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.