# Author: Tim Riffe, based on earlier version from Juan Galeano
###############################################################################
#' Objective function to minimize Feeney's zigzag method residual
#' @description This function is auxiliary to \code{smooth_age_5_zigzag_inner()}, see \code{?smooth_age_5_zigzag_inner} for a description.
#' @details This function is not intended to be used at the top level, but just in case, make sure that \code{ageMax = ageMin + 10 * length(p)}. Age groups \code{ >= ageMin} AND \code{ <= ageMin} must be in 5-year age groups. This function does no checks.
#' @param Value numeric vector of (presumably) counts in 5-year age groups.
#' @param Age integer vector of age group lower bounds.
#' @param ageMin integer. Lower age bound to adjust values.
#' @param ageMax integer. Upper age bound to adjust values.
#' @param p numeric vector of adjustment parameters.
#' @return positive residual to minimize.
#' @export
#' @importFrom stats optim
#' @references
#' Feeney, G. 2013 "Removing "Zigzag" from Age Data," http://demographer.com/white-papers/2013-removing-zigzag-from-age-data/
#' @examples
#'Age <- c(0,1,seq(5,90,by=5))
#'smooth_age_5_zigzag_min(dth5_zigzag,Age,ageMin=40,ageMax = 80,p=rep(.05,4))
#'# it's used like this in zigzag()
#'(p <- optim(
#' rep(.05,4),
#' smooth_age_5_zigzag_min,
#' Value = dth5_zigzag,
#' Age = Age,
#' ageMin = 40,
#' ageMax = 80)$par)
#' Smoothed <- smooth_age_5_zigzag_p(dth5_zigzag,Age,40,80,p)
#' # de facto unit test:
#' # check result using results frozen in Feeney spreadsheet
#' # after fixing probable cell range 'error'
#' p.feeney <- c(0.0235802695087692,0.0724286618207911,
#' 0.0242327829742267,0.0883411499065237)
#' ans <- 106.1147411629
#' stopifnot(abs(smooth_age_5_zigzag_min(dth5_zigzag, Age, 40,80,p.feeney) - ans) < 1e-6)
smooth_age_5_zigzag_min <- function(Value,
Age,
ageMin = 40,
ageMax = 80,
p) {
# first, we need an odd number of age groups to smooth over.
# we enforce this
Smoothed <- smooth_age_5_zigzag_p(
Value = Value,
Age = Age,
ageMin = ageMin,
ageMax = ageMax,
p = p
)
# multiplier
id <- abs(Smoothed - Value) > 1e-6
# adjacent avg
avg2 <- avg_adj(Smoothed) * id
# difference squared
diffsq <- (Smoothed - avg2) ^ 2 * id
# return weighted sum sq dev
sum(diffsq * 1 / Smoothed, na.rm = TRUE)
}
#' Smooth population counts using Feeney's zigzag method and smoothing parameters.
#' @description This function is auxiliary to \code{zigzag()}, see \code{?zigzag} for a description.
#' @details This function is not intended to be used at the top level, but just in case, make sure that \code{ageMax = ageMin + 10 * length(p)}. Age groups \code{ >= ageMin} AND \code{ <= ageMin} must be in 5-year age groups. This function does no checks.
#' @param Value numeric vector of (presumably) counts in 5-year age groups.
#' @param Age integer vector of age group lower bounds.
#' @param ageMin integer. Lower age bound to adjust values.
#' @param ageMax integer. Upper age bound to adjust values.
#' @param p numeric vector of adjustment parameters.
#' @return numeric vector of smoothed population counts.
#' @export
#' @references
#' Feeney, G. 2013 "Removing "Zigzag" from Age Data," http://demographer.com/white-papers/2013-removing-zigzag-from-age-data/
#' @examples
#'Age <- c(0,1,seq(5,90,by=5))
#'smooth_age_5_zigzag_min(dth5_zigzag,Age,ageMin=40,ageMax = 80,p=rep(.05,4))
#'# it's used like this in smooth_age_5_zigzag_inner()
#'(p <- optim(
#' rep(.05,4),
#' smooth_age_5_zigzag_min,
#' Value = dth5_zigzag,
#' Age = Age,
#' ageMin = 40,
#' ageMax = 80)$par)
#' Smoothed <- smooth_age_5_zigzag_p(dth5_zigzag,Age,40,80,p)
smooth_age_5_zigzag_p <- function(Value,
Age,
ageMin = 40,
ageMax = 80,
p = rep(0.05, floor((ageMax - ageMin) / 10))) {
# 1) p used differently
p_ages <- as.integer(seq(ageMin + 5, ageMin + length(p) * 10 - 5, length = length(p)))
mid5 <- Age %in% p_ages
down5 <- Age %in% (p_ages - 5)
up5 <- Age %in% (p_ages + 5)
# 2) this can be a multiplier later
id <- rep(0, length(Age))
id[Age >= (min(p_ages) - 5) & Age <= (max(p_ages) + 5)] <- 1
# 2) average of value above and below
avg1 <- avg_adj(Value) * id
# 3) take difference
diff1 <- (Value - avg1) * id
# 5) get Np
Np <- Value[mid5] * p # should be 4 numbers in example
# 6) use Np to smooth
Smoothed <- Value
Smoothed[mid5] <- Value[mid5] - Np
Smoothed[down5] <- Smoothed[down5] + Np / 2
Smoothed[up5] <- Smoothed[up5] + Np / 2
# return smoothed
names(Smoothed) <- Age
Smoothed
}
#' G. Feeney's method of removing the zigzag from counts in 5-year age groups.
#' @description If age heaping is much worse on 0's than on 5's then even counts in 5-year age bins can preserve a sawtooth pattern. Most graduation techniques translate the zig-zag/sawtooth pattern to a wave pattern. It is not typically desired. This method redistributes counts 'from' every second 5-year age group in a specified range 'to' the adjacent age groups. How much to redistribute depends on a detection of roughness in the 5-year binned data, which follows the formulas recommended by Feeney.
#' @details Determining the degree to redistribute population counts is an optimization problem, so this function has two auxiliary functions, \code{p_zigzag()}, which redistributes counts according to a set of specified proportions \code{p}, and \code{zigzag_min()} which is the function minimized to determine the optimal vector of \code{p}. If data is not in 5-year age groups then it is grouped as necessary (unless abridged, in which case grouping is preserved). Only ages \code{>= ageMin} and \code{<= ageMax} will be adjusted. \code{ageMax} is inclusive and interpreted as the lower bound of the highest age group to include. The number of 5-year age groups adjusted must be odd, and \code{ageMax} may be reduced internally without warning in order to force this condition. The open age group is excluded from adjustment. This function also has a wrapper \code{smooth_age_5_zigzag()}, called by \code{smooth_age_5()}.
#' @param Value numeric vector of (presumably) counts in 5-year age groups.
#' @param Age integer vector of age group lower bounds.
#' @param OAG logical. Whether or not the top age group is open. Default \code{TRUE}.
#' @param ageMin integer. Lower age bound to adjust values.
#' @param ageMax integer. Upper age bound to adjust values.
#' @return numeric vector of smoothed counts in 5-year age groups.
#' @export
#' @references
#' Feeney, G. 2013 "Removing "Zigzag" from Age Data," http://demographer.com/white-papers/2013-removing-zigzag-from-age-data/
#' @examples
#' Age <- c(0,1,seq(5,90,by=5))
#' # defaults
#' zz1 <- smooth_age_5_zigzag_inner(dth5_zigzag, Age, OAG = TRUE, ageMin = 40, ageMax = 80)
#' # shift age range up by 5
#' zz2 <- smooth_age_5_zigzag_inner(dth5_zigzag, Age, OAG = TRUE, ageMin = 45, ageMax = 85)
#' \dontrun{
#' plot(Age, dth5_zigzag,pch = 16)
#' lines(Age,zz1,col="red",lty=1)
#' lines(Age,zz2,col="blue",lty=1)
#' # even smoother:
#' lines(Age,(zz2+zz1)/2,col="green",lty=1)
#' legend("bottom",pch=c(16,NA,NA,NA),
#' lty=c(NA,1,1,1),
#' col = c("black","red","blue","green"),
#' legend = c("Original","40-80","45-85","avg"))
#' }
smooth_age_5_zigzag_inner <- function(Value,
Age,
OAG = TRUE,
ageMin = 40,
ageMax = max(Age) - max(Age) %% 10 - 5) {
stopifnot(length(Age) == length(Value))
Age <- as.integer(Age)
if (!is_abridged(Age)) {
Value5 <- groupAges(Value, Age = Age, N = 5)
Age5 <- as.integer(names(Value5))
} else {
Value5 <- Value
Age5 <- Age
}
N <- length(Value5)
NN <- N
# make sure we have a suitable span of age.
Ndecades <- floor((ageMax - ageMin) / 10) * 10
if ((ageMax - ageMin) - Ndecades != 5) {
ageMax <- ageMin + Ndecades + 5
}
# enforce 5-year age groups. Data could be given as such,
# in abridged ages, or in single ages, but will be returned
# in 5-year age groups.
if (OAG) {
OAGvalue <- Value5[N]
Value5[N] <- NA
NN <- N - 1
}
# make sure ageMax isn't too high. Likely doesn't come into play.
if (ageMax > max(Age5[1:NN])) {
hm <- ceiling((ageMax - max(Age5[1:NN])) / 10) * 10
ageMax <- ageMax - hm
}
Ndecades <- floor((ageMax - ageMin) / 10)
# get starting values for p. Number of values to optimize
# depends on age range in question.
p <- rep(0.05, Ndecades)
# get optimal p
p <-
optim(
p,
smooth_age_5_zigzag_min,
Value = Value5,
Age = Age5,
ageMin = ageMin,
ageMax = ageMax
)$par
# and resulting smoothed pops
Smoothed <-
smooth_age_5_zigzag_p(
Value = Value5,
Age = Age5,
ageMin = ageMin,
ageMax = ageMax,
p = p
)
# put OAG back.
if (OAG) {
Smoothed[N] <- OAGvalue
}
names(Smoothed) <- Age5
Smoothed
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.