# Author: Juan Galeano
###############################################################################
#' Zelnik 11-term moving average to adjust for digit preference.
#' @description Zelnik's method is used to adjust distributions by year of age for digit preference.
#' @param Value numeric. A vector of demographic counts in single age groups.
#' @param p integer. Perturbation vector used, 1 or 2. Default 1.
#' @param Age integer. Vector of lower bound of age classes.
#' @details Single year age groups are assumed. If \code{q=1}, age x is affected by ages x-10 through x+10. If \code{q=2}, age x is affected by ages x-15 through x+15.
#' A vector is returned of the same length as \code{Value}, but the boundary values are imputed with \code{NA}.
#' If \code{q=1}, ages 0-9 and the final 10 ages are \code{NA}. If \code{q=2}, ages 0-14 and the final 15 ages are returned as \code{NA}.
#' Note results do not sum to the total from the same age-range of \code{Value} because smoothing draws from ages outside that range.
#' @return A vector with the adjusted values.
#' @export
#' @references
#' \insertRef{gray1987missingages}{DemoTools}
#' @examples
#' # data from gray1987missingages, Table 2, page 21: Aplication of Q1 and
#' # Q2 linear operators, Bangladesh Census, 1 March 1974-Males.
#' Pop <- c(941307,1041335,1237034,1411359,1383853,1541942,1321576,1285877,1563448,886705,
#' 1623998,562924,1485173,543216,771219,903496,686431,370007,942999,250820,
#' 1023667,200131,688640,222011,281738,1239965,288363,263326,483143,78635,
#' 1349886,68438,415127,101596,100758,1392434,178633,126351,286520,50836,
#' 1331036,48995,251153,58393,54995,1033812,68792,72766,175943,28254,
#' 1038747,32894,136179,37667,38230,596049,52602,36493,74106,16759,
#' 790643,20596,70109,18044,19891,357491,15253,17489,31057,8481,
#' 429816,7951,35583,8612,6589,454645)
#' Age <-0:75
#' z1 <- zelnik(Pop,1,Age)
#' z2 <- zelnik(Pop,2,Age)
#' \dontrun{
#' plot(Age,Pop,type='l',col = gray(.4))
#' lines(as.integer(names(z1)),z1,col = "blue",lwd=2)
#' lines(as.integer(names(z2)),z2,col = "red",lwd = 2)
#' }
#' \dontshow{
#' # here some units tests:
#' # a function required to make original table work
#' perturb <- function(x, pert = 0, pos = 45){
#' x[pos] <- x[pos] + pert
#' x
#' }
#' p1_answer <- c(rep(NA,10),1151172,1077888,982103,891354,
#' 828173,762583,718528,649206,579004,535521,552769,572789,
#' 543715,504766,456185,484627,516545,487585,458525,437117,
#' 442480,456052,440706,415651,400515,403565,409668,402329,
#' 391982,392801,362520,327180,322889,318215,316334,296278,
#' 276958,275057,272350,279354,244466,209374,214178,210162,
#' 208499,191717,173916,173882,172936,176417,158124,138487,
#' 141009,142779,145479,rep(NA,11))
#'
#' p2_answer <- c(rep(NA,15),769826,711733,663995,623069,
#' 584797,552352,530538,515578,505073,497808,489977,479740,
#' 469326,460446,454699,449240,439744,429856,422980,419200,
#' 414487,403537,389871,378716,370274,361289,348796,334849,
#' 321901,310028,298308,286242,274796,263983,253143,242464,
#' 232007,221636,211372,200807,191057,183573,176971,170539,164776,
#' rep(NA,16))
#' \dontrun{
#' # using census as given in table. Symmetry makes TR suspect this
#' # is due to a single value. Trial and error ensued.
#' plot(Age,zelnik(Pop,1,Age) - p1_answer,xlim=c(32,54),type='l')
#' # change value of age 44 by removing 40 individuals. Suspected
#' # error in original table (or in value used by original authors to carry out calcs)
#' # 54995 now is 54955, and we get a match.
#' lines(Age,round(zelnik(perturb(Pop,-40,45),1,Age)) - p1_answer, col = "blue")
#'
#' # that the same adjustment works for p2 makes me suspect that the error
#' # is indeed in the value given for the census count at age 44.
#' plot(Age,round(zelnik(Pop,2,Age)) - p2_answer,xlim=c(30,55),type='l')
#' lines(Age,round(zelnik(perturb(Pop,-40, 45), 2, Age)) - p2_answer, col = "blue")
#' }
#' # we use this for the de facto unit test:
#'
#' d1 <- round(zelnik(perturb(Pop,-40,45),1,Age)) - p1_answer
#' d2 <- round(zelnik(perturb(Pop,-40,45),2,Age)) - p2_answer
#' stopifnot(all(d1[!is.na(d1)] == 0))
#' stopifnot(all(d2[!is.na(d2)] == 0))
#' }
zelnik <- function(Value, p, Age) {
# edited/rewritten by TR 21-10-2017
# Table 11, pp.20 Linear operators for use in deriving unbiased estimates of single-age distributions
if (p == 1) {
pi <- c( -0.0025, -0.01, -0.02, -0.03,
-0.04, 0.05, 0.14, 0.13,
0.12, 0.11, 0.105, 0.11,
0.12, 0.13, 0.14, 0.05,
-0.04, -0.03, -0.02, -0.01,
-0.0025
)
}
if (p == 2) {
pi <-
c( -0.00025, -0.0015, -0.0045, -0.0095,
-0.0165, -0.018, -0.0065, 0.0105,
0.0255, 0.0385, 0.05025, 0.063,
0.079, 0.099, 0.123, 0.136,
0.123, 0.099, 0.079, 0.063,
0.05025, 0.0385, 0.0255, 0.0105,
-0.0065, -0.018, -0.0165, -0.0095,
-0.0045, -0.0015, -0.00025
)
}
n <- (length(pi) - 1) / 2
# Multiply the vector of single ages by the linear operators Q1 or Q2
multi <- Value %*% t(pi)
# Get the diagonal for each age and split the vector
# used for aggregation
diagonal <- row(multi) - col(multi) + n
# this replaces sapply, and if statements to select ranges
Q <- tapply(multi, diagonal, sum)[as.character(Age)]
# impute ends with NA
Q[1:n] <- NA
Q[(length(Q) - n):length(Q)] <- NA
Q
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.