# Author: tim
###############################################################################
#' the basic Sprague age-splitting method
#'
#' @description This method is based on the first stage of the Sprague R
#' script prepared by Thomas Buettner and Patrick Gerland, itself based on the description
#' in Siegel and Swanson, 2004, p. 727.
#'
#' @param popmat a numeric matrix of population counts in 5-year age groups, with integer-labeled
#' margins (age in rows and year in columns).
#' @details Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age).
#' Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will
#' work as well, as long as it's given as a single-column matrix.
#'
#' @return an age-period matrix od split population counts with the same number of
#' columns as \code{popmat}, and single ages in rows.
#'
#' @references
#' \insertRef{sprague1880explanation}{DemoTools}
#' \insertRef{shryock1973methods}{DemoTools}
#' \insertRef{siegel2004methods}{DemoTools}
#' @export
#'
#' @examples
#' p5 <- structure(c(54170, 44775, 42142, 38464, 34406, 30386, 26933,
#' 23481, 20602, 16489, 14248, 9928, 8490, 4801, 3599, 2048, 941,
#' 326, 80, 17, 0, 57424, 44475, 41752, 39628, 34757, 30605, 27183,
#' 23792, 20724, 17056, 14059, 10585, 8103, 5306, 3367, 2040, 963,
#' 315, 80, 16, 1, 60272, 44780, 41804, 40229, 35155, 30978, 27456,
#' 24097, 20873, 17546, 13990, 11146, 7841, 5738, 3184, 2062, 961,
#' 311, 80, 15, 1, 62727, 45681, 42101, 40474, 35599, 31439, 27758,
#' 24396, 21055, 17958, 14046, 11589, 7731, 6060, 3086, 2083, 949,
#' 312, 79, 14, 1, 64816, 47137, 42508, 40532, 36083, 31940, 28092,
#' 24693, 21274, 18299, 14223, 11906, 7785, 6255, 3090, 2084, 938,
#' 316, 80, 14, 2),
#' .Dim = c(21L, 5L),
#' .Dimnames = list(seq(0,100,by=5), 1950:1954))
#' head(p5) # this is the entire matrix
#' p1 <- sprague(p5)
#' head(p1); tail(p1)
#' colSums(p1) - colSums(p5)
#'
#' # another case, starting with single ages
#' # note \sprague() does not group ages. You need to do it
#' # first.
#' Value <- c(9544406,7471790,11590109,11881844,11872503,12968350,11993151,10033918,
#' 14312222,8111523,15311047,6861510,13305117,7454575,9015381,10325432,
#' 9055588,5519173,12546779,4784102,13365429,4630254,9595545,4727963,
#' 5195032,15061479,5467392,4011539,8033850,1972327,17396266,1647397,
#' 6539557,2233521,2101024,16768198,3211834,1923169,4472854,
#' 1182245,15874081,1017752,3673865,1247304,1029243,12619050,1499847,
#' 1250321,2862148,723195,12396632,733501,2186678,777379,810700,
#' 7298270,1116032,650402,1465209,411834,9478824,429296,1190060,
#' 446290,362767,4998209,388753,334629,593906,178133,
#' 4560342,179460,481230,159087,155831,1606147,166763,93569,182238,
#' 53567,1715697,127486,150782,52332,48664,456387,46978,34448,
#' 44015,19172,329149,48004,28574,9200,7003,75195,13140,5889,
#' 18915,21221,72373)
#' Age <- 0:100
#' # group ages
#' Val5 <- groupAges(Value, Age)
#' # name the vector (or dims of matrix if you end up
#' # producing a matrix)
#' names(Val5) <- Age[Age %% 5 == 0]
#' # notice how this particular case produces a negative value in the last age
#' # before OAG:
#' (pops <- sprague(Val5))
#' # this replaces ages 90+, guaranteed no negatives.
#' spragueCloseout(Val5, pops = pops)
#' # Note: there are no kludges built into sprague() to handle such cases.
#' # these ought to be handled by wrappers as appropriate.
sprague <- function(popmat){
popmat <- as.matrix(popmat)
scm <- spragueExpand(popmat)
pop1 <- scm %*% popmat
rg <- range(as.integer(rownames(popmat)))
dimnames(pop1) <- list(rg[1]:rg[2], colnames(popmat))
pop1
}
#' create the Sprague coefficient matrix
#'
#' @description The resulting coefficient matrix is based on the number of rows in \code{popmat}
#' where we assume that each row of data is a 5-year age group. The final row may be an open
#' or closed age group, as indicated by the \code{OAG} argument.
#'
#' @param popmat numeric matrix of age-period population counts in 5-year age groups
#' @param OAG logical (default \code{TRUE}. Is the final age group open?
#'
#' @details The \code{popmat} matrix is really just a placeholder in this case. This function is
#' a utility called by the Sprague family of functions, where it is most convenient to just pass
#' in the same matrix being used in those calcs to determine the layout of the coefficient matrix.
#'
#' @export
#'
#' @references
#' \insertRef{sprague1880explanation}{DemoTools}
#' \insertRef{shryock1973methods}{DemoTools}
#' \insertRef{siegel2004methods}{DemoTools}
#' @examples
#' p5 <- structure(c(54170, 44775, 42142, 38464, 34406, 30386, 26933,
#' 23481, 20602, 16489, 14248, 9928, 8490, 4801, 3599, 2048, 941,
#' 326, 80, 17, 0, 57424, 44475, 41752, 39628, 34757, 30605, 27183,
#' 23792, 20724, 17056, 14059, 10585, 8103, 5306, 3367, 2040, 963,
#' 315, 80, 16, 1, 60272, 44780, 41804, 40229, 35155, 30978, 27456,
#' 24097, 20873, 17546, 13990, 11146, 7841, 5738, 3184, 2062, 961,
#' 311, 80, 15, 1, 62727, 45681, 42101, 40474, 35599, 31439, 27758,
#' 24396, 21055, 17958, 14046, 11589, 7731, 6060, 3086, 2083, 949,
#' 312, 79, 14, 1, 64816, 47137, 42508, 40532, 36083, 31940, 28092,
#' 24693, 21274, 18299, 14223, 11906, 7785, 6255, 3090, 2084, 938,
#' 316, 80, 14, 2),
#' .Dim = c(21L, 5L),
#' .Dimnames = list(seq(0,100,by=5), 1950:1954))
#' coefsOA <- spragueExpand(p5, TRUE)
#' coefsclosed <- spragueExpand(p5, FALSE)
#' dim(coefsOA)
#' dim(coefsclosed)
spragueExpand <- function(popmat, OAG = TRUE){
popmat <- as.matrix(popmat)
# figure out ages and years
Age5 <- as.integer(rownames(popmat))
Age1 <- min(Age5):max(Age5)
yrs <- as.integer(colnames(popmat))
# nr 5-year age groups
m <- nrow(popmat)
# nr rows in coef mat.
n <- m * 5 - ifelse(OAG, 4, 0)
# number of middle blocks
MP <- m - ifelse(OAG, 5, 4)
# get the split coefficients
# block for ages 0-9
g1g2 <- matrix(c(
0.3616, -0.2768, 0.1488, -0.0336, 0.0000,
0.2640, -0.0960, 0.0400, -0.0080, 0.0000,
0.1840, 0.0400, -0.0320, 0.0080, 0.0000,
0.1200, 0.1360, -0.0720, 0.0160, 0.0000,
0.0704, 0.1968, -0.0848, 0.0176, 0.0000,
0.0336, 0.2272, -0.0752, 0.0144, 0.0000,
0.0080, 0.2320, -0.0480, 0.0080, 0.0000,
-0.0080, 0.2160, -0.0080, 0.0000, 0.0000,
-0.0160, 0.1840, 0.0400, -0.0080, 0.0000,
-0.0176, 0.1408, 0.0912, -0.0144, 0.0000),
nrow = 10, ncol = 5, byrow = TRUE)
# block for middle ages
g3 <- matrix(c(-0.0128, 0.0848, 0.1504, -0.0240, 0.0016,
-0.0016, 0.0144, 0.2224, -0.0416, 0.0064,
0.0064, -0.0336, 0.2544, -0.0336, 0.0064,
0.0064, -0.0416, 0.2224, 0.0144, -0.0016,
0.0016, -0.0240, 0.1504, 0.0848, -0.0128),
5, 5, byrow = TRUE)
# block prior to closeout
g4g5 <- matrix(c(0.0000, -0.0144, 0.0912, 0.1408, -0.0176,
0.0000, -0.0080, 0.0400, 0.1840, -0.0160,
0.0000, 0.0000, -0.0080, 0.2160, -0.0080,
0.0000, 0.0080, -0.0480, 0.2320, 0.0080,
0.0000, 0.0144, -0.0752, 0.2272, 0.0336,
0.0000, 0.0176, -0.0848, 0.1968, 0.0704,
0.0000, 0.0160, -0.0720, 0.1360, 0.1200,
0.0000, 0.0080, -0.0320, 0.0400, 0.1840,
0.0000, -0.0080, 0.0400, -0.0960, 0.2640,
0.0000, -0.0336, 0.1488, -0.2768, 0.3616),
nrow = 10, ncol = 5, byrow = TRUE)
## create a Sprague coefficient matrix for 5-year age groups
bm <- matrix(0, nrow = n, ncol = m)
## insert upper left block
bm[1:10, 1:5] <- g1g2
# determine positions of middle blocks
rowpos <- matrix(11:((MP * 5) + 10), ncol = 5, byrow = TRUE)
colpos <- row(rowpos) + col(rowpos) - 1
for (i in (1:MP)) {
# calculate the slices and add middle panels accordingly
bm[rowpos[i, ], colpos[i, ]] <- g3
}
## insert last two panels
fr <- nrow(bm) - ifelse(OAG,10,9)
lr <- fr + 9
fc <- ncol(bm) - ifelse(OAG, 5, 4)
lc <- fc + 4
bm[fr:lr,fc:lc] <- g4g5
if (OAG){
# preserve open ended age group
bm[nrow(bm), ncol(bm)] <- 1
}
bm
}
#' create the Grabill coefficient matrix
#'
#' @description The resulting coefficient matrix is based on the number of rows in \code{popmat}
#' where we assume that each row of data is a 5-year age group and the final row is an open age group
#' to be preserved as such.
#'
#' @param popmat numeric matrix of age-period population counts in 5-year age groups
#'
#' @details The \code{popmat} matrix is really just a placeholder in this case. This function is
#' a utility called by the Grabill family of functions, where it is most convenient to just pass
#' in the same matrix being used in those calcs to determine the layout of the coefficient matrix.
#' Note that these coefficients do not constrain population counts to their year totals. This function
#' is called by \code{grabill()}, which ensures matching marginals by 1) blending boundary ages
#' into the Sprague estimated population, and 2) a second constraint on the middle age groups to enforce
#' matching sums.
#'
#' @references
#' \insertRef{shryock1973methods}{DemoTools}
#'
#' @export
grabillExpand <- function(popmat){
popmat <- as.matrix(popmat)
# nr 5-year age groups
m <- nrow(popmat)
# nr closed single ages
m1 <- m * 5 - 5
# number of middle blocks
MP <- m - 5
## create a Grabill coefficient matrix for 5-year age groups
scmg <- matrix(0, nrow = m1 + 1, ncol = m)
## insert last two panels
fr <- (m - 3) * 5 + 1
lr <- (m - 1) * 5
fc <- MP
lc <- MP + 4
# preserve open age group
scmg[m1 + 1, m] <- 1
# primary grabill coef block
g3g <- matrix(
c(
0.0111, 0.0816, 0.0826, 0.0256, -0.0009,
0.0049, 0.0673, 0.0903, 0.0377, -0.0002,
0.0015, 0.0519, 0.0932, 0.0519, 0.0015,
-0.0002, 0.0377, 0.0903, 0.0673, 0.0049,
-0.0009, 0.0256, 0.0826, 0.0816, 0.0111),
5, 5, byrow = TRUE)
# ----------------------------------------------------------
# Note: for the boundary ages we keep shuffling in g3g, the same grabill
# coefs. The columns on the boundaries will NOT sum to 1. These coefs are
# used just for the firs pass, then results blend into the Sprague boundary
# estimates.
# ----------------------------------------------------------
# the young age coefficients
g1g2g <- matrix(0,nrow=10,ncol=5)
g1g2g[1:5, 1:3] <- g3g[,3:5]
g1g2g[6:10, 1:4] <- g3g[,2:5]
# the old age coefficients
g4g5g <- matrix(0,nrow=10,ncol=5)
g4g5g[1:5, 2:5] <- g3g[,1:4]
g4g5g[6:10, 3:5] <- g3g[,1:3]
scmg[1:10, 1:5] <- g1g2g
scmg[fr:lr,fc:lc] <- g4g5g
# determine positions of middle blocks
rowpos <- matrix(11:((MP*5) + 10), ncol = 5, byrow = TRUE)
colpos <- row(rowpos) + col(rowpos) - 1
for (i in (1:MP)) {
# calculate the slices and add middle panels accordingly
scmg[rowpos[i,], colpos[i, ]] <- g3g
}
# return coefficient matrix
scmg
}
#' the basic Grabill age-splitting method
#'
#' @description This method uses Grabill's aggressive redistribution of middle ages and blends into
#' Sprague estimated single-age population counts for the first and final ten ages. Open age groups
#' are preserved, as are annual totals.
#'
#' @param popmat a numeric matrix of population counts in 5-year age groups, with integer-labeled
#' margins (age in rows and year in columns).
#' @details Ages should refer to lower age bounds, ending in the open age group in the last row (not a closed terminal age).
#' Dimension labelling is necessary. There must be at least six age groups (including the open group). One year of data will
#' work as well, as long as it's given as a single-column matrix.
#'
#' @return an age-period matrix od split population counts with the same number of
#' columns as \code{popmat}, and single ages in rows.
#'
#' @references
#' \insertRef{shryock1973methods}{DemoTools}
#'
#' @export
#'
#' @examples
#' p5 <- structure(c(54170.08, 44774.6, 42141.587, 38463.515, 34405.607,
#' 162369.816, 57424.3568738, 44475.4981681, 41751.7574114, 39628.4338929,
#' 34756.9473002, 164194.0485702, 60272.2061248, 44780.1982856,
#' 41803.6541424, 40229.0292664, 35154.7682192, 166275.9022992,
#' 62726.896388, 45681.1355532, 42100.72506, 40473.8600572, 35598.545404,
#' 168556.5331816, 64815.5458002, 47136.5341033, 42508.3026466,
#' 40532.3096745, 36082.7490698, 170990.473735, 66579.122, 49070.407,
#' 42953.604, 40534.586, 36596.844, 173545.633), .Dim = c(6L, 6L
#' ), .Dimnames = list(seq(0,25,5), 1950:1955))
#' head(p5) # this is the entire matrix
#' p1 <- grabill(p5)
#' head(p1); tail(p1)
#' colSums(p1) - colSums(p5)
#' p1 - sprague(p5)
grabill <- function(popmat){
popmat <- as.matrix(popmat)
# get coefficient matrices for Sprague and Grabill
scmg <- grabillExpand(popmat)
scm <- spragueExpand(popmat)
# split pop counts
pops <- scm %*% popmat
popg <- scmg %*% popmat
# ---------------------------------------------
# now we graft the two estimates in together,
# preserving the middle part for grabill, and blending
# aggressively into the young and closeout parts of Sprague
# weights for grafting in grabill
m <- nrow(pops)
lr <- m - 1
fr <- lr - 9
# these weights do much better than linear weights.
w10 <- exp(row(pops[1:10, , drop = FALSE]) ) / exp(10.1)
# blend together young ages
popg[1:10, ] <- w10 * popg[1:10, ] + (1 - w10) * pops[1:10, ]
# blend together old ages
popg[fr:lr, ] <- w10[10:1, ] * popg[fr:lr, ] + (1 - w10[10:1, ]) * pops[fr:lr, ]
# ---------------------------------------------
# now we take care of the marginal constraint problem
# make weighting matrix
wr <- pops * 0 + 1
wr[1:10, ] <- w10
wr[fr:lr, ] <- w10[10:1, ]
wr[nrow(wr), ] <- 0
# weighted marginal sums. The difference we need to redistribute
redist <- colSums(pops) - colSums(popg)
middle.part <- popg * wr
# the difference to redistribute
add.in <- t(t(prop.table(middle.part,2)) * redist)
popg <- popg + add.in
# ---------------------------------------------
# label dims and return
rg <- range(as.integer(rownames(popmat)))
dimnames(popg) <- list(rg[1]:rg[2], colnames(popmat))
popg
}
#' split age groups using a monotonic spline
#' @description Take the cumulative sum of \code{Value} and then run a monotonic spline through it. The first
#' differences split back single-age estimates of \code{Value}. Optionally keep the open age group untouched.
#'
#' @details We use the \code{"monoH.FC"} method of \code{stats::splinefun()} to fit the spline because 1)
#' it passes exactly through the points, 2) it is monotonic and therefore guarantees positive counts, and 3)
#' it seems to be a bit less wiggly (lower average first differences of split counts) than a pchip tends to do,
#' at least in the tested data.
#'
#' @param Value numeric vector of counts in age groups
#' @param Age5 integer vector of lower bound of age groups
#' @param keep.OAG logical (default \code{FALSE}). Would we like to re-impute the last
#' element of \code{Value} as the open age group?
#' @return numeric vector of single age counts
#' @importFrom stats splinefun
#' @references
#' \insertRef{fritsch1980monotone}{}
#' @export
#' @examples
#' Value <- structure(c(88623, 90842, 93439, 96325, 99281, 102051, 104351,
#' 106555, 109170, 112188, 113582, 112614, 108904, 102622, 95867,
#' 80874, 60196, 37523, 17927, 5642, 1110), .Names = c("0", "5",
#' "10", "15", "20", "25", "30", "35", "40", "45", "50", "55", "60",
#' "65", "70", "75", "80", "85", "90", "95", "100"))
#'
#' splitMono(Value)
#' splitMono(Value, keep.OAG = TRUE)
splitMono <- function(Value, Age5 = seq(0, length(Value)*5-5, 5), keep.OAG = FALSE){
AgePred <- min(Age5):(max(Age5) + 1)
y <- c(0, cumsum(Value))
x <- c(0, Age5 + 5)
y1 <- splinefun(y ~ x, method = "monoH.FC")(AgePred)
single.out <- diff(y1)
if (keep.OAG){
single.out[length(single.out)] <- Value[length(Value)]
}
single.out
}
#' blend the Sprague upper boundary age estimates into monotonic spline estimates
#'
#' @description A simple monotonic spline on the cumulative sum of population counts
#' may return more convincing single age count estimates than the Sprague splitting method.
#' This function blends the Sprague estimates starting at \code{pivotAge}.
#'
#' @param popmat a numeric matrix of population counts in 5-year age groups, with integer-labeled
#' margins (age in rows and year in columns).
#' @param pops optional numeric matrix of single age population counts derived from \code{popmat}.
#' @param pivotAge integer (default 90). Age at which to switch to spline-based estimates.
#'
#' @return numeric matrix of age by year estimates of single-age counts.
#'
#' @details The \code{pivotAge} must be at least 10 years below the maximum age detected from
#' \code{rownames(popmat)}, but not lower than 80. In the exact \code{pivotAge}, we may either take
#' the Sprague estimates or the spline estimates, depending on which is larger, then the single-age estimates
#' for this 5-year age group are rescaled to sum to the original total in \code{popmat}. Higher ages are taken from
#' the spline-based age splits. The spline results are derive from the \code{"monoH.FC"} method of \code{splinefun()}
#' on the cumulative sum of the original age grouped data. One could use this function to perform the same
#' closeout to Grabill estimates, if these are given via the \code{pops} argument. See examples. Note
#' that the Grabill split method mixed with this closeout will not necessarily preserve the annual totals,
#' and this function performs to rescaling. The open age group is preserved (and must be included in \code{popmat}).
#'
#' @export
#'
#' @examples
#' popmat <- structure(c(54170, 44775, 42142, 38464, 34406, 30386, 26933,
#' 23481, 20602, 16489, 14248, 9928, 8490, 4801, 3599, 2048, 941,
#' 326, 80, 17, 0, 57424, 44475, 41752, 39628, 34757, 30605, 27183,
#' 23792, 20724, 17056, 14059, 10585, 8103, 5306, 3367, 2040, 963,
#' 315, 80, 16, 1, 60272, 44780, 41804, 40229, 35155, 30978, 27456,
#' 24097, 20873, 17546, 13990, 11146, 7841, 5738, 3184, 2062, 961,
#' 311, 80, 15, 1, 62727, 45681, 42101, 40474, 35599, 31439, 27758,
#' 24396, 21055, 17958, 14046, 11589, 7731, 6060, 3086, 2083, 949,
#' 312, 79, 14, 1, 64816, 47137, 42508, 40532, 36083, 31940, 28092,
#' 24693, 21274, 18299, 14223, 11906, 7785, 6255, 3090, 2084, 938,
#' 316, 80, 14, 2), .Dim = c(21L, 5L), .Dimnames = list(c("0", "5",
#' "10", "15", "20", "25", "30", "35", "40", "45", "50", "55", "60",
#' "65", "70", "75", "80", "85", "90", "95", "100"), c("1950", "1951",
#' "1952", "1953", "1954")))
#'
#' closed.out <- spragueCloseout(popmat)
#' colSums(closed.out) - colSums(popmat)
#' spragueCloseout(popmat, pivotAge = 85)
#' # giving a different single-age split to close out this way:
#' popg <- grabill(popmat)
#' grabill.closed.out <- spragueCloseout(popmat, popg)
#' # totals not necessarily preserved if mixed w Grabill
#' # I wouldn't recommend a rescale of the total, since the
#' # only part we mess with here is the old age section. Ergo,
#' # one may wish to instead rescale results colSums() of
#' # popg at age pivotAge and higher.
#' colSums(grabill.closed.out) - colSums(popmat)
#' # also works on an age-labelled vector of data
#' popvec <- popmat[,1]
#' closed.vec <- spragueCloseout(popvec)
#' # let's compare this one with sprague()
#' simple.vec <- sprague(popvec)
#' # and with a simple monotonic spline
#' mono.vec <- splitMono(popvec)
#' \dontrun{
#' plot(85:100,simple.vec[86:101], type = 'l', main = "In this case sprague() is the smoothest")
#' lines(85:100,closed.vec[86:101], col = "red", lwd = 2)
#' lines(85:100,mono.vec[86:101], col = "blue", lty = 2)
#' legend("topright",lty=c(1,2,2), col = c("black","red","blue"),lwd = c(1,2,1),
#' legend = c("sprague()","spragueCloseout()", "splitMono()"))
#' }
spragueCloseout <- function(popmat, pops, pivotAge = 90){
popmat <- as.matrix(popmat)
if (missing(pops)){
pops <- sprague(popmat)
}
# get the spline population split
popmono <- apply(popmat, 2, splitMono, keep.OAG = TRUE)
# some age pars
Age5 <- as.integer(rownames(popmat))
Age1 <- min(Age5):max(Age5)
# some checks on pivotAge...
if (!(max(Age1) - 10) >= pivotAge){
pivotAge <- max(Age1) - 10
if (pivotAge < 80){
warning("pivotAge wasn't in rownames(popmat), moved it to 3rd
from bottom row of popmat, but appears to be < 80
so returning sprague() output as-is, no extra closeout performed.")
return(pops)
}
warning("pivotAge moved to ", pivotAge, ", continued.")
}
# -----------------------------
# now begin the closeout blend.
p.i <- which(Age1 == pivotAge)
## substitute Sprague interpolation if > pchip for better belnding of the two series
pop.c <- popmono[p.i:(p.i + 4), , drop = FALSE]
ind <- pops[p.i, ] > pop.c[1, ]
pop.c[1, ind] <- pops[p.i, ind]
## adjust back on initial pop 5x5 for age 90-94
## proportional distribution
pop.c[is.na(pop.c )] <- 0
prop <- prop.table(pop.c, margin = 2)
pivot5 <- popmat[as.character(pivotAge), ]
pop.c <- t(t(prop) * pivot5)
## append the remaining of the age groups (except last open age)
## 95-99 onward
m <- nrow(pops)
pop.c <- rbind(pop.c, popmono[(p.i + 5):m, , drop = FALSE])
## append Sprague interpolation before age 90
pop.c <- rbind(pops[1:(p.i - 1), , drop = FALSE], pop.c)
## deal with negative values if applicable (but in principle should not be happening)
pop.c[pop.c < 0] <- 0
# label and return
#dimnames(pop.c) <- list(Age1, colnames(popmat))
rownames(pop.c) <- Age1
colnames(pop.c) <- colnames(popmat)
pop.c
}
#' an oscillatory average of Sprague age splits
#' @description Single ages can be grouped into 5-year age groups in 5 ways by staggering terminal digits.
#' This method is a bit smoother than the standard \code{sprague()} method, but not as smooth as \code{grabill()}.
#'
#' @details This function works on a single vector of single-age counts, not on a matrix. Results are not
#' constrained to any particular age group, but are constrained to the total count.
#' The option to closeout using \code{spragueCloseout()} is recommended because it usually gives
#' more plausible results and it avoids negative values. This is run separately on each Sprague split,
#' rather than on the aggregate results.
#'
#' @param Value numeric vector of single age counts
#' @param Age integer vector of single ages (lower bound)
#' @param OAG logical (default \code{TRUE}). Is the last value the open age group?
#' @param closeout logical (default \code{TRUE}). Shall we close out each sprague split with a monotonic spline fit?
#'
#' @return numeric vector of Sprague-smoothed counts
#' @export
#' @examples
#' Value <- c(9544406,7471790,11590109,11881844,11872503,12968350,11993151,10033918,
#' 14312222,8111523,15311047,6861510,13305117,7454575,9015381,10325432,
#' 9055588,5519173,12546779,4784102,13365429,4630254,9595545,4727963,
#' 5195032,15061479,5467392,4011539,8033850,1972327,17396266,1647397,
#' 6539557,2233521,2101024,16768198,3211834,1923169,4472854,
#' 1182245,15874081,1017752,3673865,1247304,1029243,12619050,1499847,
#' 1250321,2862148,723195,12396632,733501,2186678,777379,810700,
#' 7298270,1116032,650402,1465209,411834,9478824,429296,1190060,
#' 446290,362767,4998209,388753,334629,593906,178133,
#' 4560342,179460,481230,159087,155831,1606147,166763,93569,182238,
#' 53567,1715697,127486,150782,52332,48664,456387,46978,34448,
#' 44015,19172,329149,48004,28574,9200,7003,75195,13140,5889,
#' 18915,21221,72373)
#' Age <- 0:100
#' names(Value) <- Age
#' #barplot(Value, main = "yup, these have heaping!")
#' # this is the basic case we compare with:
#' pop0 <- sprague(groupAges(Value,Age))
#' # note: this function needs single ages to work because
#' # ages are grouped into 5-year age groups in 5 different ways.
#' (pop1 <- spragueOscillate(Value, Age, closeout = FALSE))
#' # see the NaN value? That because there were some negatives produced by
#' # sprague(). We can call spragueCloseout() inside spragueOscillate()
#' # to handle such cases:
#' (pop2 <- spragueOscillate(Value, Age, closeout = TRUE))
#' # what's smoother, spragueOscillate() or grabill()?
#' # note, same closeout problem, can be handled by spragueCloseout()
#' (pop3 <- grabill(groupAges(Value, Age)))
#' #pop4 <- spragueCloseout(groupAges(Value, Age), pops = pop3)
#' \dontrun{
#' plot(Age, Value)
#' lines(Age, pop0, col = "blue")
#' # slightly smoother (also shifted though)
#' lines(Age, pop1)
#' # only different at very high ages, small nrs
#' lines(Age, pop2, col = "red", lty = 2, lwd = 2)
#' lines(Age, pop3, col = "magenta")
#' legend("topright", lty = c(1,1,2,1), lwd = c(1,1,2,1), col = c("blue","black","red","magenta"),
#' legend = c("sprague()",
#' "spragueOscillate(closeout = FALSE)",
#' "spragueOscillate(closeout = TRUE)",
#' "grabill()"))
#' }
spragueOscillate <- function(Value, Age, OAG = TRUE, closeout = TRUE){
N <- length(Value)
if (OAG){
open <- Value[N]
OA <- Age[N]
Value <- Value[-N]
Age <- Age[-N]
N <- N - 1
}
TOT <- sum(Value)
# select which ages to keep:
p1x1 <- matrix(nrow = length(Value), ncol = 5)
rownames(p1x1) <- Age
for (i in 0:4){
# regroup ages
Age.i.5 <- calcAgeN(Age, shiftdown = i)
# only use age groups w 5 single ages represented
keep.i <- rep(rle(Age.i.5)$leng, rle(Age.i.5)$leng) == 5
# cut vector down to those cases
Age.i.5 <- Age.i.5[keep.i]
# cut counts down to those cases
Val.i <- Value[keep.i]
# group ages into said 5-year age groups
Val.i.5 <- groupAges(Val.i, AgeN = Age.i.5)
# make fake open age
Val.i.5 <- c(Val.i.5, pi)
names(Val.i.5) <- c(unique(Age.i.5), max(Age.i.5) + 5)
# get first run estimate
pop.est <- sprague(Val.i.5)
if (closeout){
pop.est <- spragueCloseout(Val.i.5, pop.est)
}
pop.est[pop.est < 0] <- NA
pop.est <- pop.est[-length(pop.est)]
p1x1[keep.i, i + 1] <- pop.est
}
# take average per age
p.out <- rowMeans(p1x1, na.rm = TRUE)
# rescale to proper total
p.out <- p.out * TOT / sum(p.out, na.rm = TRUE)
# re-append the open age group if needed
if (OAG){
Age <- c(Age, OA)
p.out <- c(p.out, open)
names(p.out) <- Age
}
pop.c
}
Value = c(`0` = 322450, `1` = 847314, `2` = 890815, `3` = 973324, `4` = 922761,
`5` = 1353923, `6` = 446856, `7` = 617082, `8` = 729849, `9` = 793670,
`10` = 839793, `11` = 765702, `12` = 898146, `13` = 834513, `14` = 662849,
`15` = 835059, `16` = 878426, `17` = 783401, `18` = 877720, `19` = 1005015,
`20` = 1049289, `21` = 758123, `22` = 838049, `23` = 761735,
`24` = 623166, `25` = 920335, `26` = 579951, `27` = 632815, `28` = 620611,
`29` = 402083, `30` = 891491, `31` = 336850, `32` = 517889, `33` = 384614,
`34` = 287516, `35` = 900013, `36` = 345989, `37` = 360808, `38` = 431534,
`39` = 273062, `40` = 817674, `41` = 244233, `42` = 444999, `43` = 347551,
`44` = 215034, `45` = 825759, `46` = 270679, `47` = 272653, `48` = 313531,
`49` = 195134, `50` = 605866, `51` = 176668, `52` = 311271, `53` = 228275,
`54` = 221397, `55` = 504827, `56` = 203723, `57` = 162895, `58` = 201118,
`59` = 141621, `60` = 369704, `61` = 126829, `62` = 170236, `63` = 132706,
`64` = 99812, `65` = 364710, `66` = 76709, `67` = 96552, `68` = 64537,
`69` = 38543, `70` = 234801, `71` = 36986, `72` = 55415, `73` = 45925,
`74` = 29362, `75` = 127797, `76` = 28914, `77` = 20376, `78` = 19258,
`79` = 10656, `80` = 70877, `81` = 9732, `82` = 12709, `83` = 9495,
`84` = 7199, `85` = 27178, `86` = 5804, `87` = 3769, `88` = 2598,
`89` = 1462, `90` = 9682, `91` = 1303, `92` = 1357, `93` = 1194,
`94` = 725, `95` = 2259, `96` = 443, `97` = 323, `98` = 329,
`99` = 1291)
Age = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
98, 99)
graduate_grabill(Value, Age, OAG = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.