dev/AGEINT.R

# TR: 2018-08-20 either complementing or replacing previous pop interpolater.
# this one more general.
###############################################################################

#' Interpolate between two population age distributions.
#' @description The interpolation is done by age using a linear, exponential, or power function. This comes from the PAS spreadsheet called \code{AGEINT}. Be aware that this is not cohort-component interpolation.

#' @param popmat numeric. An age-period matrix of data to interpolate over. Age in rows, time in columns.
#' @param datesIn vector of dates. The exact dates in each column. See details for ways to express it.
#' @param datesOut vector of dates. The desired dates to interpolate to. See details for ways to express it.
#' @param method string. The method to use for the interpolation, either \code{"linear"}, \code{"exponential"}, or \code{"power"}. Default \code{"linear"}.
#' @param power numeric power to interpolate by, if \code{method = "power"}. Default 2.

#' @details The age group structure of the output is the same as that of the input. Ideally, \code{datesOut} should be within the range of \code{datesIn}. If not, the left-side and right-side output are held constant outside the range if \code{rule = 2} is passed in, otherwise \code{NA} is returned (see examples). Dates can be given in three ways 1) a \code{Date} class object, 2) an unambiguous character string in the format \code{"YYYY-MM-DD"}, or 3) as a decimal date consisting in the year plus the fraction of the year passed as of the given date. 
#' 
#' For methods \code{"exponential"} and \code{"power"}, calculations are carried out using linear interpolation through \code{log(popmat)}, or \code{popmat^(1/power)} respectively, then back-transformed. If the data contain 0s, \code{method = "exponential"} will fail, but \code{"power"} would still generally work. 
#' 
#' @return numeric matrix (age-period) (or vector if \code{length(datesOut) == 1} of the interpolated data for the requested dates. 
#' @author Tim Riffe
#' @references 
#' \insertRef{PAS}{DemoTools}
#' @export
#' @importFrom stats approx
#' 
#' @examples
#' # Example of interpolating over time series of age-structured
#' # data. NOTE: age classes must conform. They don't have to be
#' # uniform, just the same in each year.
#' popmat <- structure(c(2111460L, 1971927L, 1651421L, 1114149L, 921120L, 
#' 				859806L, 806857L, 847447L, 660666L, 567163L, 493799L, 322936L, 
#' 				320796L, 163876L, 133531L, 120866L, 2548265L, 2421813L, 2581979L, 
#' 				2141854L, 1522279L, 1321665L, 1036480L, 1024782L, 935787L, 789521L, 
#' 				719185L, 481997L, 479943L, 268777L, 202422L, 167962L, 3753848L, 
#' 				3270658L, 2930638L, 2692898L, 2222672L, 1788443L, 1514610L, 1491751L, 
#' 				1054937L, 972484L, 796138L, 673137L, 554010L, 352264L, 293308L, 
#' 				195307L, 3511776L, 3939121L, 4076601L, 3602857L, 2642620L, 2105063L, 
#' 				1993212L, 1914367L, 1616449L, 1408498L, 994936L, 776537L, 706189L, 
#' 				507085L, 315767L, 240302L, 3956664L, 3936424L, 3995805L, 4371774L, 
#' 				4012982L, 3144046L, 2406725L, 2301514L, 2061252L, 1871502L, 1538713L, 
#' 				1210822L, 896041L, 638954L, 401297L, 375648L), .Dim = c(16L, 
#' 				5L), .Dimnames = list(c("0", "5", "10", "15", "20", "25", "30", 
#' 						"35", "40", "45", "50", "55", "60", "65", "70", "75"), c("1960", 
#' 						"1976", "1986", "1996", "2006")))
#' 
#' # We have irregularly spaced census inputs.
#' \dontrun{
#' 	matplot(seq(0,75,by=5),popmat,type='l',col=1:5)
#' 	text(20,popmat["20",],colnames(popmat),col=1:5)
#' }
#' # census dates as decimal: no need for year rounding.
#' # could also specify as "YYYY-MM-DD"
#' # (rounded to 2 digits here- not necessary)
#' datesIn  <- c(1960.73, 1976.90, 1986.89, 1996.89, 2006.89)
#' # could ask for single years or anything
#' datesOut <- seq(1960, 2005, by = 5)
#' 
#' # NOTE: rule is an argument to approx(), which does the work.
#' # if not passed in, 1960 is returned as NAs (outside the range
#' # of dates given). In this case, rule = 2 assumes constant 
#' # equal to nearest neighbor, meaning 1960 data will be returned
#' # as equal to 1960.73 input data. So, function should only be 
#' # allowed to extrapolate for short distances.
#' interp(popmat, datesIn, datesOut, "linear", rule = 2)
#' interp(popmat, datesIn, datesOut, "exponential", rule = 2)
#' interp(popmat, datesIn, datesOut, "power", rule = 2)
#' 
#' # ----------------------------------------------------------
#' # standard example data, taken from AGEINT PAS spreadsheet.
#' p1    <- c(100958, 466275, 624134, 559559, 446736, 370653, 301862, 249409,
#'   247473, 223014, 172260, 149338, 127242, 105715, 79614, 53660, 31021, 34596)
#' p2    <- c(201916, 932550, 1248268, 1119118, 893472, 741306, 603724, 498818, 
#'  494946, 446028, 344520, 298676, 254484, 211430, 159228, 107320, 62042, 69192)
#' 
#'  # YYYY-MM-DD dates as character
#'  d1              <- "1980-04-07"
#'  d2              <- "1990-10-06"
#'  dout            <- "1985-07-01"
#'  
#'  (p_lin <- interp(cbind(p1,p2),c(d1,d2),dout,method = "linear"))
#'  (p_exp <- interp(cbind(p1,p2),c(d1,d2),dout,method = "exponential"))
#'  (p_pow <- interp(cbind(p1,p2),c(d1,d2),dout,method = "power"))
#'  
#'  \dontshow{
#'  print(dec.date(d1),digits=18)
#'  print(dec.date(d2),digits=18)
#'  print(dec.date(dout),digits=18)
#'  # these were set to different dates: recreate above dates...
#' expcheck <- c(142637.629438722,658772.565488024,881802.276313982,
#' 790568.050982602,631167.059816327,523673.857092558,426483.092945892,
#' 352375.329549735,349640.068841387,315083.384096823,243376.038026844,
#' 210990.890321914,179772.749510111,149358.515383768,112481.945265698,
#' 75813.0628150498,43827.7491909366,48878.656748965)
#' lincheck <- c(151295.597805253,698759.433295475,935326.835323442,
#' 838554.779337049,669478.29970015,555460.361866426,452370.21082717,
#' 373764.176717154,370862.888286807,334208.645663947,258148.731927464,
#' 223797.836576011,190684.784325522,158424.435131266,119309.492300436,
#' 80414.843580795,46488.0518583646,51845.5446984939)
#' 
#' stopifnot(all(abs(expcheck - p_exp)<1e-6))
#' stopifnot(all(abs(lincheck - p_lin)<1e-6))
#' # do controlled spreadsheet test, passing in decimal dates, etc
#' }
#' \dontrun{
#' age <- c(0,1,seq(5,80,by=5))
#' plot(age, p2, type = 'l', lwd = 2, main = "Interpolation options")
#' lines(age, p1, lwd = 2, lty = "8282")
#' lines(age, p_lin,col = "blue")
#' lines(age, p_pow,col = gray(.4))
#' lines(age, p_exp,col = "red")
#' text(30, p1[8], d1, pos = 1, cex = 1)
#' text(20, p2[6], d2, pos = 4, cex = 1)
#' legend("topright", lty = 1, col = c("blue", gray(.4), "red"),
#' 		legend = c("linear", "power 2", "exponential"), 
#' 		title = paste0("Interpolated 1985-07-01"))
#' 
#' }

interp <- function(popmat, 
		datesIn, 
		datesOut, 
		method = c("linear","exponential","power")[1], 
		power = 2,
		...){ # ... args passed to stats::approx . Can give control over extrap assumptions
	# a basic check
	stopifnot(ncol(popmat) == length(datesIn))
	
	# no sense documenting this wrapper ...
	.approxwrap <- function(x, y, xout, ...){
		stats::approx(x = x, y = y, xout = xout, ...)$y
	}
	
	# -----------------------
	# clean method declaration
	method <- tolower(method)
	if (grepl(method, pattern = "exp")){
		pattern <- "exponential"
	}
	if (grepl(method, pattern = "lin")){
		pattern <- "linear"
	}
	if (grepl(method, pattern = "pow")){
		pattern <- "power"
	}
	# -----------------------
	
	# coerce dates to decimal if necessary
	datesIn  <- sapply(datesIn, dec.date)
	datesOut <- sapply(datesOut, dec.date)
	
	
	# carry out transform 1
	if (method == "exponential"){
		if (any(popmat == 0)){
			stop("popmat contains 0s, so exponential interpolation won't work.\n
Either handle 0s then do this, or\n
maybe try method = 'power'.")
		}
		popmat <- log(popmat)
	}
	if (method == "power"){
		popmat <- popmat ^ (1 / power)
	}
	
	int <- apply(
			popmat,
			1,
			.approxwrap,
			x = datesIn,
			xout = datesOut,
			...)
	dims <- dim(int)
	if (!is.null(dims)){
		int <- t(int)
		rownames(int) <- rownames(popmat)
		colnames(int) <- datesOut
	} else {
		names(int)    <- rownames(popmat)
	}
	
	# transform back
	if (method == "exponential"){
		int <- exp(int)
	}
	if (method == "power"){
		int <- int^power
	}
	
	int
}
timriffe/DemoTools documentation built on Oct. 14, 2024, 12:53 p.m.