# Author: sean
# edited by TR 9-Dec-2017 & 28 Feb-2018
###############################################################################
#' Interpolate between two population age distributions.
#' @description The interpolation is done by age (not cohort) using a linear or exponential function. This comes from the PAS spreadsheet called ADJINT.
#' @param Pop1 numeric. A vector of demographic counts by age at time 1 (earlier date).
#' @param Pop2 numeric. A vector of demographic counts by age at time 2 (later date).
#' @param Date1 date. The date corresponding to the population age distribution at time 1. See details for ways to express it.
#' @param Date2 date. The date corresponding to the population age distribution at time 2. See details for ways to express it.
#' @param DesiredDate date. The desired date of the output population age distribution. See details for ways to express it.
#' @param method string. The method to use for the interpolation, either "linear" or "exponential". Default "linear".
#' @param roundoutput logical. Whether or not to return integers. Default \code{FALSE}.
#' @details The age group structure of the output is the same as that of the input. Ideally, \code{DesiredDate} should be between the Date1 and Date2.
#' Dates can be given in three ways 1) a \code{Date} class object, 2) an unambiguous character string in the format \code{"YYYY-MM-DD"},
#' or 3) as a decimal date consisting in the year plus the fraction of the year passed as of the given date.
#' @return A vector of the interpolated population for the requested date.
#' @author Sean Fennel
#' @references
#' \insertRef{PAS}{DemoTools}
#' @export
#'
#' @examples
#' # YYYY-MM-DD dates as character
#' EarlyDate <- "1980-04-07"
#' LaterDate <- "1990-10-06"
#' DesiredDate <- "1985-07-01"
#'
#' interpolatePop(popA_earlier, popA_later, EarlyDate, LaterDate, DesiredDate)
#' interpolatePop(popA_earlier, popA_later, EarlyDate, LaterDate, DesiredDate, method = "exponential")
#' \dontrun{
#'plot(popA_earlier, t ='l', col='red',
#' ylim = c(400,1220000),
#' ylab = 'The counts', xlab = 'Age groups')
#'
#'lines(interpolatePop(popA_earlier, popA_later, EarlyDate, LaterDate, DesiredDate),
#' t = 'l', col='black')
#'
#'lines(popA_later,
#' t = 'l', col='blue')
#'
#'legend(12,1000000,
#' legend = c('Pop in 1980-04-07',
#' 'Pop in 1985-07-01',
#' 'Pop in 1990-10-06'),
#' col=c('red','black','blue'),
#' lty = 1)
#' }
interpolatePop <-
function(Pop1,
Pop2,
Date1,
Date2,
DesiredDate,
method = "linear",
roundoutput = FALSE) {
stopifnot(length(Pop1) == length(Pop2))
earlyDateDec <- dec.date(Date1)
laterDateDec <- dec.date(Date2)
desireDateDec <- dec.date(DesiredDate)
interpolateFactor <-
(desireDateDec - earlyDateDec) / (laterDateDec - earlyDateDec)
if (method == "exponential") {
adjustedPop <- Pop1 * exp(interpolateFactor * log(Pop2 / Pop1))
}
else {
adjustedPop <- Pop1 + (Pop2 - Pop1) * interpolateFactor
}
if (roundoutput) {
adjustedPop <- round(adjustedPop)
}
return(adjustedPop)
}
# TR: 2018-08-20 either complementing or replacing previous pop interpolater.
# this one more general.
###############################################################################
#' Interpolate between two population age distributions.
#' @description The interpolation is done by age using a linear, exponential, or power function. This comes from the PAS spreadsheet called \code{AGEINT}. Be aware that this is not cohort-component interpolation.
#' @param popmat numeric. An age-period matrix of data to interpolate over. Age in rows, time in columns.
#' @param datesIn vector of dates. The exact dates in each column. See details for ways to express it.
#' @param datesOut vector of dates. The desired dates to interpolate to. See details for ways to express it.
#' @param method string. The method to use for the interpolation, either \code{"linear"}, \code{"exponential"}, or \code{"power"}. Default \code{"linear"}.
#' @param power numeric power to interpolate by, if \code{method = "power"}. Default 2.
#' @param extrap logical. In case \code{datesOut} is out of range of `datesIn`, do extrapolation using slope in extreme pairwise. Default \code{FALSE}.
#' @param negatives logical. In case negative output are accepted, set to \code{TRUE}. Default \code{FALSE}.
#' @param ... arguments passed to \code{stats::approx}. For example, \code{rule}, which controls extrapolation behavior.
#' @details The age group structure of the output is the same as that of the input. Ideally, \code{datesOut} should be within the range of \code{datesIn}. If not, the left-side and right-side output are held constant outside the range if \code{rule = 2} is passed in, otherwise \code{NA} is returned (see examples). Dates can be given in three ways 1) a \code{Date} class object, 2) an unambiguous character string in the format \code{"YYYY-MM-DD"}, or 3) as a decimal date consisting in the year plus the fraction of the year passed as of the given date.
#'
#' For methods \code{"exponential"} and \code{"power"}, calculations are carried out using linear interpolation through \code{log(popmat)}, or \code{popmat^(1/power)} respectively, then back-transformed. If the data contain 0s, \code{method = "exponential"} will fail, but \code{"power"} would still generally work.
#'
#' @return numeric matrix (age-period) (or vector if \code{length(datesOut) == 1} of the interpolated data for the requested dates.
#' @author Tim Riffe
#' @references
#' \insertRef{PAS}{DemoTools}
#' @export
#' @importFrom stats approx
#'
#' @examples
#' # Example of interpolating over time series of age-structured
#' # data. NOTE: age classes must conform. They don't have to be
#' # uniform, just the same in each year.
#' popmat <- structure(c(2111460L, 1971927L, 1651421L, 1114149L, 921120L,
#' 859806L, 806857L, 847447L, 660666L, 567163L, 493799L, 322936L,
#' 320796L, 163876L, 133531L, 120866L, 2548265L, 2421813L, 2581979L,
#' 2141854L, 1522279L, 1321665L, 1036480L, 1024782L, 935787L, 789521L,
#' 719185L, 481997L, 479943L, 268777L, 202422L, 167962L, 3753848L,
#' 3270658L, 2930638L, 2692898L, 2222672L, 1788443L, 1514610L, 1491751L,
#' 1054937L, 972484L, 796138L, 673137L, 554010L, 352264L, 293308L,
#' 195307L, 3511776L, 3939121L, 4076601L, 3602857L, 2642620L, 2105063L,
#' 1993212L, 1914367L, 1616449L, 1408498L, 994936L, 776537L, 706189L,
#' 507085L, 315767L, 240302L, 3956664L, 3936424L, 3995805L, 4371774L,
#' 4012982L, 3144046L, 2406725L, 2301514L, 2061252L, 1871502L, 1538713L,
#' 1210822L, 896041L, 638954L, 401297L, 375648L), .Dim = c(16L,
#' 5L), .Dimnames = list(c("0", "5", "10", "15", "20", "25", "30",
#' "35", "40", "45", "50", "55", "60", "65", "70", "75"), c("1960",
#' "1976", "1986", "1996", "2006")))
#'
#' # We have irregularly spaced census inputs.
#' \dontrun{
#' matplot(seq(0,75,by=5),popmat,type='l',col=1:5)
#' text(20,popmat["20",],colnames(popmat),col=1:5)
#' }
#' # census dates as decimal: no need for year rounding.
#' # could also specify as "YYYY-MM-DD"
#' # (rounded to 2 digits here- not necessary)
#' datesIn <- c(1960.73, 1976.90, 1986.89, 1996.89, 2006.89)
#' # could ask for single years or anything
#' datesOut <- seq(1960, 2005, by = 5)
#'
#' # NOTE: rule is an argument to approx(), which does the work.
#' # if not passed in, 1960 is returned as NAs (outside the range
#' # of dates given). In this case, rule = 2 assumes constant
#' # equal to nearest neighbor, meaning 1960 data will be returned
#' # as equal to 1960.73 input data. So, function should only be
#' # allowed to extrapolate for short distances.
#' interp(popmat, datesIn, datesOut, "linear", rule = 2)
#' interp(popmat, datesIn, datesOut, "exponential", rule = 2)
#' interp(popmat, datesIn, datesOut, "power", rule = 2)
#'
#' # ----------------------------------------------------------
#' # standard example data, taken from AGEINT PAS spreadsheet.
#'
#' # YYYY-MM-DD dates as character
#' d1 <- "1980-04-07"
#' d2 <- "1990-10-06"
#' dout <- "1985-07-01"
#'
#' (p_lin <- interp(cbind(popA_earlier,popA_later),c(d1,d2),dout,method = "linear"))
#' (p_exp <- interp(cbind(popA_earlier,popA_later),c(d1,d2),dout,method = "exponential"))
#' (p_pow <- interp(cbind(popA_earlier,popA_later),c(d1,d2),dout,method = "power"))
#'
#' \dontshow{
#' print(dec.date(d1),digits=18)
#' print(dec.date(d2),digits=18)
#' print(dec.date(dout),digits=18)
#' # these were set to different dates: recreate above dates...
#' expcheck <- c(142611.789832724, 658653.225145641, 881642.532889494, 790424.835149364,
#' 631052.720366013, 523578.990638372, 426405.833143345, 352311.49478387,
#' 349576.729583329, 315026.304975883, 243331.949093535, 210952.668139616,
#' 179740.182668986, 149331.458251614, 112461.568530899, 75799.3288538202,
#' 43819.8095485345, 48869.8021063505)
#'
#' lincheck <- c(151269.209810283, 698637.560216026, 935163.70169507, 838408.524061807,
#' 669361.533645738, 555363.482079782, 452291.311354737, 373698.987198367,
#' 370798.204791894, 334150.355163834, 258103.707303228, 223758.803211712,
#' 190651.52632461, 158396.803770816, 119288.683114125, 80400.8181463556,
#' 46479.9437144632, 51836.5021355072)
#'
#' stopifnot(all(abs(expcheck - p_exp)<1e-6))
#' stopifnot(all(abs(lincheck - p_lin)<1e-6))
#' # do controlled spreadsheet test, passing in decimal dates, etc
#' }
#' \dontrun{
#' age <- c(0,1,seq(5,80,by=5))
#' plot(age, popA_later, type = 'l', lwd = 2, main = "Interpolation options")
#' lines(age, popA_earlier, lwd = 2, lty = "8282")
#' lines(age, p_lin,col = "blue")
#' lines(age, p_pow,col = gray(.4))
#' lines(age, p_exp,col = "red")
#' text(30, popA_earlier[8], d1, pos = 1, cex = 1)
#' text(20, popA_later[6], d2, pos = 4, cex = 1)
#' legend("topright", lty = 1, col = c("blue", gray(.4), "red"),
#' legend = c("linear", "power 2", "exponential"),
#' title = paste0("Interpolated 1985-07-01"))
#'
#' }
interp <- function(popmat,
datesIn,
datesOut,
method = c("linear", "exponential", "power"),
power = 2,
extrap = FALSE,
negatives = FALSE,
...) {
# ... args passed to stats::approx . Can give control over extrap assumptions
# IW: extrap=T for extrapolate following each slope in extreme pairwise.
# If not is explicit extrap=T, returns NA at those points
# a basic check
stopifnot(ncol(popmat) == length(datesIn))
# no sense documenting this wrapper ...
.approxwrap <- function(x, y, xout, extrap, ...) {
# interp
yout = stats::approx(x = x,
y = y,
xout = xout,
...)$y
if (extrap){
# extrap (each side)
rg <- range(x)
xext <- xout < rg[1]
if(any(xext))
yout[xext] <- (y[2]-y[1])/(x[2]-x[1])*(xout[xext]-x[1])+y[1]
xext <- xout > rg[2]
n <- length(y)
if(any(xext))
yout[xext] <- (y[n]-y[n-1])/(x[n]-x[n-1])*(xout[xext]-x[n-1])+y[n-1]
}
return(yout)
}
# -----------------------
# clean method declaration
# match.arg does partial matching and it's safer:
# match.arg("lin", c("linear", "exponential", "power"))
method <- tolower(match.arg(method,
choices = c("linear", "exponential", "power")))
# -----------------------
# coerce dates to decimal if necessary
datesIn <- dec.date(datesIn)
datesOut <- dec.date(datesOut)
# carry out transform 1
if (method == "exponential") {
if (any(popmat == 0)) {
stop(
"popmat contains 0s, so exponential interpolation won't work.\n
Either handle 0s then do this, or\n
maybe try method = 'power'."
)
}
popmat <- log(popmat)
}
if (method == "power") {
popmat <- popmat ^ (1 / power)
}
int <- apply(popmat,
1,
.approxwrap,
x = datesIn,
xout = datesOut,
extrap = extrap,
...)
dims <- dim(int)
if (!is.null(dims)) {
int <- t(int)
rownames(int) <- rownames(popmat)
colnames(int) <- datesOut
} else {
names(int) <- rownames(popmat)
}
# transform back
if (method == "exponential") {
int <- exp(int)
}
if (method == "power") {
int <- int ^ power
}
# IW: no negatives when extrapolate. Thinking in pop and lt expressions. Inactive when explicitly are accepted negatives
if(!negatives & all(!is.na(int)) & any(int < 0)){
cat("Negative values have been replaced with 0s.\nNegatives not accepted in population counts,\n fertility rates or life table functions.\nYou can allow negatives (e.g. interpolating net migration)\n by specifying negatives = TRUE")
int[int < 0] <- 0
}
int
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.