#' Lee-Carter method with limited data.
#'
#' @description Given a data frame with dates, sex and mortality data by age (rates, conditioned probabilities of death
#' or survival function), this function interpolate/extrapolate life tables
#' using the method for limited data suggested by Li et. al (2004) (at least three observed years).
#'
#' @details Based on spreadsheet "Li_2018_Limited_Lee-Carter-v4.xlsm" from UN.
#' Useful for abridged or single ages, and allows output in both formats also.
#' One option is the use of non-divergent method for sex coherency (Li & Lee, 2005).
#' The other is the possibility of fitting `"k"` to replicate `"e_0"` at some given dates.
#'
#' @note Draft Version
#'
#' @param input data.frame with cols: Date, Sex, Age, nMx (opt), nqx (opt), lx (opt)
#' @param dates_out numeric. Vector of decimal years to interpolate or extrapolate.
#' @param Single logical. Whether or not the lifetable output is by single ages.
#' @param dates_e0 numeric. Vector of decimal years where `"e_0"` should be fitted when apply method.
#' @param e0_Males numeric. Vector of life expectancy by year to be fitted. Same length than `"dates_e0"`.
#' @param e0_Females numeric. Vector of life expectancy by year to be fitted. Same length than `"dates_e0"`.
#' @param prev_divergence logical. Whether or not prevent divergence and sex crossover. Default `FALSE.`
#' @param OAG logical. Whether or not the last element of `nMx` (or `nqx` or `lx`) is an open age group. Default `TRUE.`
#' @param verbose logical. Default `FALSE`.
#' @param SVD logical. Use Singular Value Decomposition for estimate b and k or Maximum Likelihood Estimation. Default `FALSE` for Maximum Likelihood Estimation.
#' @param ... Other arguments to be passed on to the \code{\link[DemoTools]{lt_abridged}} function.
#' @seealso
#' \code{\link[DemoTools]{lt_abridged}}
#' @export
# TR: you can use markdown for this sort of thing, just getting used to it
#' @return List with:
#' \itemize{
#' \item Interpolated/extrapolated lifetables in a data.frame with columns:
#' * `Date` numeric. Dates included in dates_out,
#' * `Sex` character. Male `"m"` or female `"f"`,
#' * `Age` integer. Lower bound of abridged age class,
#' * `AgeInt`` integer. Age class widths.
#' * `nMx` numeric. Age-specific central death rates.
#' * `nAx` numeric. Average time spent in interval by those deceased in interval.
#' * `nqx` numeric. Age-specific conditional death probabilities.
#' * `lx` numeric. Lifetable survivorship
#' * `ndx` numeric. Lifetable deaths distribution.
#' * `nLx` numeric. Lifetable exposure.
#' * `Sx` numeric. Survivor ratios in uniform 5-year age groups.
#' * `Tx` numeric. Lifetable total years left to live above age x.
#' * `ex` numeric. Age-specific remaining life expectancy.
#' \item List with estimated Lee-Carter parameters for each sex:
#' * `kt` numeric time vector. Time trend in mortality level.
#' * `ax` numeric age vector. Average time of `log(m_{x,t})`.
#' * `bx` numeric age vector. Pattern of change in response to `kt`.
#' }
#' @references
#' \insertRef{Li2005}{DemoTools}
#' \insertRef{Li2004}{DemoTools}
#'
#' @examples
#' # mortality rates from Sweden, for specific dates
#'
#' # needs mortality rates in this dates:
#' dates_out <- as.Date(paste0(seq(1948,2018,5),"-07-01"))
#'
#' # apply LC with limited data to extrap/interpolate
#' lc_lim_data <- interp_lc_lim(input = mA_swe, dates_out = dates_out, OAG = FALSE)$lt_hat
#'
#' \dontrun{
#' lc_lim_data %>% ggplot(aes(Age,nMx,col=factor(round(Date,1)))) +
#' geom_step() + scale_color_viridis_d() +
#' scale_y_log10() + theme_classic() + facet_wrap(~Sex)
#' }
#'
#' # with simple ages as output
#' lc_lim_data_single <- interp_lc_lim(input = mA_swe, dates_out = dates_out, OAG = FALSE,
#' Single = TRUE)$lt_hat
#'
#' \dontrun{
#' lc_lim_data_single %>% ggplot(aes(Age,nMx,col=factor(round(Date,1)))) +
#' geom_step() + scale_color_viridis_d() +
#' scale_y_log10() + theme_classic() + facet_wrap(~Sex)
#' }
#'
#' # Avoiding cross-over between sex.
#' lc_lim_nondiv <- interp_lc_lim(input = mA_swe, dates_out = dates_out, OAG = FALSE,
#' prev_divergence = TRUE)$lt_hat
#' \dontrun{
#' lc_lim_nondiv %>% ggplot(aes(Age,nMx,col=factor(round(Date,1)))) +
#' geom_step() + scale_color_viridis_d() +
#' scale_y_log10() + theme_classic() + facet_wrap(~Sex)
#' }
#'
#' # Fitting information about e0 in Sweden for past years.
#' lc_lim_fite0 <- interp_lc_lim(input = mA_swe, dates_out = dates_out, OAG = FALSE,
#' dates_e0 = unique(e0_swe$Date),
#' e0_Males = e0_swe$e0[e0_swe$Sex=="m"],
#' e0_Females = e0_swe$e0[e0_swe$Sex=="f"])$lt_hat
#' \dontrun{
#' ggplot() +
#' geom_point(data = e0_swe, aes(Date,e0,col=factor(Sex)))+
#' geom_line(data = lc_lim_fite0[lc_lim_fite0$Age==0,], aes(Date,ex,col=factor(Sex)))+
#' labs(color = "Sex")+
#' theme_classic()
#' }
#'
#' # smooth and/or extend open age group, in this case input is for 80+, and chosen law is Makeham.
#' lc_lim_extOAg <- interp_lc_lim(input = mA_swe[mA_swe$Age<=80,], dates_out = dates_out,
#' OAG = FALSE,
#' OAnew=100,
#' extrapLaw = "makeham")$lt_hat
#' \dontrun{
#' ggplot() +
#' geom_step(data = lc_lim_extOAg, aes(Age,nMx,col=factor(round(Date,1)))) +
#' scale_y_log10() + scale_color_viridis_d() + theme_classic() + facet_wrap(~Sex)
#' }
#' #End
interp_lc_lim <- function(input = NULL,
dates_out = dates_in,
Single = FALSE,
dates_e0 = NULL,
e0_Males = NULL,
e0_Females = NULL,
prev_divergence = FALSE,
OAG = TRUE,
verbose = TRUE,
SVD = FALSE,
...){
# TR: ExtraArgs has a problem in that it won't capture & pass NULL defaults
# mget(names(formals()),sys.frame(sys.nframe()))
# IW: this captures everything: dots & NULLS
ExtraArgs = c(as.list(environment()), list(...))
ExtraArgs = ExtraArgs[! names(ExtraArgs) %in% c("input","Single")]
# dates
dates_in <- unique(input$Date) %>% dec.date()
dates_out <- dec.date(dates_out)
# take care if lc with limited data is suitable
if (length(dates_in)<3){
stop("\nYou need three observed dates at least.")
}
if (all(diff(dates_in)==1)){
stop("\nYou have single-year-interval data and probably should use basic Lee-Carter method.")
}
# Two tries for dates_e0, otherwise we error
if(!is.null(e0_Males)){
if (is.null(dates_e0)){
if (length(e0_Males) == length(dates_in)){
dates_e0 <- dates_in
if (verbose){
cat("\ndates_e0 not specified, assuming:\n",paste(dates_in,collapse = ", "),"\n" )
}
}
}
if (is.null(dates_e0)){
if (length(e0_Males) == length(dates_out)){
dates_e0 <- dates_out
if (verbose){
cat("\ndates_e0 not specified, assuming:\n",paste(dates_out, collapse = ", "),"\n" )
}
}
}
if (is.null(dates_e0)){
stop("\nSorry we can't guess the argument dates_e0, you'll need to specify it\n")
}
}
if (!any(names(input)%in%c("nMx", "nqx", "lx"))){
stop("\nSorry we need some column called nMx, nqx or lx\n")
}
# get always Mx -----------------------------------------------------------
# TR data.table() is preferred,
# and maybe this function shouldn't be anonymous, but rather called inside
# inputdt[, new_function(.SD,...),by=list(Sex,Date)] # or similar.
. <- NULL
# inputdt <- split(input, list(input$Sex, input$Date)) %>%
# lapply(
# function(X) do.call(lt_smooth_ambiguous,
# c(list(input=X), ExtraArgs))) %>%
# do.call("rbind", .)%>%
# as.data.table()
inputdt <- split(input, list(input$Sex, input$Date)) %>%
lapply( function(X) {
Age <- X$Age
Sex_i <- unique(X$Sex)
types <- c("nMx","nqx","lx")
this_type <- types[types %in% colnames(X)]
if (length(this_type) > 1){
ind <- X[,this_type] %>%
as.matrix() %>%
is.na() %>%
colSums() %>%
which.min()
this_type <- this_type[ind]
}
LT <- lt_ambiguous(nMx_or_nqx_or_lx = X[[this_type]],
type = this_type,
Age = Age,
Sex = Sex_i,
Single = Single,
...)
LT$Sex <- Sex_i
LT$Date <- unique(X$Date)
LT
}) %>%
do.call("rbind", .)%>%
as.data.table()
# avoids 'no visible binding' warning
Sex <- NULL
nMxf <-
inputdt %>%
subset(Sex == "f") %>%
data.table::dcast(Age ~ Date, value.var = "nMx") %>%
.[order(Age)]
Age <- nMxf[["Age"]]
nMxf <- nMxf[, -1] %>% as.matrix()
rownames(nMxf) <- Age
nMxm <-
inputdt %>%
subset(Sex == "m") %>%
data.table::dcast(Age ~ Date, value.var = "nMx") %>%
.[order(Age)]
nMxm <- nMxm[, -1] %>% as.matrix()
rownames(nMxm) <- Age
# LC at unequal intervals ---------------------------------------------------------
#Age = sort(unique(input$Age)) # defined above for rownames
ndates_in <- length(dates_in)
ndates_out <- length(dates_out)
nAge <- length(Age)
# males
lc_estimate_m <- interp_lc_lim_estimate(nMxm, dates_in, dates_out, SVD)
axm <- lc_estimate_m[[1]]
bxm <- lc_estimate_m[[2]]
ktm <- lc_estimate_m[[3]]
k0m <- lc_estimate_m[[4]]
# females
lc_estimate_f <- interp_lc_lim_estimate(nMxf, dates_in, dates_out, SVD)
axf <- lc_estimate_f[[1]]
bxf <- lc_estimate_f[[2]]
ktf <- lc_estimate_f[[3]]
k0f <- lc_estimate_f[[4]]
# ask if prevent divergence and replicate target e0 ---------------------------------------------------------
if (is.null(dates_e0)){ # not rep e0
# basic
nMxm_hat <- exp(axm + bxm %*% t(ktm))
nMxf_hat <- exp(axf + bxf %*% t(ktf))
# avoid divergence extrapolating
if (prev_divergence){
kt = (ktm + ktf) * .5 # equal size male and female
bx = (bxm + bxf) * .5 # # error in vba code line 335. A parameter controls that only for reproducing purpose
k0 = (k0m + k0f) * .5
# apply common factor to rates with already specific factor (formula 6 in Li (2005)),
# not like Li´s paper way in this case. IW: we can improve this if UN wants.
nMxm_hat_div <- nMxm[,1] * exp(bx %*% t(kt-k0))
nMxf_hat_div <- nMxf[,1] * exp(bx %*% t(kt-k0))
# only for those years before min(dates_in). UN code explicit on that.
# IW: why not for dates_out>max(dates_in) also? Ask UN.
dates_extrap <- dates_out < min(dates_in)
nMxm_hat[,dates_extrap] <- nMxm_hat_div[,dates_extrap]
nMxf_hat[,dates_extrap] <- nMxf_hat_div[,dates_extrap]
}
} else { # fit e0 at each target year
stopifnot(length(e0_Males) == length(dates_e0))
stopifnot(length(e0_Females) == length(dates_e0))
# stepwise linear intra/extrapolation to target years.
# IW: Use interp(). Accepts matrix, so have to rbind and only get 1st row
e0m <- interp(rbind(e0_Males,
e0_Males),
dates_e0,
dates_out,
extrap = TRUE)
e0f <- interp(rbind(e0_Females,
e0_Females),
dates_e0,
dates_out,
extrap = TRUE)
# IW: issue with dimension in case the interpolation is for 1 date only
if(ndates_out==1){
e0m = e0m[1]
e0f = e0f[1]
}else{
e0m = e0m[1,]
e0f = e0f[1,]
}
# avoid divergence: same bx but not kt.
if (prev_divergence){
bxm <- bxf <- (bxm + bxf) * .5
}
# Optimize kt for each LC extrap/interp and sex
ktm_star = ktf_star = c()
for (j in 1:ndates_out){
ktm_star[j] <- optimize(f = interp_lc_lim_kt_min,
interval = c(-50, 50),
ax = axm,
bx = bxm,
age = Age,
sex = "m",
e0_target = e0m[j],
...)$minimum
ktf_star[j] <- optimize(f = interp_lc_lim_kt_min, # TR: add ...
interval = c(-50, 50),
ax = axf,
bx = bxf,
age = Age,
sex = "f",
e0_target = e0f[j],
...)$minimum
}
# get rates with optim k.
nMxm_hat <- exp(axm + bxm %*% t(ktm_star))
nMxf_hat <- exp(axf + bxf %*% t(ktf_star))
ktm <- ktm_star
ktf <- ktf_star
}
# life tables output ------------------------------------------------------------
colnames(nMxm_hat) <- dates_out
colnames(nMxf_hat) <- dates_out
. = NULL
Males_out <-
lapply(colnames(nMxm_hat), function(xx,MX,Age) {
mx <- MX[, xx]
LT <- lt_ambiguous(nMx_or_nqx_or_lx = mx,
type = "m",
Age = Age,
Sex = "m",
Single = Single,
...)
LT$Sex <- "m"
LT$Date <- as.numeric(xx)
LT
}, MX = nMxm_hat, Age = Age) %>%
do.call("rbind", .)
Females_out <-
lapply(colnames(nMxf_hat), function(xx,MX,Age) {
mx <- MX[, xx]
LT <- lt_ambiguous(nMx_or_nqx_or_lx = mx,
type = "m",
Age = Age,
Sex = "f",
Single = Single,
...)
LT$Sex <- "f"
LT$Date <- as.numeric(xx)
LT
}, MX = nMxf_hat, Age = Age) %>%
do.call("rbind", .)
lt_hat <- rbind(Males_out, Females_out)
# for output
lc_params <- list(ax = data.frame(Male = axm, Female = axf),
bx = data.frame(Male = bxm, Female = bxf),
kt = data.frame(Male = ktm, Female = ktf))
return(list(lt_hat = lt_hat,
lc_params = lc_params)
)
}
#' Optimize k
#' @description Optimize estimated k from LC with limited data model,
#' for fitting given e_0 at same dates
#' @details Given LC parameters at some date, change a bit k for replicate already know e_0 values.
#' This is useful to give some sort of flexibility, and not follow strictly linear model implied in LC model,
#' but taking advantage of estimated structure (ax) and change by age (bx) for some trustable period.
#' @param k numeric. k parameter from LC model.
#' @param ax numeric. Vector (same length of age) of parameters from LC model.
#' @param bx numeric. Vector (same length of age) of parameters from LC model.
#' @param age numeric.
#' @param sex numeric.
#' @param e0_target numeric.
#' @param ... Other arguments to be passed on to the \code{\link[DemoTools]{lt_abridged}} function.
#' @export
interp_lc_lim_kt_min <- function(k,
ax,
bx,
age,
sex,
e0_target,
...){
Mx_hat <- as.numeric(interp_lc_lim_abk_m(k, ax, bx))
e0 <- lt_ambiguous(nMx_or_nqx_or_lx = Mx_hat,
Age = age,
Sex = sex,
...)$ex[1]
return(((e0-e0_target)/e0_target)^2)
}
#' wrapper fun for `"interp_lc_lim_estimate"` function
#' @description wrapper fun to estimate rates from LC parameters
#' @inheritParams interp_lc_lim_kt_min
#' @export
interp_lc_lim_abk_m <- function(k,ax,bx){
exp(ax + bx * k)
}
# estimate LC for limited data
#' Estimate LC with limited data parameters
#' @description Estimate LC with limited data from a matrix of rates (age by dates).
#' @details SVD for `ax` and `bx.` Fit a simple linear model for `k` and interpolate/extrapolate for objective dates.
#' @param M numeric. Matrix with many rows as ages and columns as `dates_in`.
#' @param dates_in numeric. Vector of dates with input rates.
#' @param dates_out numeric. Vector of dates for estimate a set of rates.
#' @param SVD logical. Use Singular Value Decomposition for estimate `b` and `k` or Maximum Likelihood Estimation. Default `FALSE` for Maximum Likelihood Estimation.
#' @references
#' \insertRef{Li2004}{DemoTools}
#' @export
interp_lc_lim_estimate <- function(M, dates_in, dates_out, SVD = F){
ndates_in <- length(dates_in)
ax <- rowSums(log(M))/ndates_in
if(SVD==TRUE){
# Singular Value Decomposition
M_svd <- svd(log(M)-ax)
bx <- M_svd$u[, 1]/sum(M_svd$u[, 1])
kto <- M_svd$d[1] * M_svd$v[, 1] * sum(M_svd$u[, 1])
}else{
# likelihood method
kto <- colSums(log(M))-sum(ax) # because sum(bx)==1
bx <- (log(M) - ax)%*%kto/sum(kto^2)
}
c <- 0
c[2] <- (kto[ndates_in] - kto[1])/(dates_in[ndates_in] - dates_in[1])
c[1] <- kto[1] - c[2] * dates_in[1]
# explanation ratio
R = 1- sum((log(M) - (ax + bx %*% t(kto)))^2)/sum((log(M)-ax)^2)
# extrapolated k
kt <- c[1] + c[2] * dates_out
# initial k (useful for avoiding divegence case)
k0 <- c[1] + c[2] * dates_in[1]
return(list(ax=ax,bx=bx,kt=kt,k0=k0,R=R))
}
# smooth rule previous to solve ambiguous
#' Smooth and apply lt_ambiguous
#' @description Considering different mortality input for each sex/year data,
#' smooth older ages with makeham or kannisto in case no law was specified,
#' and return a data.frame with standard LT.
#' @details Makeham is chosen if last age is less than 90. Else Kannisto.
#' @param input data.frame. with cols: Date, Sex, Age, nMx (opt), nqx (opt), lx (opt)
#' @param ... Other arguments to be passed on to the \code{\link[DemoTools]{lt_abridged}} function.
#' @export
lt_smooth_ambiguous <- function(input, ...){
ExtraArgs = c(as.list(environment()), list(...))
ExtraArgs = ExtraArgs[! names(ExtraArgs) %in% c("input")]
# get only cols with values (could entry nMx for some sex/year and nqx or lx for other)
X <- input[!sapply(input, function(x) all(is.na(x)))]
types <- c("nMx","nqx","lx")
this_type <- types[types %in% colnames(X)]
this_sex <- unique(X[["Sex"]])
this_date <- unique(X[["Date"]])
# cases for smooth older ages by default
if(!"extrapLaw" %in% names(ExtraArgs)){
Ageext <- sort(unique(X$Age))
this_extrapFrom <- max(Ageext)
this_OAnew = 100
if(this_extrapFrom < 90){
this_extrapLaw <- "makeham"
if (ExtraArgs$verbose) cat(paste0("A Makeham function was fitted for older ages for sex ",
this_sex, " and date ",this_date,".\n"))
# TR: changed this. 30 could be sort of low in some situations.
this_extrapFit = Ageext[Ageext >= (this_extrapFrom - 30) & ifelse(ExtraArgs$OAG, Ageext < max(Ageext), TRUE)]
}else{
this_extrapLaw <- "kannisto"
if (ExtraArgs$verbose) cat(paste0("A Kannisto function was fitted for older ages for sex ",
this_sex, " and date ",this_date,".\n"))
this_extrapFit = Ageext[Ageext >= 60 & ifelse(ExtraArgs$OAG, Ageext < max(Ageext), TRUE)]
}
# TR: other args not passed in are scoped one level up
thisExtraArgs <- ExtraArgs[!names(ExtraArgs) %in%
c("extrapLaw","extrapFit","extrapFrom","OAnew","Single")]
out <- lt_ambiguous(nMx_or_nqx_or_lx = X[[this_type]],
Age = X[["Age"]],
type = this_type,
Sex = this_sex,
extrapLaw = this_extrapLaw,
extrapFit = this_extrapFit,
extrapFrom = this_extrapFrom,
OAnew = this_OAnew,
...)
}else{
out <- lt_ambiguous(nMx_or_nqx_or_lx = X[[this_type]],
Age = X[["Age"]],
type = this_type,
Sex = this_sex,
...)
}
out$Sex <- this_sex
out$Date <- this_date
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.