The basic premise of bootstrapping Das Gupta's rate decomposition can be thought of as expanding out to unit-level data, re-sampling, then aggregating back up.
Data from Wang 2000:
#| message: false #| warning: false library(tidyverse) library(DasGuptR) eg.wang2000 <- data.frame( pop = rep(c("male","female"), e = 12), ethnicity = rep(1:3, e = 4), age_group = rep(1:4, 3), size = c( 130,1305,1539,316,211,697,334,48,105,475,424,49, 70,604,428,43,55,127,44,9,72,178,103,12 ), rate = c( 12.31,34.90,52.91,44.44,16.67,36.40,51.20,41.67,12.38,19.20,21.23,12.50, 17.14,35.55,48.71,55.81,14.55,39.37,32.56,55.56,22.22,20.34,13.59,8.33 ) )
To do this with the DasGuptR package, we must first create a function to essentially "uncount" data into individual level data (where nrow = size of total population), then re-sample with replacement, then aggregate back up to the level of the original data.
getBS <- function(){ # expand out exp <- eg.wang2000 |> mutate( r1 = (rate/100)*size, r0 = (1-(rate/100))*size ) |> select(pop,ethnicity,age_group,r0:r1) |> pivot_longer(r0:r1, names_to = "r",values_to = "n") |> mutate(n = as.integer(n)) |> uncount(n) # sample for each pop bs <- lapply(unique(eg.wang2000$pop), \(p) slice_sample(exp[exp$pop==p,], prop=1,replace = TRUE) |> group_by(ethnicity,age_group) |> reframe( pop = p, size = n(), rate = mean(r=="r1")*100 ) |> ungroup()) do.call(rbind,bs) }
Take 500 resamples:
bsamps <- tibble( k = 1:500, i = map(1:500, ~getBS()) )
and apply the standardisation to each resample:
#| message: false #| warning: false bsamps <- bsamps |> mutate( dgo = map(i, ~dgnpop(., pop = "pop", factors = "rate", id_vars = c("ethnicity","age_group"), crossclassified = "size")) )
Here are the original difference effects:
#| message: false dgnpop(eg.wang2000, pop = "pop", factors = "rate", id_vars = c("ethnicity","age_group"), crossclassified = "size") |> dg_table()
And here are the standard errors:
bsamps |> select(k,dgo) |> unnest(dgo) |> group_by(k, factor) |> reframe( diff = rate[pop=="female"]-rate[pop=="male"] ) |> group_by(factor) |> reframe( se = sd(diff) )
Which we can take as a proportion of the crude rate difference:
#| echo: false #| message: false dd <- dgnpop(eg.wang2000, pop = "pop", factors ="rate", id_vars = c("ethnicity","age_group"), crossclassified = "size") |> dg_table() |> rownames_to_column(var = "factor") library(ggdist) theme_set(theme_ggdist()) bsamps |> select(k,dgo) |> unnest(dgo) |> group_by(k, factor) |> reframe( decomp = ( rate[pop=="male"]-rate[pop=="female"] )/ dd$diff[4]*100 ) |> filter(factor != "crude") |> ggplot(aes(x=decomp, y=factor, fill=factor))+ stat_halfeye()+ scale_fill_viridis_d(option="C", begin = 0.5, end = .9)+ guides(fill="none")
We can use this same approach to estimate uncertainty in adjusted rates. For instance, using the Scottish Reconvictions data:
#| eval: false data(reconv) src <- reconv |> transmute( pop = year, sex=Sex, age=Age, rate = prev_rate, size = offenders, r1 = rate*size, r0 = (1-rate)*size ) getBS <- function(){ # expand out exp <- src |> select(pop,sex,age,r0,r1) |> pivot_longer(r0:r1, names_to="r",values_to = "n") |> mutate(n = as.integer(n)) |> uncount(n) # sample for each pop bs <- lapply(unique(src$pop), \(p) slice_sample(exp[exp$pop==p,], prop=1,replace = TRUE) |> group_by(sex,age) |> reframe( pop = p, size = n(), rate = mean(r=="r1") ) |> ungroup()) do.call(rbind,bs) } # 500 resamples bsamps <- tibble( k = 1:500, i = map(1:500, ~getBS()) ) # apply DG on each resample bsamps <- bsamps |> mutate( dgo = map(i, ~dgnpop(., pop="pop",factors="rate", id_vars=c("age","sex"), crossclassified="size")) )
#| echo: false bsamps <- readRDS("bsamps500.RDS") bsamps |> transmute(rates = map(dgo,"rates")) |> unnest(rates) |> mutate(pop=as.numeric(as.character(pop))) |> #group_by(pop,factor) |> #median_qi(rate, .width = c(.5,.8,.95)) |> #ggplot(aes(x = pop, y = rate, ymin = .lower, ymax = .upper)) + #geom_lineribbon(aes(group=factor)) + ggplot(aes(x = pop, y = rate, group = factor, fill = after_stat(.width))) + stat_lineribbon(.width = ppoints(50))+ facet_wrap(~factor)+ scale_fill_distiller()+ guides(fill="none")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.