R/index.aggregation.r
In hicp: Harmonised Index of Consumer Prices

Documented in aggregate aggregate.tree carli disaggregate fisher harmonic jevons laspeyres paasche toernqvist walsh

# START

# Title:  Index number methods and aggregation
# Author: Sebastian Weinand
# Date:   25 July 2025

# bilateral index functions:
jevons <- function(x){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  
  # compute index:
  if(all(is.na(x))){
    out <- NA_real_  # avoid NaN in case no complete observations are present
  }else{
    out <- exp(mean(log(x), na.rm=TRUE))
  }
  
  # return output:
  return(out)
  
}
carli <- function(x){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  
  # compute index:
  if(all(is.na(x))){
    out <- NA_real_ # avoid NaN in case no complete observations are present
  }else{
    out <- mean(x, na.rm=TRUE)
  }
  
  # return output:
  return(out)
  
}
harmonic <- function(x){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  
  # compute index:
  if(all(is.na(x))){
    out <- NA_real_ # avoid NaN in case no complete observations are present
  }else{
    out <- 1/mean(1/x, na.rm=TRUE)
  }
  
  # return output:
  return(out)
  
}
laspeyres <- function(x, w0){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  check.num(x=w0, int=c(0,Inf))
  check.lengths(x=x, y=w0)
  
  # complete cases:
  idx <- stats::complete.cases(x, w0)
  
  # compute index:
  if(any(idx)){
    
    # subset to complete cases:
    x <- x[idx]
    w0 <- w0[idx]
    # if index is NA, no computation is possible.
    # dropping specific weights implies re-normalizing
    # the remaining weights.
    
    # renormalize weights:
    w0 <- w0/sum(w0, na.rm=TRUE)
    
    # compute index:
    out <- sum(x*w0, na.rm=TRUE)
    # see Ilo et al. (2004), p. 265, formula (15.8)
    
  }else{
    
    # set to NA as otherwise sum(NA, na.rm=TRUE)
    # will return 0:
    out <- NA_real_
    
  }
  
  # return output:
  return(out)
  
}
paasche <- function(x, wt){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  check.num(x=wt, int=c(0,Inf))
  check.lengths(x=x, y=wt)
  
  # complete cases:
  idx <- stats::complete.cases(x, wt)
  
  # compute index:
  if(any(idx)){
    
    # subset to complete cases:
    x <- x[idx]
    wt <- wt[idx]
    # if index is NA, no computation is possible.
    # dropping specific weights implies re-normalizing
    # the remaining weights.

    # renormalize weights:
    wt <- wt/sum(wt, na.rm=TRUE)
    
    # compute index:
    out <- 1/sum((x^(-1))*wt, na.rm=TRUE)
    # see Ilo et al. (2004), p. 266, formula (15.9)
    
  }else{
    
    # set to NA as otherwise sum(NA, na.rm=TRUE)
    # will return 0:
    out <- NA_real_
    
  }
  
  # return output:
  return(out)
  
}
fisher <- function(x, w0, wt){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  check.num(x=w0, int=c(0,Inf))
  check.num(x=wt, int=c(0,Inf))
  check.lengths(x=x, y=w0)
  check.lengths(x=x, y=wt)
  
  # complete cases:
  idx <- stats::complete.cases(x, w0, wt)
  
  # compute index:
  if(any(idx)){
    
    # subset to complete cases:
    x <- x[idx]
    w0 <- w0[idx]
    wt <- wt[idx]
    
    # renormalize weights:
    w0 <- w0/sum(w0, na.rm=TRUE)
    wt <- wt/sum(wt, na.rm=TRUE)
    
    # compute index:
    out <- sqrt(laspeyres(x=x, w0=w0)*paasche(x=x, wt=wt))
    # see Ilo et al. (2004), p. 267, formula (15.12)
    
  }else{
    
    # set to NA as otherwise sum(NA, na.rm=TRUE)
    # will return 0:
    out <- NA_real_
    
  }
  
  # return output:
  return(out)
  
}
toernqvist <- function(x, w0, wt){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  check.num(x=w0, int=c(0,Inf))
  check.num(x=wt, int=c(0,Inf))
  check.lengths(x=x, y=w0)
  check.lengths(x=x, y=wt)
  
  # complete cases:
  idx <- stats::complete.cases(x, w0, wt)
  
  # compute index:
  if(any(idx)){
    
    # subset to complete cases:
    x <- x[idx]
    w0 <- w0[idx]
    wt <- wt[idx]
    
    # renormalize weights:
    w0 <- w0/sum(w0, na.rm=TRUE)
    wt <- wt/sum(wt, na.rm=TRUE)
    
    # compute index:
    out <- prod(x^((w0+wt)/2), na.rm=TRUE)
    # see Ilo et al. (2004), p. 283, formula (15.81)
    
  }else{
    
    # set to NA as otherwise sum(NA, na.rm=TRUE)
    # will return 0:
    out <- NA_real_
    
  }
  
  # return output:
  return(out)
  
}
walsh <- function(x, w0, wt){
  
  # input checks:
  check.num(x=x, int=c(0,Inf))
  check.num(x=w0, int=c(0,Inf))
  check.num(x=wt, int=c(0,Inf))
  check.lengths(x=x, y=w0)
  check.lengths(x=x, y=wt)
  
  # complete cases:
  idx <- stats::complete.cases(x, w0, wt)
  
  # compute index:
  if(any(idx)){
    
    # subset to complete cases:
    x <- x[idx]
    w0 <- w0[idx]
    wt <- wt[idx]
    
    # renormalize weights:
    w0 <- w0/sum(w0, na.rm=TRUE)
    wt <- wt/sum(wt, na.rm=TRUE)
    
    # compute index:
    out <- sum(sqrt(x)*sqrt(w0*wt), na.rm=TRUE)/sum(sqrt(1/x)*sqrt(w0*wt), na.rm=TRUE)
    # see Ilo et al. (2004), p. 269, formula (15.21)
    
  }else{
    
    # set to NA as otherwise sum(NA, na.rm=TRUE)
    # will return 0:
    out <- NA_real_
    
  }
  
  # return output:
  return(out)
  
}

# non-exported helper function to match index formulas to weights:
matchformula <- function(f, x, w0, wt){
  
  fargs <- names(formals(f))
  
  if(all(c("w0","wt")%in%fargs)){
    out <- f(x=x, w0=w0, wt=wt)
  }else{
    if(all(!c("w0","wt")%in%fargs)){
      out <- f(x=x)
    }
    if("w0"%in%fargs){
      out <- f(x=x, w0=w0)
    }
    if("wt"%in%fargs){
      out <- f(x=x, wt=wt)
    }
  }
  
  return(out)
  
}

# compute one or multiple user-defined aggregates:
aggregate <- function(x, w0, wt, id, formula=laspeyres, agg=list(), settings=list()){
  
  # set defaults:
  if(is.null(settings$exact)) settings$exact <- TRUE
  if(is.null(settings$names)) settings$names <- NULL
  if(is.null(settings$chatty)) settings$chatty <- getOption("hicp.chatty")
  
  # input checks:
  check.num(x=x, min.len=0, int=c(0,Inf))
  check.num(x=w0, min.len=0, int=c(0,Inf), miss.ok=TRUE)
  check.num(x=wt, min.len=0, int=c(0,Inf), miss.ok=TRUE)
  check.char(x=id, min.len=0)
  check.lengths(x=x, y=id)
  check.list(x=agg, min.len=0, miss.ok=TRUE, null.ok=TRUE, names=FALSE)
  check.log(x=settings$exact, min.len=1, max.len=1, na.ok=FALSE)
  check.char(x=settings$names, min.len=1, null.ok=TRUE, na.ok=FALSE)
  check.log(x=settings$chatty, min.len=1, max.len=1, na.ok=FALSE)
  check.lengths(x=agg, y=settings$names)
  
  # input checks on weights:
  if(missing(w0)){w0miss <- TRUE; w0 <- rep(0, length(x))}else{w0miss <- FALSE}
  if(missing(wt)){wtmiss <- TRUE; wt <- rep(0, length(x))}else{wtmiss <- FALSE}
  check.lengths(x=x, y=w0)
  check.lengths(x=x, y=wt)
  
  # input checks on formula:
  # we have to substitute first; otherwise, if x=laspeyres and 
  # formula=laspeyres, formula would be overwritten by x
  formula0 <- substitute(formula) 
  formula <- eval(formula0)
  if(is.function(formula)){
    formula <- stats::setNames(list(match.fun(formula)), deparse(formula0))
  }
  check.list(x=formula, miss.ok=FALSE, null.ok=FALSE, names=TRUE)
  fnw0 <- sapply(X=formula, FUN=function(z) !is.null(formals(z)$w0) & w0miss)
  fnwt <- sapply(X=formula, FUN=function(z) !is.null(formals(z)$wt) & wtmiss)
  fnw <- fnw0 | fnwt
  if(any(fnw, na.rm=TRUE)){
    fnok <- paste(names(which(fnw)), collapse=",")
    if(sum(fnw)>1L) fnok <- paste0("(", fnok, ")")
    stop(paste0("Non-valid input for w0 or wt -> weights required for 'formula=", fnok, "'"), call.=FALSE)
  }
  
  # check for duplicated aggregate names:
  if(sum(duplicated(settings$names))>0L && settings$chatty){
    warning("Duplicated aggregate names supplied -> names adjusted by 'make.unique(settings$names)'", call.=FALSE)
  }
  
  # set aggregate names:
  aggnm <- character()
  if(length(agg)>0L){
    if(length(settings$names)>0L){
      aggnm <- make.unique(names=settings$names)
    }else{
      aggnm <- as.character(seq_along(agg))
    }
  }
  
  # set output column names:
  cnm <- c(names(formula),"w0","wt")
  
  # subset to complete cases but store initial ids for checking:
  id0 <- id
  nonas <- stats::complete.cases(x, w0, wt, id)
  x <- x[nonas]
  w0 <- w0[nonas]
  wt <- wt[nonas]
  id <- id[nonas]
  
  # print user warning:
  if(settings$chatty && all(!nonas)){
    warning("No complete cases available for calculation", call.=FALSE)
  }
  
  # compute user-defined aggregates:
  noagg <- !logical(length=length(agg))
  out <- matrix(data=NA_real_, nrow=length(agg), ncol=length(cnm), dimnames=list(NULL, cnm))
  for(j in seq_along(agg)){
    
    # check if all ids are available:
    aggids.avail <- agg[[j]]%in%id0
    
    # subset data to relevant ids:
    idx1 <- id%in%agg[[j]]
    # duplicated elements in 'agg' without impact on aggregation
    
    # calculations only if:
    if((all(aggids.avail) || !settings$exact) && any(idx1)){
      
      # adjust logical:
      noagg[j] <- FALSE
      
      # subset indices and weights:
      x1 <- x[idx1]
      w01 <- w0[idx1]
      wt1 <- wt[idx1]
      
      # aggregate indices and sum weights:
      out[j, ] <- as.vector(c(
        mapply(FUN=matchformula, 
               f=formula, 
               MoreArgs=list(x=x1, w0=w01, wt=wt1), 
               SIMPLIFY=TRUE),
        "w0"=sum(w01, na.rm=TRUE),
        "wt"=sum(wt1, na.rm=TRUE)))
      
    }
    
  }
  
  # print message if there were any user-defined groups that were no aggregated:
  if(settings$chatty && any(noagg) && any(nonas)){
    msg <- "Not all elements in 'agg' were found in 'id' -> no calculation for 'agg=("
    message(
      paste(msg, paste(aggnm[noagg], collapse=","), ")'", sep="")
    )
  }
  
  # add aggregate ids and set column order:
  out <- data.table::data.table("id"=aggnm, out)
  data.table::setcolorder(x=out, c("id","w0","wt"))
  # do not set a key; otherwise, the output is resorted
  
  # drop weights if these were not provided by the user:
  if(w0miss) out$w0 <- NULL
  if(wtmiss) out$wt <- NULL
  
  # return output to console:
  return(out)
  
}

# compute one or multiple user-defined aggregates:
disaggregate <- function(x, w0, id, agg=list(), settings=list()){
  
  # set defaults:
  if(is.null(settings$exact)) settings$exact <- TRUE
  if(is.null(settings$names)) settings$names <- NULL
  if(is.null(settings$chatty)) settings$chatty <- getOption("hicp.chatty")
  
  # input checks:
  check.num(x=x, min.len=0, int=c(0,Inf))
  check.num(x=w0, min.len=0, int=c(0,Inf))
  check.char(x=id, min.len=0)
  check.lengths(x=x, y=w0)
  check.lengths(x=x, y=id)
  check.list(x=agg, min.len=0, miss.ok=TRUE, null.ok=TRUE)
  check.log(x=settings$exact, min.len=1, max.len=1, na.ok=FALSE)
  check.char(x=settings$names, min.len=1, null.ok=TRUE, na.ok=FALSE)
  check.log(x=settings$chatty, min.len=1, max.len=1, na.ok=FALSE)
  check.lengths(x=agg, y=settings$names)
  
  # check for duplicated aggregate names:
  if(sum(duplicated(settings$names))>0L && settings$chatty){
    warning("Duplicated aggregate names supplied -> names adjusted by 'make.unique(settings$names)'", call.=FALSE)
  }
  
  # set aggregate names:
  aggnm <- character()
  if(length(agg)>0L){
    if(length(settings$names)>0L){
      aggnm <- make.unique(names=settings$names)
    }else{
      aggnm <- as.character(seq_along(agg))
    }
  }
  
  # set output column names:
  cnm <- c("w0","laspeyres")
  
  # subset to complete cases but store initial ids for checking:
  id0 <- id
  nonas <- stats::complete.cases(x, w0, id)
  x <- x[nonas]
  w0 <- w0[nonas]
  id <- id[nonas]
  
  # print user warning:
  if(settings$chatty & all(!nonas)){
    warning("No complete cases available for calculation", call.=FALSE)
  }
  
  # compute user-defined aggregates:
  out <- matrix(data=NA_real_, nrow=length(agg), ncol=length(cnm), dimnames=list(NULL, cnm))
  noagg <- !logical(length=length(agg))
  for(j in seq_along(agg)){
    
    # check if all ids are available:
    aggids.avail <- agg[[j]]%in%id0
    
    # subset data to relevant ids:
    idx1 <- id%in%agg[[j]]
    idx2 <- id%in%names(agg)[[j]]
    # duplicated elements in 'agg' without impact on aggregation
    
    # calculations only if:
    if((all(aggids.avail) || !settings$exact) && any(idx1) && any(idx2)){
      
      # adjust logical:
      noagg[j] <- FALSE
      
      # subset indices and weights:
      x1 <- x[idx1]
      w01 <- w0[idx1]
      x2 <- x[idx2]
      w02 <- w0[idx2]
      
      # deduct indices and sum weights:
      out[j, ] <- c(w02-sum(w01), (w02*x2-sum(w01*x1))/(w02-sum(w01)))
      
    }
    
  }
  
  # print message if there were any user-defined groups that were no aggregated:
  if(settings$chatty && any(noagg) && any(nonas)){
    msg <- "Not all elements in 'agg' were found in 'id' -> no calculation for 'agg=("
    message(
      paste(msg, paste(aggnm[noagg], collapse=","), ")'", sep="")
    )
  }
  
  # set index and weights to NA if one is negative:
  out[rowSums(out<0L)>0L, ] <- NA_real_
  out[is.nan(out)] <- NA_real_
  # this can happen if aggregates are not properly
  # defined, e.g., the total is deducted from a subcomponent
  
  # add aggregate ids and set column order:
  out <- data.table::data.table("id"=aggnm, out)
  data.table::setcolorder(x=out, c("id","w0","laspeyres"))
  # do not set a key; otherwise, the output is resorted
  
  # return output to console:
  return(out)
  
}

# step-wise index aggregation following the COICOP tree:
aggregate.tree <- function(x, w0, wt, id, formula=laspeyres, settings=list()){
  
  # set defaults:
  if(is.null(settings$coicop.version)) settings$coicop.version <- getOption("hicp.coicop.version")
  if(is.null(settings$all.items.code)) settings$all.items.code <- getOption("hicp.all.items.code")
  if(is.null(settings$coicop.bundles)) settings$coicop.bundles <- getOption("hicp.coicop.bundles")
  if(is.null(settings$chatty)) settings$chatty <- getOption("hicp.chatty")
  
  # input checks:
  check.num(x=x, min.len=0, int=c(0,Inf))
  check.num(x=w0, min.len=0, int=c(0,Inf), miss.ok=TRUE)
  check.num(x=wt, min.len=0, int=c(0,Inf), miss.ok=TRUE)
  check.char(x=id, min.len=0)
  check.lengths(x=x, y=id)
  check.char(x=settings$coicop.version, min.len=1, max.len=1, na.ok=FALSE)
  check.char(x=settings$all.items.code, min.len=1, max.len=1, na.ok=FALSE)
  check.list(x=settings$coicop.bundles, min.len=0, names=TRUE)
  check.log(x=settings$chatty, min.len=1, max.len=1, na.ok=FALSE)
  
  # input checks on weights:
  if(missing(w0)){w0miss <- TRUE; w0 <- rep(0, length(x))}else{w0miss <- FALSE}
  if(missing(wt)){wtmiss <- TRUE; wt <- rep(0, length(x))}else{wtmiss <- FALSE}
  check.lengths(x=x, y=w0)
  check.lengths(x=x, y=wt)
  
  # input checks on formula:
  # input checks on formula:
  # we have to substitute first; otherwise, if x=laspeyres and 
  # formula=laspeyres, formula would be overwritten by x
  formula0 <- substitute(formula) 
  formula <- eval(formula0)
  if(is.function(formula)){
    formula <- stats::setNames(list(match.fun(formula)), deparse(formula0))
  }
  check.list(x=formula, miss.ok=FALSE, null.ok=FALSE, names=TRUE)
  fnw0 <- sapply(X=formula, FUN=function(z) !is.null(formals(z)$w0) & w0miss)
  fnwt <- sapply(X=formula, FUN=function(z) !is.null(formals(z)$wt) & wtmiss)
  fnw <- fnw0 | fnwt
  if(any(fnw, na.rm=TRUE)){
    fnok <- paste(names(which(fnw)), collapse=",")
    if(sum(fnw)>1L) fnok <- paste0("(", fnok, ")")
    stop(paste0("Non-valid input for w0 or wt -> weights required for 'formula=", fnok, "'"), call.=FALSE)
  }
  
  # set all-items code:
  allid <- settings$all.items.code
  
  # gather data:
  dt <- data.table::data.table(id, w0, wt, x)
  dt <- dt[hicp::is.coicop(id=id, settings=settings) | hicp::is.bundle(id=id, settings=settings),] # keep only valid coicop codes and bundle codes
  if(!all(is.na(dt$x))) dt <- dt[!is.na(x), ] # drop these NAs because they would bias aggregation
  dt[, names(formula) := x] # copy values for each formula
  dt[, "x":=NULL] # drop initial values
  data.table::setkey(x=dt, key="id") # do not set the key before
  
  # stop if aggregation not possible:
  if(nrow(dt)<1L){
    
    if(settings$chatty){
      warning("No complete cases available for calculation", call.=FALSE)
    }
    
    # keep empty data.table with all relevant columns:
    out <- cbind(dt[0,], "is_aggregated"=logical())
    
  }else{
    
    # derive aggregation structure:
    p <- parent(id=dt$id, usedict=TRUE, closest=FALSE, k=1:9, settings=settings)
    dtagg <- data.table("id"=rep(x=dt$id, lengths(p)), "parent"=unlist(x=p, use.names=FALSE))
    dtagg[, "id":=c(parent[-1], id[1]), by="id"]
    dtagg <- rbindlist(l=list(data.table("id"=allid, "parent"=NA_character_), dtagg))
    dtagg <- unique(dtagg)
    dtagg <- dtagg[keep.bundle(id=id, settings=settings),]
    dtagg[, "lvl":=hicp::level(id=id, settings=settings)]
    dtagg <- merge(x=dtagg, y=dt, by="id", all.x=TRUE, sort=FALSE)
    dtagg[, "is_aggregated":=FALSE]
    
    # compute aggregates from bottom to top levels in aggregation structure:
    lvl <- NULL # avoid no visible binding note when checking
    lvls <- max(dtagg$lvl):(min(dtagg$lvl)+1L)
    for(i in lvls){
      
      # aggregate prices and sum weights for each parent:
      res <- dtagg[lvl==i, 
                   c("is_aggregated"=TRUE,
                     mapply(FUN=matchformula, f=formula, x=.SD, MoreArgs=list(w0=w0, wt=wt), SIMPLIFY=FALSE),
                     "w0"=if(all(is.na(w0))){NA_real_}else{sum(w0, na.rm=TRUE)},
                     "wt"=if(all(is.na(wt))){NA_real_}else{sum(wt, na.rm=TRUE)}
                     ),
                   by=list("id"=parent),
                   .SDcols=names(formula),
                   env=list(i=i)]
      
      # merge and overwrite existing values:
      dtagg[res, names(res):=res, on="id"]
      
    }
    
    # prepare results, set column order and key:
    out <- subset(x=dtagg, select=-c(parent,lvl))
    
  }
  
  # set column order and key of output:
  data.table::setcolorder(x=out, c("id","is_aggregated","w0","wt"))
  data.table::setkeyv(x=out, cols="id")

  # drop weights if these were not provided by the user:
  if(w0miss) out$w0 <- NULL
  if(wtmiss) out$wt <- NULL
  
  # return output to console:
  return(out)
  
}

# END
Any scripts or data that you put into this service are public.
hicp documentation built on Aug. 8, 2025, 6:30 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
hicp
Harmonised Index of Consumer Prices

R/index.aggregation.r
In hicp: Harmonised Index of Consumer Prices

Defines functions aggregate.tree disaggregate aggregate matchformula walsh toernqvist fisher paasche laspeyres harmonic carli jevons

Documented in aggregate aggregate.tree carli disaggregate fisher harmonic jevons laspeyres paasche toernqvist walsh

Try the hicp package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hicp Harmonised Index of Consumer Prices

R/index.aggregation.r In hicp: Harmonised Index of Consumer Prices

Defines functions aggregate.tree disaggregate aggregate matchformula walsh toernqvist fisher paasche laspeyres harmonic carli jevons

Documented in aggregate aggregate.tree carli disaggregate fisher harmonic jevons laspeyres paasche toernqvist walsh

Try the hicp package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hicp
Harmonised Index of Consumer Prices

R/index.aggregation.r
In hicp: Harmonised Index of Consumer Prices