# R/geks.R In IndexNumR: Index Number Calculation

#### Documented in GEKSIndex

```# IndexNumR: a package for index number computation
# Copyright (C) 2018 Graham J. White (g.white@unswalumni.com)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

#' GEKS_w
#'
#' Function to compute a GEKS index over a window.
#' The window size is assumed to be the number of periods available in pervar.
#' This is not exposed to the user because GEKSIndex calls this
#' @keywords internal
#' @noRd
GEKS_w <- function(x, pvar, qvar, pervar, indexMethod="tornqvist", prodID,

# get the window length
window <- max(x[[pervar]]) - min(x[[pervar]]) + 1

# initialise some matrices
# matrix of all price indices using each month as the base period
pindices <- matrix(0, nrow = window, ncol = window)

# get the vector of period indices that are inside the window
pi <- unique(x[[pervar]])

# initialise a vector for storing NA pair information
naPairs <- character()

# for every period in the window...
for(j in 1:window){

# for every period in the window...
for(k in 1:window){
# if j=k then the index is 1
if(j==k){
pindices[j,k] <- 1
}
# if we're below the diagonal, then use symmetry to
# save computation time
else if(j>k){
pindices[j,k] <- 1/pindices[k,j]
}
else {
# set the period pi(j) = base period
xt0 <- x[x[[pervar]] == pi[j],]
# set the period pi(k) = period '1'
xt1 <- x[x[[pervar]] == pi[k],]

# if user asked for matching, get matched samples
if(sample=="matched"){
xt1 <- xt1[xt1[[prodID]] %in% unique(xt0[[prodID]]),]
xt0 <- xt0[xt0[[prodID]] %in% unique(xt1[[prodID]]),]
}

# set the price index element to NA if there are no
# matches
if(nrow(xt1)==0){
pindices[j,k] <- NA
naPairs <- paste0(naPairs, paste0("(",j,",",k,")"), collapse = ",")
}
else{
# set the price and quantity vectors
p0 <- xt0[[pvar]]
p1 <- xt1[[pvar]]
q0 <- xt0[[qvar]]
q1 <- xt1[[qvar]]

# calculate the price index for 'base' period j and 'next' period k
switch(tolower(indexMethod),
fisher = {pindices[j,k] <- fisher_t(p0, p1, q0, q1)},
tornqvist = {pindices[j,k] <- tornqvist_t(p0, p1, q0, q1)},
tpd = {pindices[j,k] <- tpd_t(p0, p1, q0, q1, xt0[[prodID]], xt1[[prodID]], biasAdjust, weights)},
walsh = {pindices[j,k] <- walsh_t(p0, p1, q0, q1)},
jevons = {pindices[j,k] <- jevons_t(p0, p1)})
}

}
}
}
# compute the geometric mean of each column of the price indices matrix
pgeo <- apply(pindices, 2, geomean, na.rm = TRUE)

# normalise to the first period
pgeo <- pgeo/pgeo[1]

return(list(pgeo = pgeo, naPairs = naPairs))
}

#' Compute a GEKS multilateral index
#'
#' A function to calculate a GEKS multilateral price index
#'
#' @param x A dataframe containing price, quantity, a time period identifier
#' and a product identifier. It must have column names.
#' @param pvar A character string for the name of the price variable
#' @param qvar A character string for the name of the quantity variable
#' @param prodID A character string for the name of the product identifier
#' @param pervar A character string for the name of the time variable. This variable
#' must contain integers starting at period 1 and increasing in increments of 1 period.
#' There may be observations on multiple products for each time period.
#' @param indexMethod A character string to select the index number method. Valid index
#' number methods are fisher, tornqvist, tpd, jevons or walsh. The default is tornqvist.
#' @param sample A character string specifying whether matching is to be performed.
#' The default is to use matching.
#' If sample=matched then any products that are not present in comparison periods
#' are removed prior to estimating the index for those periods.
#' @param window An integer specifying the length of the window.
#' @param splice A character string specifying the splicing method. Valid methods are
#' window, movement, half, mean, fbew or fbmw, wisp, hasp or mean_pub. The default is mean.
#' See details for important considerations when using fbew and fbmw.
#' @param biasAdjust whether to adjust for bias in the coefficients of the bilateral TPD index.
#' The default is FALSE because making this adjustment will break transitivity of the
#' GEKS index.
#' @param weights the type of weighting for the bilateral TPD index. Options are
#' "unweighted" to use ordinary least squares, "shares" to use weighted least squares
#' with expenditure share weights, and "average" to use weighted least squares
#' with the average of the expenditure shares over the two periods. See details for more
#' information
#' @param intGEKS whether to estimate the intersection GEKS method. This method performs
#' additional product matching over the sample = "matched" option. See Lamboray and Krsinich
#' @param imputePrices the type of price imputation to use for missing prices.
#' Currently only "carry" is supported to used carry-forward/carry-backward prices.
#' Default is NULL to not impute missing prices.
#' @details The splicing methods are used to update the price index when new data become
#' available without changing prior index values. The window, movement, half and mean splices
#' use the most recent index value as the base period, which is multiplied by a price movement
#' computed using new data. The fbew (Fixed Base Expanding Window) and fbmw (Fixed Base Moving
#' Window) use a fixed base onto which the price movement using new data is applied. The base
#' period is updated periodically. IndexNumR calculates which periods are the base periods using
#' \code{seq(from = 1, to = n, by = window - 1)}, so the data must be set up correctly and the
#' right window length chosen. For example, if you have monthly data and want December
#' of each year to be the base period, then the first period in the data must be December
#' and the window must be set to 13.
#' @examples
#' # compute a GEKS mutlilateral index with mean splicing
#' GEKSIndex(CES_sigma_2, pvar = "prices", qvar = "quantities", pervar = "time",
#' prodID = "prodID", indexMethod = "tornqvist", window=11, splice = "mean")
#'
#' # compute a GEKS multilateral index with window splicing and the Fisher index method
#' GEKSIndex(CES_sigma_2, pvar = "prices", qvar = "quantities", pervar = "time",
#' prodID = "prodID", indexMethod = "fisher", window=11, splice = "mean")
#'
#' @references Ivancic, L., W.E. Diewert and K.J. Fox (2011), "Scanner Data,
#' Time Aggregation and the Construction of Price Indexes", Journal of
#' Econometrics 161, 24-35.
#'
#' Lamboray, C. and F. Krsinich (2015), "A Modification of the GEKS Index When
#' Product Turnover is High", Paper presented at the fourteenth Ottawa Group
#' meeting, 20-22 May 2015, Tokyo, Japan.
#'
#' @export
GEKSIndex <- function(x, pvar, qvar, pervar,indexMethod = "tornqvist", prodID,
sample = "matched", window = 13, splice = "mean", biasAdjust = FALSE,
weights = "average", intGEKS = FALSE, imputePrices = NULL){

# check that only valid index methods are chosen
if(!(tolower(indexMethod) %in% c("fisher","tornqvist", "tpd", "walsh", "jevons"))){
stop("Not a valid index number method.")
}

# check that only valid splice methods are chosen
if(!(tolower(splice) %in% c("mean", "window", "movement", "half", "fbew", "fbmw", "wisp", "hasp", "mean_pub"))){
stop("Not a valid splicing method.")
}

# check valid column names are given
colNameCheck <- checkNames(x, c(pvar, qvar, pervar, prodID))
if(colNameCheck\$result == FALSE){
stop(colNameCheck\$message)
}

# check that the time period variable is continuous
timeCheck <- isContinuous(x[[pervar]])
if(timeCheck\$result == FALSE){
stop(paste("The time period variable is not continuous.",
"Missing periods:", timeCheck\$missing))
}

# check that columns are the right class
x <- checkTypes(x, pvar, qvar, pervar)

# get the number of periods
n <- max(x[[pervar]],na.rm = TRUE)
if(n<window){
stop("The window length exceeds the number of periods in the data")
}

# apply price imputation
if(!is.null(imputePrices)){
switch(imputePrices,
"carry" = {x <- imputeCarryPrices(x, pvar, qvar, pervar, prodID)},
stop("Invalid imputePrices argument"))
}

# sort the dataset by time period and product ID
x <- x[order(x[[pervar]], x[[prodID]]),]

# initialise some matrices
# final price index
pGEKS <- matrix(0, nrow = n, ncol = 1)

# set the sequence of base periods for fbew and fbmw splices
bases <- seq(from = 1, to = n, by = window - 1)

# first estimate a GEKS index for the first (window) observations
# subset the window of data to use
xWindow <- x[x[[pervar]] >= 1 & x[[pervar]] <= window,]

# call GEKS_w on first window
if(intGEKS){
tempGEK <- intGEKS_w(xWindow, pvar, qvar, pervar, indexMethod, prodID,
} else {
tempGEK <- GEKS_w(xWindow, pvar, qvar, pervar, indexMethod, prodID,
}

pGEKS[1:window,1] <- tempGEK\$pgeo

# initiate a vector of warnings for NAs
if(length(tempGEK\$naPairs) > 0){
naWarn <- paste0("1 to ",window,": ",tempGEK\$naPairs,"\n")
}
else{
naWarn <- character()
}

# use a splicing method to compute the rest of the index
if(n>window){
for(i in 2:(n-window+1)){

# find the base period for fbew and fbmw splices
base <- max(bases[bases <= i + window - 2])

# set the old GEKS window
if(i==2){
old <- pGEKS[(i-1):(i+window-2),1]
}
else {
old <- new
}

# set the base value for fbew
fbewBase <- pGEKS[base,1]

# fetch the next window of data
if(splice == "fbew"){
xWindow <- x[x[[pervar]] >= base & x[[pervar]] < i + window,]
}
else {
xWindow <- x[x[[pervar]] >= i & x[[pervar]] < i + window,]
}

# call GEKS_w on first window
if(intGEKS){
tempGEK <- intGEKS_w(xWindow, pvar, qvar, pervar, indexMethod, prodID,
} else {
tempGEK <- GEKS_w(xWindow, pvar, qvar, pervar, indexMethod, prodID,
}
new <- tempGEK\$pgeo

if(length(tempGEK\$naPairs) > 0){
naWarn <- paste0(naWarn, i, " to ",i+window-1,": ",
tempGEK\$naPairs, "\n")
}

# splice the new datapoint on
switch(splice,
fbew = {pGEKS[i+window-1,1] <- fbewBase*new[length(new)]},
fbmw = {pGEKS[i+window-1,1] <- fbewBase*new[length(new)]/new[length(new)-(i+window-1-base)]},
wisp = {pGEKS[i+window-1,1] <- splice_t(pGEKS[i+window-2,1], pGEKS[(i-1):(i+window-2)], new, method="window")},
hasp = {pGEKS[i+window-1,1] <- splice_t(pGEKS[i+window-2,1], pGEKS[(i-1):(i+window-2)], new, method="half")},
mean_pub = {pGEKS[i+window-1,1] <- splice_t(pGEKS[i+window-2,1], pGEKS[(i-1):(i+window-2)], new, method="mean")},
pGEKS[i+window-1,1] <- splice_t(pGEKS[i+window-2,1], old, new, method=splice))

}
}

# if there were periods where there were no overlapping products then
# print a warning
if(length(naWarn) > 0){
warning(paste0("The following windows contained bilateral comparisons where no overlapping products were found: \n",
"Window: Pairs \n",naWarn))
}

return(pGEKS)
}

#' intGEKS_w
#'
#' Function to compute the intersection GEKS index over a window.
#' The window size is assumed to be the number of periods available in pervar.
#' This is not exposed to the user because GEKSIndex calls this
#' @keywords internal
#' @noRd
intGEKS_w <- function(x, pvar, qvar, pervar, indexMethod="tornqvist", prodID,

# get the window length
window <- max(x[[pervar]]) - min(x[[pervar]]) + 1

# initialise some matrices
# matrix of all price indices using each month as the base period
pindices <- matrix(1, nrow = window, ncol = 1)
pgeks <- matrix(1, nrow = window, ncol = 1)

# get the vector of period indices that are inside the window
pi <- unique(x[[pervar]])

# initialise a vector for storing NA pair information
naPairs <- character()

# for every period in the window...
for(j in 2:window){

# for every period in the window...
for(k in 1:window){

# set the period pi(j-1) = base period
xt0 <- x[x[[pervar]] == pi[j-1],]
# set the period pi(j) = comparison period
xt1 <- x[x[[pervar]] == pi[j],]
# set the k period sample
xtk <- x[x[[pervar]] == pi[k],]
intProds <- unique(xtk[[prodID]])

# get matched sample across periods j, j-1, k
xt1 <- xt1[xt1[[prodID]] %in% unique(xt0[[prodID]]),]
xt0 <- xt0[xt0[[prodID]] %in% unique(xt1[[prodID]]),]
xt1 <- xt1[xt1[[prodID]] %in% intProds,]
xt0 <- xt0[xt0[[prodID]] %in% intProds,]
xtk <- xtk[xtk[[prodID]] %in% unique(xt0[[prodID]]),] # only need to do this against one of xt0 or xt1

# set the price index element to NA if there are no
# matches
if(nrow(xt1) == 0){
pindices[j,k] <- NA
naPairs <- paste0(naPairs, paste0("(",j,",",k,")"), collapse = ",")
}
else{
# set the price and quantity vectors
p0 <- xt0[[pvar]]
p1 <- xt1[[pvar]]
q0 <- xt0[[qvar]]
q1 <- xt1[[qvar]]
pk <- xtk[[pvar]]
qk <- xtk[[qvar]]

# calculate the price index for 'base' period j-1 and period k
pbIndex <- switch(tolower(indexMethod),
fisher = {fisher_t(p0, pk, q0, qk)},
tornqvist = {tornqvist_t(p0, pk, q0, qk)},
tpd = {tpd_t(p0, pk, q0, qk, xt0[[prodID]], xtk[[prodID]], biasAdjust, weights)},
walsh = {walsh_t(p0, pk, q0, qk)},
jevons = {jevons_t(p0, pk)})

# calculate the price index for period k and comparison period j
pcIndex <- switch(tolower(indexMethod),
fisher = {fisher_t(pk, p1, qk, q1)},
tornqvist = {tornqvist_t(pk, p1, qk, q1)},
tpd = {tpd_t(pk, p1, qk, q1, xtk[[prodID]], xt1[[prodID]], biasAdjust, weights)},
walsh = {walsh_t(pk, p1, qk, q1)},
jevons = {jevons_t(pk, p1)})

pindices[k, 1] <- (pbIndex*pcIndex)
}

}

pgeks[j, 1] <- prod(pindices)^(1/window)

}

# normalise to the first period
pgeks <- pgeks/pgeks[1,1]

# chain the period-on-period indexes
pgeks <- cumprod(pgeks)

return(list(pgeo = pgeks, naPairs = naPairs))
}
```

## Try the IndexNumR package in your browser

Any scripts or data that you put into this service are public.

IndexNumR documentation built on Feb. 7, 2022, 5:09 p.m.