Multilateral

# Chunk options applied to every code chunk in this vignette
knitr::opts_chunk$set(
  fig.width = 8,
  comment = "#>",
  collapse = TRUE
)

Overview

The multilateral package provides one user-facing function, multilateral(). The user provides the necessary attributes of the dataset for the respective multilateral method.

This returns a list object containing the continuous spliced index, each individual window's calculation (useful for diagnostics), and splicing information if applicable.

# Install the development version from GitHub if needed:
# devtools::install_github("MjStansfi/multilateral")
library(multilateral)

# TPD index on the turvey data, using 13-period windows joined with the
# "geomean" splice
tpd_index <- multilateral(
  period = turvey$month,
  id = turvey$commodity,
  price = turvey$price,
  quantity = turvey$quantity,
  index_method = "TPD",
  window_length = 13,
  splice_method = "geomean"
)

# Inspect the structure of the returned object
str(tpd_index)

Showcase of index methods you can apply

The package provides two lazily loaded datasets: 'turvey' and 'synthetic_gfk'. The first dataset provides price and quantity information for products with a unique ID. The second dataset also provides price and quantity information for products with a unique ID, but additionally includes de-identified characteristics.

Note for this example we do not provide a window length, therefore the index is calculated over the whole dataset.

str(turvey)
library(ggplot2)

index_methods <- c("TPD", "GEKS-T", "GEKS-J", "GEKS-F")

# Record wall-clock time around the whole batch of index calculations
start_run_time <- Sys.time()
turvey_multilats <- lapply(index_methods, function(index_method) {
  # No window_length is passed here
  result <- multilateral(
    period = turvey$month,
    id = turvey$commodity,
    price = turvey$price,
    quantity = turvey$quantity,
    index_method = index_method
  )

  # Label the series so each method gets its own colour in the plot
  result$index$type <- index_method
  result$index
})
end_run_time <- Sys.time()

# Stack the per-method series into a single table for plotting
turvey_multilats <- do.call(rbind, turvey_multilats)

plot <- ggplot(turvey_multilats) +
  geom_line(aes(x = period, y = index, colour = type))

print(plot)

The output below shows the time taken to run all four index methods above, highlighting the efficiency of the calculation.

# Elapsed time between the two timestamps taken around the index runs
print(difftime(end_run_time, start_run_time))

Data sources with features

The above methods could also be applied to this dataset

# Display the structure of the synthetic_gfk dataset
str(synthetic_gfk)

Now we wrangle this into the necessary format, that is a unique observation for any given period.

library(dplyr)

# Total quantity and value within each month/product pair, then drop the
# duplicated rows that the aggregation leaves behind
synthetic_gfk <- synthetic_gfk %>%
  group_by(month_num, prodid_num) %>%
  mutate(
    quantity = sum(quantity),
    value = sum(value)
  ) %>%
  ungroup() %>%
  unique()

# Unit value (price) is total value divided by total quantity
synthetic_gfk$uv <- with(synthetic_gfk, value / quantity)

# Keep only the columns whose name contains "char" as the feature set
features <- synthetic_gfk[, grepl("char", names(synthetic_gfk), fixed = TRUE)]

Once the dataframe is in the correct format we can run it through the main function with index_method set to ‘GEKS-IT’ and provide a features argument, containing the dataframe of features.

Note GEKS-IT still requires a product id.

# GEKS-IT index: takes the product id as well as the features data frame
itrygeks_index <- multilateral(
  period = synthetic_gfk$month_num,
  id = synthetic_gfk$prodid_num,
  price = synthetic_gfk$uv,
  quantity = synthetic_gfk$quantity,
  features = features,
  index_method = "GEKS-IT",
  splice_method = "geomean"
)

# Label the series for the plot legend
itrygeks_index$index$type <- "itrygeks"

Alternatively we could run a basic TDH model, which does not require a unique ID.

# TDH index: note that no product id argument is supplied
TDH_index <- multilateral(
  period = synthetic_gfk$month_num,
  price = synthetic_gfk$uv,
  quantity = synthetic_gfk$quantity,
  features = features,
  index_method = "TDH",
  splice_method = "geomean"
)

# Label the series for the plot legend
TDH_index$index$type <- "TDH"

# Stack both feature-based indexes and draw them on one chart
feature_indexes <- rbind(itrygeks_index$index, TDH_index$index)

plot <- ggplot(feature_indexes) +
  geom_line(aes(x = period, y = index, colour = type))

print(plot)

Showcase of splice methods you can apply

Here we introduce a window length, and therefore need some way to chain the indexes together. To do this and still meet the no-revision constraint, we 'splice' the individual windows together.

splice_methods <- c("geomean", "geomean_short", "window", "movement", "half")

# One TPD index per splice method, each over 13-period windows
turvey_splices <- lapply(splice_methods, function(splice_method) {
  result <- multilateral(
    period = turvey$month,
    id = turvey$commodity,
    price = turvey$price,
    quantity = turvey$quantity,
    index_method = "TPD",
    window_length = 13,
    splice_method = splice_method
  )

  # Label the series with the splice method for the plot legend
  result$index$type <- splice_method
  result$index
})

# Stack the per-method series into a single table for plotting
turvey_splices <- do.call(rbind, turvey_splices)

plot <- ggplot(turvey_splices) +
  geom_line(aes(x = period, y = index, colour = type))

print(plot)

Retrospective splicing (chained indexes)

Typical splicing of a multilateral index exists to enforce the no-revision constraint, which most official price indexes follow. However, this package allows the user to provide a 'chain_method' rather than a 'splice_method'. With this we are able to look at what the index would be if we used all information available as at the latest period.

Note: depending on the position of the chaining, the series will be a different length.

chain_methods <- c("geomean", "window", "movement", "half")

# One TPD index per chain method, each over 13-period windows
turvey_chains <- lapply(chain_methods, function(chain_method) {
  # chain_method is passed here; splice_method is not
  result <- multilateral(
    period = turvey$month,
    id = turvey$commodity,
    price = turvey$price,
    quantity = turvey$quantity,
    index_method = "TPD",
    window_length = 13,
    chain_method = chain_method
  )

  # Label the series with the chain method for later filtering and plotting
  result$index$type <- chain_method
  result$index
})

# Stack the per-method series into a single table
turvey_chains <- do.call(rbind, turvey_chains)


# Overlay the "geomean" chained series on the "geomean" spliced series.
# Rows are selected with an explicit `[rows, ]` index: without the comma a
# logical vector selects columns on a plain data.frame or tibble, so the
# filter would only work if the object happened to be a data.table.
plot <- ggplot() +
  geom_line(
    aes(x = period, y = index, colour = "chain"),
    data = turvey_chains[turvey_chains$type == "geomean", ]
  ) +
  geom_line(
    aes(x = period, y = index, colour = "splice"),
    data = turvey_splices[turvey_splices$type == "geomean", ]
  ) +
  ggtitle("Geomean chain compared to splice for turvey")

print(plot)

Why this package

Below we compare the PriceIndices, IndexNumR, and multilateral packages. We look at how you would calculate the same index with each.

library(multilateral)
library(IndexNumR)
library(PriceIndices)
library(dplyr)
# --------------------- multilateral package
# GEKS-F index over 13-period windows with the "geomean" splice.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
turvey_multilateral <- multilateral(
  period = turvey$month,
  price = turvey$price,
  quantity = turvey$quantity,
  id = turvey$commodity,
  window_length = 13,
  splice_method = "geomean",
  index_method = "GEKS-F",
  check_inputs_ind = TRUE
)

# --------------------- PriceIndices package

# Requires a data.frame with specific column names and a specific time format
turvey_mod <- turvey %>%
  select(
    time = month,
    prodID = commodity,
    prices = price,
    quantities = quantity
  )

# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
turvey_priceindices <- geks_splice(
  turvey_mod,
  "1970-01",
  "1973-12",
  13,
  splice = "mean",
  interval = TRUE
)


# --------------------- IndexNumR package

# Requires time to be a sequential numeric vector, so derive one from month
turvey <- turvey %>%
  mutate(month_num = as.numeric(as.factor(month)))

turvey_IndexNumR <- GEKSIndex(
  turvey,
  pvar = "price",
  qvar = "quantity",
  prodID = "commodity",
  pervar = "month_num",
  indexMethod = "fisher",
  window = 13,
  splice = "mean"
)

Note the multilateral function also has a 'num_cores' parameter for parallelisation. Because calculation over a given window is not dependent on another, the majority of the function computation time - calculating indexes over each window - can be done in parallel. This is only applicable when there is more than one window, and only becomes noticeably quicker when dealing with larger datasets (>500k rows) as distributing to multiple cores has overheads. It is up to the user to find the correct balance.

# Illustrative only: big_data is not defined in this document
nrow(big_data)
# [1] 100000000

# Same TPD call as before, with the window calculations spread over 32 cores.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
multilateral(
  period = big_data$month,
  price = big_data$price,
  quantity = big_data$quantity,
  id = big_data$commodity,
  window_length = 13,
  splice_method = "geomean",
  index_method = "TPD",
  check_inputs_ind = TRUE,
  num_cores = 32
)


Try the multilateral package in your browser

Any scripts or data that you put into this service are public.

multilateral documentation built on April 20, 2022, 9:06 a.m.