#' Decomposition of Fixed-Effects index
#'
#' Take (mostly) the same inputs as the FEWS function and return the decomposition
#' on the multilateral scale.
#'
#' @param times vector of the times at which price observations were made. \cr
#' NOTE: \code{times} must be of class Date or numeric.
#' @param logprice vector of log of prices at the given time
#' @param id vector of distinct identification number of consumer goods
#' @param weight vector of expenditure weights used in the regressions
#' @param custom_time either empty (will assume latest periods) or a vector of length
#' two relating to times compared where [1] is the 'from' time and [2] is the 'to' time
#' e.g Comparing contribution from "1973-04-01" to "1973-05-01" would be c("1973-04-01", "1973-05-01")
#' @param window_length optional. Single number for length of window for the data that
#' regressions are fit on. Note if window_length is present it assumes custom time is latest two periods
#' in window.
#' @param big If vector size becomes too large it will fail to calculate. Setting big to true will calculate
#' it in an alternate way, however much slower.
#' @return A dataframe containing the numeric contribution (\code{contrib}) of every price observation
#' in the window, the product of which will equal the index movement between the two time periods. \code{p_contrib} is
#' the standardised percentage contribution of the observation, the sum of which will equal 1. \code{id_p_contrib} is
#' the standardised percentage contribution of the total id, the sum of which will equal 1.
#' @examples
#' library(TPDdecomp)
#'
#' load("Turvey.RData")
#'
#' contributions <- with(
#' turvey,
#' TPD_decomp(times = month,
#' logprice = log(price),
#' id = commodity,
#' weight = price*quantity,
#' custom_time = c("1973-04-30","1973-05-31"),
#' window_length = NULL)
#' )
#' #Comparing change from 1973-04-30 to 1973-05-31
#'
#' str(contributions)
#' Classes ‘data.table’ and 'data.frame': 176 obs. of 8 variables:
#' $ times : Date, format: "1973-07-31" "1973-06-30" "1973-05-31" ...
#' $ price : num 5.19 4.53 7.17 4.3 3.98 6.21 3.78 3.72 5.68 3.27 ...
#' $ weight : num 14932 18505 7407 10892 14495 ...
#' $ id : chr "Strawberries" "Strawberries" "Strawberries" "Strawberries" ...
#' $ exp_share : num 0.36 0.413 0.238 0.342 0.41 ...
#' $ contrib : num 0.965 0.967 1.545 0.97 0.97 ...
#' $ p_contrib : num -0.208 -0.201 2.58 -0.181 -0.184 ...
#' $ id_p_contrib: num 0.663 0.663 0.663 0.663 0.663 ...
#'
#' @import Matrix
#' @import MatrixModels
#' @import data.table
#' @import MASS
#' @export
TPD_decomp <- function(times,
logprice,
id,
weight,
custom_time = c(),
window_length = NULL,
big = FALSE,
verbose = FALSE){
#Check input variables are as required
c(times, logprice, id, weight, custom_time, window_length) %=%
check_inputs (times = times,
logprice = logprice,
id = id,
weight = weight,
custom_time = custom_time,
window_length = window_length)
#Check if custom_times are in the times vector
if(length(custom_time)!=0 & !all(custom_time %in% as.character(times))){
stop("Custom_time is not in times?")
}
# Make a data frame from all of the inputs
prices_df <- data.table(times = times,
price = exp(logprice),
logprice = logprice,
weight = weight,
id = as.character(id),
key = "times")
# It is essential that the data frame is sorted by date and ID
# use an if because sorting is slow, but testing is fast
setorderv(prices_df, c("times", "id"), c(-1, 1))
#If the input vectors contain more time periods than the window length
#we will remove what wouldn't be included in the FEWS calculation
if(!is.null(window_length)){
if(length(custom_time)!=0){
#Find position of end of window based on custom_time[2]
end_time <- which(custom_time[2]==unique(prices_df$times))
}else{
end_time <- 1
}
#return times of interest based off end_time index
times_of_interest <- unique(prices_df$times)[(end_time):(end_time+window_length-1)]
#check there is enough data for the window
if (any(is.na(times_of_interest))){
stop("There are NA's in the times of interest. If you have a custom time",
" and a window length, make sure there is enough back data from custom_time[2]")
}
# Filter data.table by times of interest
prices_df <- prices_df[times %in% times_of_interest]
}
#Create seperate times and id index variable which glm will use
prices_df[,"times_index":=as.factor(times)]
prices_df[,"id_index" := as.factor(id)]
#length of subsets used to seperate design matrix
times_index_length <- nlevels(prices_df$times_index)
id_index_length <- nlevels(prices_df$id_index)
#fixed effect regression model formula
glm_formula <- prices_df$logprice ~ prices_df$times_index + prices_df$id_index
#extract design matrix
d <- MatrixModels::model.Matrix(glm_formula, sparse =T)
#Design matrix size, (number of times) + (number of ids) - (1 date) - (1 commodity) + (1 intercept)
vcat("The dimensions of the design matrix:", dim(d),'\n')
assumed_col_length <- length(unique(prices_df$times))+length(unique(prices_df$id))-1-1+1
#check it matches
if(assumed_col_length == dim(d)[2]){
vcat("It matches\n")
}else{
warning("Design matrix is not the expected shape?")
}
#dtau is the design matrix subset to just the intercept + time elements
dtau <- d[,1:times_index_length]
vcat("design tau shape:",dim(dtau),'\n')
#dpi is the design matrix subset to just the
#commodities (which is everything right of times + intercept)
dpi <- d[,(times_index_length+1):dim(d)[2]]
vcat("design pi shape:",dim(dpi),'\n')
#p is the log price vector
p <- logprice
vcat("price vector length:",length(p),'\n')
#calculate the expenditure share of each ID for every time
prices_df[,"exp_share" := weight/sum(weight),by = "times"]
exp_share <- prices_df$exp_share
#create sparse weight matrix
wgt <- Matrix::.sparseDiagonal(x = exp_share)
vcat("weight matrix shape:",dim(wgt),'\n')
#calculate sub components of weight calculation
vcat("Calculating subcomponents\n")
wtau <- Matrix::crossprod(dtau,wgt)%*%dtau
wpi <- Matrix::crossprod(dpi,wgt)%*%dpi
wtaupi <- Matrix::crossprod(dtau,wgt)%*%dpi
# wpi_solve <- solve(wpi)
wpi_solve <- Matrix::chol2inv(Matrix::chol(wpi))
w_common <- wtaupi%*%wpi_solve
#Calculate weights (This multiplied by logprice vector will equal the estimates of parameters for each time)
vcat("Calculating contribution matrix\n")
#Alternate calc (more verbose)
# contribution_matrix <-
# solve(wtau - (wtaupi %*% Matrix::tcrossprod(solve(wpi),wtaupi)),
# ((Matrix::t(dtau) - wtaupi %*% Matrix::tcrossprod(solve(wpi), dpi)) %*% wgt))
left_solver <- list(wtau,wtaupi)
right_solver <- list(Matrix::t(dtau),dpi)
#Tidy up
rm(dtau,
dpi,
wtau,
wtaupi,
wpi,
wpi_solve,
d)
weight_calc <- function(inputs){
inputs[[1]]-(Matrix::tcrossprod(w_common,inputs[[2]]))
}
if(big){
#Instead of trying to do the large matrix substraction in one call
#Break it down by row so max vector allocation is size of row.
cat("Calculating alternate way as big = TRUE\n")
contribution_matrix <- vector("list",2)
contribution_matrix[[1]] <- weight_calc(left_solver)
temp_right <- (Matrix::tcrossprod(w_common,right_solver[[2]]))
nrows <- dim(right_solver[[1]])[1]
ncols <- dim(right_solver[[1]])[2] #BIG
contribution_matrix[[2]] <- sparseMatrix(dims=c(nrows,ncols), i={}, j={})
contribution_matrix[[2]] <- as(contribution_matrix[[2]], "dgCMatrix")
for(i in 1:nrows){
cat(i,'/',nrows,'\r')
contribution_matrix[[2]][i,] <- right_solver[[1]][i,]-temp_right[i,]
}
}else{
contribution_matrix <- lapply(list(left_solver,right_solver),
weight_calc)
}
contribution_matrix <- solve(contribution_matrix[[1]])%*%contribution_matrix[[2]]%*%wgt
#rename rows to times
dimnames(contribution_matrix) <- list(c("intercept",levels(prices_df$times_index)[-1]),NULL)
#if user input custom times change this
if (length(custom_time)!=0){
previous <- custom_time[1]
current <- custom_time[2]
}else{
#Get the latest 2 times
current <- levels(prices_df$times_index)[times_index_length]
previous <- levels(prices_df$times_index)[times_index_length-1]
}
#Change rowname for given current and previous to literally 'current' and 'previous'
dimnames(contribution_matrix)[[1]][which(dimnames(contribution_matrix)[[1]]==previous)] <- "previous"
dimnames(contribution_matrix)[[1]][which(dimnames(contribution_matrix)[[1]]==current)] <- "current"
#Let the user know this is whats being compared. Can investigate if not what is expected
cat("Comparing change from",as.character(previous),"to",as.character(current),"\n")
#Extract the relevant contribution rows i.e. what periods are being compared
contrib <- contribution_matrix[c(which(dimnames(contribution_matrix)[[1]]=="previous"),
which(dimnames(contribution_matrix)[[1]]=="current")),]
#Convert the transpose to a data.table
vcat("Creating contrib table from matrix\n")
contrib <- as.data.table(as.matrix(Matrix::t(contrib)))
#Merge back with prices_df, as simple as cbind as it should align already
prices_df <- cbind(prices_df,contrib)
#Calculate contribution of each price
vcat("Calculating contribution for each price observation\n")
prices_df[,"contrib":= (price^(current-previous))]
#Remove irrelevant columns
prices_df <- prices_df[,!c("logprice","id_index","times_index","previous","current")]
#Calculate total contribution for each id (product of contrib over times)
vcat("Calculating percentage contribution by product\n")
prices_df[,"p_contrib" := percentage_contrib(contrib)]
prices_df[,"id_p_contrib" := sum(p_contrib), id]
#sort by descending total contribution
setorderv(prices_df,"id_p_contrib",-1)
vcat("Complete.\n")
return(prices_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.