# install.packages('devtools') # library(devtools) if(!require(CommonFunctions)){ devtools::install_github('georgegui/CommonFunctions') library(CommonFunctions) } if(!require(DemandEstimation)){ devtools::install_github('georgegui/MarketingRegression') library(DemandEstimation) }
library(data.table) library(Matrix) library(ggplot2)
We generate the sales data based on a linear model. The default value for own elasticity, competing elasticities and promotion effect are -2, 0.2, 1 respectively, but they can be replaced with a random draw by providing the corresponding function. Missing products at a store or market can also be randomly generated, but for illustration purpose we make all products available at all stores.
set.seed(6) dt <- GenerateEstimationTestData( n_zip3 = 2, n_brands = 5, n_store = 5, region_time_fe = TRUE, own_elas_draw = function() rnorm(1, -2), missing_assortment_draw = function() 0 )
group_code is used to denote the product id. (This is probably a bad and confusing name originated from combining upc product group. I intended to change it to 'product' but there will be too many things that need to be changed accordingly.)
actual_elas <- unique(dt[ZIP3 == 1 & group_code == 1, .( actual_elasticity = own_elasticity, store_code_uc, group_code )]) actual_elas
head(dt[, .(week_end, ZIP3, store_code_uc, group_code, YearMonth, price)])
We generate a list of parameters that is necessary for estimation. This list of parameters is used throughout this project for testing different specifications. Not all parameters are used in the 'Estimate' function, as some are used for sample cleaning and selection.
cur_model <- data.table( model_name = 'SparseOLS', estimation_type = 'SparseOLS', time_control = 'YearMonth', regional_control = 'ZIP3', demean_before_estimation = 0 ) cur_model <- as.list(CheckEstimationProfiles(cur_model)) # class is importance because head(cur_model)
cur_model$lhs_groups <- '1' cur_model$rhs_groups <- levels(dt$group_code)
model
as input to S3 functionFor generating a flexible code structure, the function Estimate
is a S3 function that will use different estimation method based on the class of its first parameter model
. The potential class includes 'Sparse_OLS', 'SparseLASSO', 'OLS', 'Bayes'.
class(cur_model) <- c(cur_model$estimation_type, class(cur_model))
The output of the result is a list of regional-level results, where each element contains the necessary information for formatting the results.
results <- Estimate(cur_model, dt)
The estimated coefficients is stored in the long format instead of the wide format because the assortment difference across stores and regions.
The coefficient in the row with variable
being 'price' and group_code
being 2 is the effect of the price of product 2 on the sales of focal products.
result_region1 <- results[[1]]$out head(result_region1, 6)
own_price_coefficient <- result_region1[ ZIP3 == 1 & variable == 'price' & group_code == '1', .( estimated_coef = coefficient, store_code_uc )] own_price_coefficient <- merge(own_price_coefficient, actual_elas, by = 'store_code_uc') own_price_coefficient
cur_model$model_name = 'OLS' cur_model$estimation_type = 'OLS' class(cur_model) <- c(cur_model$estimation_type, class(cur_model)) # we demean the columns using the FWL theorem dt <- DemeanCols(dt, relevant_cols = c('price', 'promotion', 'sales'), by_cols = c('YearMonth', 'ZIP3')) result <- Estimate(cur_model, dt[ZIP3 == 1]) own_price_coefficient_fwl <- result[[1]]$out[ rhs_var == 'price_1' , .( fwl_coef = coefficient, store_code_uc )]
own_price_coefficient <- merge( own_price_coefficient, own_price_coefficient_fwl, by = 'store_code_uc') own_price_coefficient
ConstructSparseRhs
is a function that I designed to construct the regression matrix. It is used in the Estimate
function.
# if an observation is from a certain week_end and store_code_uc, which row it should be in. small_dt <- GenerateEstimationTestData( n_zip3 = 1, n_brands = 5, n_store = 5, region_time_fe = TRUE, own_elas_draw = function() rnorm(1, -2), missing_assortment_draw = function() 0 ) small_dt <- small_dt[year(week_end) %between% c(2008, 2010)] row_index <- small_dt[, .(row_index = as.integer(.GRP)), keyby = .(store_code_uc, week_end)] rhs_sparse_matrix <- ConstructSparseRhs( cur_model, # the model parameters small_dt, # the price data.table '1', # the lhs product row_index) column_definitions <- attr(rhs_sparse_matrix, 'relevant_cols') setnames(column_definitions, 'store_code_uc', 'store_fixed_effect') sparse_index <- data.table(which(rhs_sparse_matrix != 0, arr.ind = T)) # add column and row information setnames(sparse_index, names(sparse_index), c('row_index', 'column_index')) sparse_index <- merge(sparse_index, row_index, by = 'row_index') sparse_index <- merge(sparse_index, column_definitions, by = 'column_index', all.x = TRUE) n_price_cols <- nrow(column_definitions) n_month <- attr(rhs_sparse_matrix, 'n_month_cols') sparse_index[is.na(variable), variable := 'store fixed effect'] sparse_index[column_index %between% c(n_price_cols + 1, n_price_cols + n_month), variable := 'time fixed effect'] # store_label_text <- sparse_index[, .( x = median(as.numeric(column_index)), y = -median(as.numeric(row_index)) ), by = .(store_code_uc = paste0('store', store_code_uc))] gg <- ggplot() + geom_tile(data = sparse_index, aes(x = column_index, y = -row_index, fill = variable)) + geom_text(data = store_label_text, aes(x = x, y = y, label = store_code_uc)) + theme_void() + labs(x = "", y = "") + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) ggsave('matrix_example.pdf', gg, height = 8, width = 8)
gg
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.