# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
utils::globalVariables(c("pop", "total.spend"))
#' Generate simulation objects under the AMSS framework.
#'
#' Produces an \code{amss.sim} object that contains the simulated data and can
#' be used to derive ground truth about the scenario.
#'
#' Objects of class \code{amss.sim} contain the full output from running a
#' simulation scenario. This output includes the observed data, the complete
#' dataset generated during the simulation process, and the parameters passed
#' to the simulation function in order to generate the simulated data. The
#' observed data is meant to be used by modelers. The complete dataset can be
#' useful for users who want a more complete understanding of the forces
#' operating in a simulation scenario. The parameter list is essential for
#' generating future datasets based on the same, or slightly modified,
#' simulation settings in order to obtain ground truth about the simulation
#' scenario.
#'
#' @param time.n number of timepoints.
#' @param nat.mig.module specifications of class \code{seq.specs}, to be used
#' to create the non-actionable drivers module and then generate its
#' variables
#' @param nat.mig.params any parameter values to pass to \code{nat.mig.module}
#' @param media.names character vector of unique names of all media modules.
#' ex: c("tv", "search")
#' @param media.modules list of functions that simulate the behavior of each
#' marketing intervention.
#' @param media.params list of parameter value lists for each media module.
#' @param sales.module function that models the sales in the category.
#' @param sales.params list of any parameter values to pass to the sales module
#' @param ping the spacing between time points at which to print a message
#' updating the user on simulation progress.
#' @param names.agg.const character vector of names of variables to surface in
#' the observed data, aggregated using the first entry, since they are
#' constant over the hidden states. By default, if not specified,
#' \code{SurfaceData()} will pick up variables with names containing
#' "price" and/or "budget", and the "pop.total" variable.
#' @param names.agg.sum character vector of names of variables to surface in
#' the observed data, aggregated using the function \code{sum()}. By
#' default, \code{SurfaceData()} will pick up variables with names
#' matching "revenue", "profit", "sales", "volume", and/or "spend".
#' @return an object of class \code{amss.sim}, containing
#' \describe{
#' \item{data}{the observed data.}
#' \item{data.full}{the full dataset, as a list of data.tables. Each
#' \code{data.table} contains the data at the end of a time interval, by
#' by population segment (row) and variable (column).}
#' \item{params}{the parameters used to generate the data.}
#' }
#' @export
SimulateAMSS <- function(
time.n,
nat.mig.module = `DefaultNatMigModule`,
nat.mig.params = list(),
media.names = character(),
media.modules = rep(list(`DefaultTraditionalMediaModule`),
length(media.names)),
media.params = rep(list(list()), length(media.names)),
sales.module = `DefaultSalesModule`,
sales.params = list(),
ping = max(10, floor(time.n / 10)),
names.agg.const = NULL,
names.agg.sum = NULL) {
# Check inputs.
assertthat::assert_that(time.n >= 1)
# Different media channels must have different names.
assertthat::assert_that(!anyDuplicated(media.names))
# Save the parameter list.
params <- as.list(match.call())[-1]
default.params <- as.list(formals(`SimulateAMSS`))
default.params[names(params)] <- NULL
params <- c(params, default.params)
eval.env <- environment()
params <- lapply(params, function(x) eval(x, eval.env))
# Simulate data.
data.full <- do.call(
SimulateData,
c(list(starting.dts = list()), params)[formalArgs(`SimulateData`)])
# Return an object of class amss.sim.
return(amss.sim(
data = SurfaceData(data.full, names.agg.const, names.agg.sum),
data.full = data.full,
params = params))
}
#' Simulate data using the AMSS framework.
#'
#' Data generating function for AMSS. Responsible for sequentially simulating
#' the value of each variable at each time point by population segment, given
#' the simulation settings.
#'
#' @param starting.dts previously generated data that we can start with. data
#' will only be generated for later timepoints
#' @param time.n total number of timepoints, including the timepoints already
#' existing in starting.dts.
#' @param nat.mig.module function used to simulate effects of natural migration
#' on population segmentation.
#' @param nat.mig.params list of any parameter values to pass to
#' \code{nat.mig.module}
#' @param media.names character vector of unique names of all media modules.
#' ex: c("tv", "search")
#' @param media.modules list of functions that simulate the behavior of each
#' marketing intervention.
#' @param media.params list of parameter value lists for each media module.
#' @param sales.module function that generate sales variables.
#' @param sales.params list of any parameter values to pass to the sales module
#' @param ping the spacing between time points at which to print a message
#' updating the user on simulation progress.
#' @return a list of data sets, one per timepoint. Each is a data.table with
#' rows corresponding to population segments and columns corresponding
#' to specific variables.
#' @keywords internal
SimulateData <- function(
starting.dts = list(),
time.n,
nat.mig.module, nat.mig.params,
media.names, media.modules, media.params,
sales.module, sales.params,
ping) {
# Initialize data with the starting.dts provided.
all.dt <- data.table::copy(starting.dts)
# Handle the special case: no new data to generate.
if (time.n <= length(all.dt)) {
return(all.dt[1:time.n])
}
# Generate data for each time interval sequentially.
# Initialize curr.dt variable to store the most recent values for each
# variable.
t.start <- length(all.dt) + 1L
if (t.start == 1) {
curr.dt <- InitStateData(time.index = 0)
} else {
curr.dt <- data.table::copy(all.dt[[t.start - 1]])
}
# Iteratively simulate data for each time interval.
for (iter.t in t.start:time.n) {
if ((iter.t %% ping) == 1) {
print(paste("Simulating data. t =", iter.t))
}
# Run the natural migration module.
do.call(nat.mig.module, c(list(curr.dt), nat.mig.params))
# Run the media modules.
for (iter.media in 1:length(media.names)) {
existing.var.names <- data.table::copy(names(curr.dt))
do.call(media.modules[[iter.media]],
c(list(curr.dt), media.params[[iter.media]]))
# Since the module updated the data.table without specifying the media
# name, variables such as "volume" must be updated to "tv.volume".
var.names <- setdiff(names(curr.dt), existing.var.names)
new.var.names <- PasteD(media.names[iter.media], var.names)
# Delete data from the previous time interval to avoid duplicate columns.
if (iter.t > 1) {
curr.dt[, (new.var.names) := NULL]
}
setnames(curr.dt, var.names, new.var.names)
}
curr.dt[,
total.spend :=
EvalText(paste(PasteD(media.names, "spend"),
collapse = " + "),
curr.dt)]
# Run the sales module.
do.call(sales.module, c(list(curr.dt), sales.params))
# Save progress, and stop if done.
all.dt[[iter.t]] <- copy(curr.dt)
}
return(all.dt)
}
#' Initialize data.
#'
#' Initialize the updating \code{data.table} containing current data on each
#' population segment.
#'
#' @param time.index time index to intialize the data table to
#' @return data.table with one column for each state variable (market,
#' satiation, activity, brand) and one row per each valid combination of
#' states for the above state variables. Also initializes the population
#' count at timepoint 1 as 0.
#' @keywords internal
InitStateData <- function(time.index = 0) {
# Create a copy of the data.table listing all population segments.
data.dt <- data.table::copy(kAllStates)
# Set the time index, geo index, and population size
data.dt[, time.index := time.index][, pop := 0]
return(data.dt)
}
#' Surface observable data.
#'
#' Extract observable data from the full dataset.
#'
#' @param full.data \code{data.table} of full data. each row contains
#' information for a particular timepoint + hidden state
#' @param names.const character names of variables with constant values over
#' the state, to be aggregated by taking the first entry.
#' @param names.sum character names of variables with numeric values, to be
#' aggregated by \code{sum()}.
#' @return observable data, aggregated over the hidden states as specified.
#' Variables not included in names.const or names.sum are assumed to be
#' hidden variables and not surfaced.
#' @keywords internal
SurfaceData <- function(full.data, names.const, names.sum) {
# Check the full.data argument.
full.data <- rbindlist(full.data, use.names = TRUE)
data.table::setkeyv(full.data,
c("time.index", colnames(kAllStates)))
# The data.table keys should be unique
assertthat::assert_that(identical(full.data, unique(full.data)))
# Check the arguments specifying names of variables.
if (missing(names.const) || is.null(names.const)) {
names.const <- c(
grep("price|budget", names(full.data), value = TRUE))
}
if (missing(names.sum) || is.null(names.sum)) {
names.sum <- c(grep(paste0("revenue$|^profit$|\\.sales$|",
"\\.volume$|",
"\\.spend$|\\.imps$|\\.clicks$"),
names(full.data), value = TRUE))
}
assertthat::assert_that(
identical(character(), intersect(names.const, names.sum)),
msg = "The same variable cannot be aggregated in multiple ways.")
if (!identical(character(),
setdiff(names.const,
setdiff(names(full.data), key(full.data))))) {
warning(paste("Cannot aggregate nonexistent or index columns.",
"Removing from names.const."))
names.const <- intersect(names(full.data), names.const)
}
if (!identical(character(),
setdiff(names.sum,
setdiff(names(full.data), key(full.data))))) {
warning(paste("Cannot aggregate nonexistent or index columns.",
"Removing from names.sum."))
names.sum <- intersect(names(full.data), names.sum)
}
# Perform the data aggregation.
if (identical(character(), names.sum)) {
vals.sum <-
unique(full.data[, "time.index", with = FALSE])
} else {
vals.sum <- full.data[, lapply(.SD, sum),
by = "time.index",
.SDcols = names.sum]
}
if (identical(character(), names.const)) {
vals.const <-
unique(full.data[, "time.index", with = FALSE])
} else {
vals.const <- full.data[, lapply(.SD, function(x) x[1]),
by = "time.index",
.SDcols = names.const]
}
surfaced.data <- vals.sum[vals.const]
return(surfaced.data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.