#' This function creates a simulated BLP dataset.
#'
#' @param nmkt number of markets
#' @param nbrn number of products
#' @param Xlin character vector specifying the set of linear variables
#' @param Xexo character vector specifying the set of exogenous variables (subset of \code{Xlin})
#' @param Xrandom character vector specifying the set of random coefficients (subset of \code{Xlin})
#' @param instruments character vector specifying the set of instrumental variables
#'
#' @param true.parameters list with parameters of the DGP
#' \describe{
#' \item{\code{Xlin.true.except.price}}{"true" linear coefficients in utility function except price}
#' \item{\code{Xlin.true.price}}{"true" linear price coefficient in utility function}
#' \item{\code{Xrandom.true}}{"true" set of random coefficients}
#' \item{\code{instrument.effects}}{"true" coefficients of instrumental variables to explain endogenous price}
#' \item{\code{instrument.Xexo.effects}}{"true" coefficients of exogenous variables to explain endogenous price} }
#'
#' @param price.endogeneity list with arguments of the multivariate normal
#' distribution
#' \describe{
#' \item{\code{mean.xi}}{controls for the mean of the error term in the utility function}
#' \item{\code{mean.eita}}{controls for the mean of the error term in the price function}
#' \item{\code{cov}}{controls for the covariance of \code{xi} and \code{eita} }}
#'
#' @param printlevel 0 (no output) 1 (summary of generated data)
#' @param seed seed for the random number generator
#'
#' @return Returns a simulated BLP dataset.
#'
#' @details The dataset is balanced, so every market has the same amount of products.
#' Only unobserved heterogeneity can be considered.
#' Variables that enter the equation as a Random Coefficient or
#' exogenously must be included in the set of linear variables.
#' The \code{parameter.list} argument specifies the "true" effect on the
#' individual utility for each component. Prices are generated endogenous
#' as a function of exogenous variables and instruments, where the
#' respective effect sizes are specified in \code{instrument.effects}
#' and \code{instrument.Xexo.effects}. Error terms \code{xi} and \code{eita}
#' are drawn from a multivariate normal distribution, whose
#' parameters can be set in \code{price.endogeneity}. Market shares
#' are generated by MLHS integration rule with 10000 nodes.
#'
#' @export
#'
#' @examples
#' K<-2 #number of random coefficients
#' Xlin_example <- c("price", "x1", "x2", "x3", "x4", "x5")
#' Xexo_example <- c("x1", "x2", "x3", "x4", "x5")
#' Xrandom_example <- paste0("x",1:K)
#' instruments_example <- paste0("iv",1:10)
#' data <- get.BLP.dataset(nmkt = 25,
#' nbrn = 20,
#' Xlin = Xlin_example,
#' Xexo = Xexo_example,
#' Xrandom = Xrandom_example,
#' instruments = instruments_example,
#' true.parameters = list(Xlin.true.except.price = rep(0.2,5),
#' Xlin.true.price = -0.2,
#' Xrandom.true = rep(2,K),
#' instrument.effects = rep(2,10),
#' instrument.Xexo.effects = rep(1,5)),
#' price.endogeneity = list( mean.xi = -2,
#' mean.eita = 0,
#' cov = cbind( c(1,0.7), c(0.7,1))),
#' printlevel = 0, seed = 234234 )
#'
#'
get.BLP.dataset <- function( nmkt, nbrn,
Xlin, Xexo, Xrandom, instruments,
true.parameters = list(),
price.endogeneity = list(),
printlevel=1 , seed)
{
## input checks ----
### Existence check of necessary arguments
# (collecting all arguments as a list, implicitly requires the arguments to be non-empty,
# and lapply checks whether list arguments are non empty, i.e. are null)
toBeTested<- list("nmkt" = nmkt,
"nbrn" = nbrn,
"Xlin" = Xlin,
"Xexo" = Xexo,
"Xrandom"= Xrandom,
"instruments" = instruments,
"true.parameters$Xlin.true.except.price" = true.parameters$Xlin.true.except.price,
"true.parameters$Xlin.true.price" = true.parameters$Xlin.true.price,
"true.parameters$Xrandom.true" = true.parameters$Xrandom.true,
"true.parameters$instrument.effects" = true.parameters$instrument.effects,
"true.parameters$instrument.Xexo.effects" = true.parameters$instrument.Xexo.effects,
"price.endogeneity$mean.xi" = price.endogeneity$mean.xi,
"price.endogeneity$mean.eita" = price.endogeneity$mean.eita,
"price.endogeneity$cov" = price.endogeneity$cov)
names_toBeTested <- names(toBeTested)
lapply(names_toBeTested,
function(i){
if(is.null(toBeTested[[i]])) {
stop(paste("Argument",i,"is missing."))
} }
)
### Content check
if (is.numeric(seed) & (length(seed)>0) )
{
set.seed(seed)
} else
{
seed <- NA
}
if (!("price" %in% Xlin))
stop("Linear parameters must include a variable named *price* . ")
if (!all(Xexo %in% Xlin) || !all(Xrandom %in% Xlin))
stop("Linear parameters must include random coefficients and exogenous variables.")
if (length(Xlin) != (length(true.parameters$Xlin.true.except.price) +
1))
stop("Number of linear parameters and true effects must match.")
if (length(Xrandom) != length(true.parameters$Xrandom.true))
stop("Number of random coefficients and true effects must match.")
if (length(instruments) != length(true.parameters$instrument.effects))
stop("Number of instruments and true effects must match.")
## intializing vectors ----
nobs <- nmkt * nbrn
nlin <- length(Xlin) # number of lin. parameters
ninst <- length(instruments) # number of instruments
K <- length(true.parameters$Xrandom.true)
cdid <- sort(rep(1:nmkt, nbrn))
cdindex <- c(0, cumsum(table(cdid)))
total.demographics <- 0
## eita & xi ----
cov.xi.eita <- price.endogeneity$cov
random.rv <- cbind(rnorm(nobs), rnorm(nobs))
choleski <- t(chol(cov.xi.eita))
xi.eita <- t(apply( random.rv , 1 ,function(x) choleski %*% x) )
xi <- xi.eita[, 1] + price.endogeneity$mean.xi
eita <- xi.eita[, 2] + price.endogeneity$mean.eita
## generate data matrices ----
instruments.data <- vapply(instruments, function(x) runif(nobs, 0, 2), numeric(nobs)) #;% cost shifters
Xlin.data <- vapply(Xlin, function(x) runif(nobs, 0, 2), numeric(nobs))
Xlin.data[, "price"] <- NA # %; price is generated as a dependent variable of instruments and Xexo
Xexo.data <- vapply(Xexo, function(x) Xlin.data[, x], numeric(nobs))
Xlin.data[, "price"] <- instruments.data %*% true.parameters$instrument.effects +
Xexo.data %*% true.parameters$instrument.Xexo.effects + eita
Xrandom.data <- vapply(Xrandom, function(x) Xlin.data[, x], numeric(nobs))
# true marketshares ----
integration.list <- get.integration.input(dim = K, method = "MLHS",
amount_nodes = 10000, nmkt = nmkt, seed = seed)
deltatrue <- c(Xlin.data[, "price"] * true.parameters$Xlin.true.price +
Xlin.data[, -which(Xlin == "price"), drop = FALSE] %*% matrix(true.parameters$Xlin.true.except.price) +
xi)
deltatrue.exp <- exp(deltatrue)
theta2.matrix <- matrix(true.parameters$Xrandom.true) # ;% could also include demographic effects in subsequent columns
expmu <- getExpMu(theta2.matrix, integration.list$nodes,
Xrandom.data, cdid, demographics = matrix(NA))
sij <- getSij(expmu,
deltatrue.exp,
cdindex)
shares <- c(sij %*% matrix(integration.list$weights))
## message and output ----
if (printlevel > 1)
{
cat("You are now working with:", "\n", K, "random coefficient(s) with true values: ",
true.parameters$Xrandom.true, ",\n", total.demographics,
"demographics, \n", nmkt, "markets,", "\n", nbrn,
"products and", "\n", ninst, "instruments.", "\n",
"The following variables enter the problem linearly: ",
Xlin, "\n", "Exogenous (i.e. no correlation to the structural error term) variables are:",
Xexo, "\n", "Variables that are used as random coefficient: ",
Xrandom, "\n")
}
BLP.data <- cbind(Xlin.data, instruments.data, cdid, shares,
deltatrue)
BLP.dataframe <- data.frame(BLP.data)
return(BLP.dataframe)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.