# Copyright 2019 Google LLC.
# SPDX-License-Identifier: Apache-2.0
# CBC data handling and creation routines (other than import-export!)
# FUNCTIONS:
# generateMNLrandomTab() : Create a design matrix from a list of attributes
# pickMNLwinningCards() : Given utilities, choose the winning tasks from design matrix
# generateRNDpws() : Create random utilities for simulated respondents
# fillCBCwinners() :
# expandCBCwinners() :
#############################################
# generateMNLrandomTab()
#############################################
# function(attrLevels, cards=3, respondents=200, trials=12,
# balanced.sample=TRUE, best.of=100, verbose=TRUE, no.output=FALSE)
#
# Create a "TAB" style CBC design
#
# WARNING: for didactic & simulation rather than survey purposes. Designs are
# slightly optimized for level balance, but not for other concerns such as
# D-efficiency and optimal balanced overlap.
#
# PARAMETERS
# attrLevels = list of how many levels to create for each attribute
# (length = # of attributes; each entry = # of levels)
# cards = concepts shown per CBC task
# respondents = how many sets of trials to generate
# trials = how many trials to generate for each "respondent"
# balanced.sample = whether to minimize reuse of concept levels
# TRUE = No overlap within trials (if possible). Do not resample
# a level within attribute in a trial
# FALSE = Just generate cards randomly with level resampling
# according to uniform distribution
# best.of = how many designs to evaluate; function will return the single
# best-balanced design among them
# verbose = show output as it goes
# no.output = whether to run silently
#
# NOTE:
# in v0.2, all respondents must have the same number of cards and trials
# see "estimateMNLfromDesign()" for more details
#
# SAMPLE CODE
# a design with 10 attributes of 3-7 levels
# tmp.attrs <- c(4,3,4,5,5,3,4,5,2,7)
# create random CBC cards for that design
# tmp.tab <- generateMNLrandomTab(tmp.attrs, respondents=200, trials=8)
# convert those to a design-coded dummy matrix
# tmp.des <- convertSSItoDesign(tmp.tab)
#
#' Simple experimental design for choice-based conjoint analysis
#'
#' @description
#' Create an experimental design for an exactly rectangular version of
#' choice-based conjoint analysis (same number of concepts and trials
#' for every respondent).
#' This function is recommended primarily for simple surveys and
#' didactic purposes; it does not do advanced optimization.
#' It will attempt to find level balance through iterative selection across
#' multiple randomized designs.
#'
#' @param attrLevels A vector of integers, where each integer is the number of
#' levels for its respective attribute. For example, if a product has 3
#' brands, 4 performance levels, and 3 price points, this would be
#' \code{c(3, 4, 3)}.
#' @param cards The number of concepts to show at one time.
#' @param respondents The number of designs to generate.
#' @param trials How many tasks will be given to each respondent
#' @param balanced.sample Whether to attempt to do level balancing across
#' attributes, i.e., to have fewer duplications of any level on a given
#' task.
#' @param best.of How many iterations to consider before selecting the
#' optimized one.
#' @param verbose Outputs more information as it works.
#' @param no.output Whether to suppress all output.
#'
#' @returns A data frame for the experimental design, with a row for
#' each respondent * trial * concept, with columns for the attribute levels.
#'
#' @seealso [generateRNDpws] to create a set of random utilities for
#' a specific attribute list, [pickMNLwinningCards] to "answer" the survey
#' design according to utilities.
#'
generateMNLrandomTab <- function(attrLevels, cards=3, respondents=200,
trials=12, balanced.sample=TRUE, best.of=50,
verbose=TRUE, no.output=FALSE)
{
cat("Searching for a balanced design ...\n")
# create a matrix to hold the result
rnd.tab <- matrix(0,ncol=length(attrLevels),nrow=cards*respondents*trials)
# and figure the starting point for a "best" design
att.base <- 1/rep(attrLevels,attrLevels) # ideal balance proportions
hold.mss <- sum(att.base^2)
# and one to hold trials along the way
rnd.trial <- matrix(0,ncol=length(attrLevels),nrow=cards*respondents*trials)
for (i in 1:best.of)
{
# generate a candidate trial
# try to balance the attribute levels ?
if (balanced.sample) {
# figure out which attr levels can be balanced and which can't
which.balanced <- which(attrLevels >= cards)
which.replace <- which(attrLevels < cards)
# generate the columns that can be balanced
# (sample without replacement from attribute levels)
if (any(which.balanced)) {
# generate balanced sets for every combination where possible
for (ii in 1:(respondents*trials))
{
samp.row <- sapply(attrLevels[which.balanced],
sample, cards, replace=FALSE)
rnd.trial[((ii-1)*cards+1):(ii*cards), which.balanced] <- samp.row
}
}
# and now the columns that can't balance (sample with replacement)
if (any(which.replace)) {
samp.replace <- sapply(attrLevels[which.replace],
sample, cards*trials*respondents,replace=TRUE)
rnd.trial[,which.replace] <- samp.replace
}
} else {
# just generate them all randomly
rnd.trial <- sapply(attrLevels,
sample, cards*trials*respondents, replace=TRUE)
}
# is the candidate better than the previous candidate ?
# convert it to a design matrix
new.des <- convertSSItoDesign(as.data.frame(rnd.trial), no.output=TRUE)
# calculate how much it differs from the ideal and compare to held trial
new.mss <- sum((colMeans(new.des)-att.base)^2)
if (new.mss < hold.mss) {
rnd.tab <- rnd.trial
hold.mss <- new.mss
if (verbose & (i>1)) {
cat("Improved design found on trial: ", i ," SSE = " , new.mss , "\n")
}
}
}
rnd.tab <- data.frame(rnd.tab)
if (!is.null(names(attrLevels))) {
names(rnd.tab) <- names(attrLevels)
}
return(rnd.tab)
}
#############################################
# pickMNLwinningCards
#############################################
# function(design.mat, pws=rep(0, ncol(design.mat)), cards=3,
# verbose=TRUE, vec.format="WIN")
#
# Given a design matrix and vector of part worths, returns a vector of which
# card wins each comparison set
# NOTE: In v0.2, this function is primarily for test purposes -- it only uses
# aggregate level part worths, plus optional random noise
#
# PARAMETERS
# design.mat : the design matrix in dummy-coded (0/1) format
# pws : list of part worths to use
# NOTE: currently uses aggregate-level PWs only
# cards : number of cards per trial
# noise : add randomness into the choices?
# p.noise : fraction of choices that will be noisy, if noise==TRUE
# verbose : output more as it goes
# vec.format : format of the output, either "WIN" == 0/1 | "ANS" = 1,2,3, etc
#
# OUTPUT
# a vector of the winning cards for the design
#' Choose the winning concepts in a CBC design matrix, according to utilities
#'
#' @param design.mat A design matrix as created by \code{generateMNLrandomTab}
#' @param pws A vector of utility coefficients for each column in
#' \code{design.mat}. If missing, these are set to 0 and all winners will be
#' drawn randomly.
#' @param cards How many concepts are seen on each trial in the design matric.
#' @param noise Whether to randomly make some choices (instead of using the
#' utilties)
#' @param p.noise The odds of randomly choosing a winner (instead of using
#' the utilities)
#' @param use.MNL Not used currently, placeholder for future.
#' @param verbose Whether to output information as it runs.
#' @param vec.format "WIN" will mark the winning row in the design matrix
#' with a "1" and all others with "0". This would match a typical regression
#' model to estimate coefficients from the observations.
#' "ANS" will put the row number of the winning concept into the rows for
#' each trial; some other software expects that format.
#'
#' @return A vector identifying which concept won each trial, according to the
#' design matrix and supplied utilities.
#' @seealso [generateMNLrandomTab] to create a design matrix,
#' [generateRNDpws] to create a random set of utilities.
#'
pickMNLwinningCards <- function(design.mat, pws=rep(0, ncol(design.mat)),
cards=3, noise=FALSE, p.noise=0.3,
use.MNL=TRUE, ## to do -- choose according to MNL roulette
verbose=TRUE, vec.format="WIN")
{
# how many sets of comparisons are there?
n.trial <- nrow(design.mat)/cards
vec.win <- rep(0, n.trial)
## pws.mat <- as.matrix(pws) ## ** TO DO: handle vector OR a matrix
pws.mat <- pws
# iterate over every set of comparisons and find the winner
# ** TO DO
# ** would be nice to vectorize this -- [chris] see preference share prediction routines in portfolio model code
for (i in 1:n.trial)
{
card.mat <- design.mat[((i-1)*cards+1):(i*cards),]
card.util <- exp(pws.mat %*% t(card.mat))
which.win <- which(card.util==max(card.util))
if (noise) {
if (runif(1) < p.noise) {
which.win <- sample(1:cards, 1)
}
}
if (length(which.win) > 1) {
which.win <- sample(which.win, 1) # choose among multiple winners randomly
}
vec.win[i] <- which.win
if (verbose) {
if (i/2000 == floor(i/2000)) {
cat("Processing trial: ",i,"\n")
}
}
}
# expand this to the desired format (WIN==1/0 format by card; ANS=card number)
if (vec.format == "WIN") { # cards marked 1/0 according to whether they won or not
vec.tmp <- ifelse(rep(vec.win,each=cards)==rep(c(1:cards),n.trial),1,0)
vec.win <- vec.tmp
} else if (vec.format == "ANS") {
vec.tmp <- rep(vec.win,each=cards)
vec.win <- vec.tmp
} else {
warning("Parameter 'vec.format' specified incorrectly (should be 'WIN' or 'ANS'). Raw card list returned (1 card per set).")
}
return(vec.win)
}
#############################################
# generateRNDpws
#############################################
# function(attrs)
#
# given a vector of attributes/features, create random zero-sum part worths
#
# PARAMETERS
# attrs : a vector where each element represents how many levels there are for the corresponding attribute
# e.g., c(2,3,2) would be 3 attributes, with 2 levels, 3 levels, and 2 levels respectively
# mu : mean of the randomly drawn part-worths, if you want to ensure some of them are far from zero
# of course the return vector will still be zero-sum
# stdev : sd of the randomly drawn part-worths, if you want to ensure some different scale factor
#
# OUTPUT
# a vector of zero-centered (within attribute) random normal partworths (mean=mu, sd=stdev)
#
#' Generate some zero-centered part worths (utilities) for CBC design
#'
#' @param attrs A vector with the attribute/level design, as noted for
#' \code{generateMNLrandomTab}#'
#' @param mu The mean of the random utilities
#' @param sdev Standard deviation of the utilities
#'
#' @returns A vector of random values that sum to 0 for each subset of values
#' that correspond to one attribute in a CBC attribute/level design.
#'
#' @seealso [generateMNLrandomTab] to create a design matrix,
#' [pickMNLwinningCards] to select the winning concepts from it, given
#' some set of utilities (as from this function).
generateRNDpws <- function(attrs, mu=0, sdev=1)
{
pw.vec <- NULL
for (i in 1:length(attrs))
{
if (attrs[i] > 1) { # more than 1 feature so OK to generate
pw.car <- rnorm(attrs[i]-1, mu, sdev)
pw.cdr <- -1*sum(pw.car) # the final pw needed to make zero-sum
pw.vec <- c(pw.vec, sample(c(pw.car, pw.cdr))) # shuffle and add to list
} else if (attrs[1] < 1) { # attribute with <1 level
warning("Attribute level is missing (# features < 1).")
} else { # exactly 1 level so PW must be 0
pw.vec <- c(pw.vec, 0)
}
}
return(pw.vec)
}
#########################
# fillCBCwinners()
#########################
#
# utility function. In a vector of CBC choices, replaces any NAs with random choices
#
# PARAMETERS
# win.cards = vector of CBC winning cards, e.g., c(1,3,2,3,2,1,1,3,2...)
# OUTPUT
# vector with NA values replaced by random choices in [1..max(win.card)]
#
fillCBCwinners <- function(win.cards) {
if (any(!is.na(win.cards))) {
win.cards[is.na(win.cards)] <- sample(max(win.cards, na.rm=T), sum(is.na(win.cards)), replace=TRUE)
} else {
warning ("All cards in fillCBCwinners are NA. Assuming cards per trial = 3.")
win.cards[is.na(win.cards)] <- sample(3, sum(is.na(win.cards)), replace=TRUE)
}
return(win.cards)
}
#########################
# expandCBCwinners()
#########################
#
# utility function. In a vector of CBC choices, replaces any NAs with random choices
#
# PARAMETERS
# win.cards = vector of CBC winning cards, e.g., c(1,3,2,3,2,1,1,3,2...)
# cards = how many cards per trial
# fill = whether to replace NA values (missing responses) with random responses
# OUTPUT
# vector marking each concept as 0/1 for lost/won, of length (cards*length(win.cards))
#
expandCBCwinners <- function(win.cards, cards=3, fill=TRUE) {
if (fill) {
win.cards <- fillCBCwinners(win.cards)
}
win.exp <- rep(0, length(win.cards)*cards) # vector to hold card winners. by default all choices are "no"
win.ind <- seq(from=0, to=length(win.exp)-1, by=3) + win.cards # the 1/0 location of just the winning cards
win.exp[win.ind] <- 1 # set the winners
return(win.exp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.