R/conjoint.R

Defines functions conjoint

Documented in conjoint

#' Conjoint analysis
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param dataset Dataset
#' @param rvar The response variable (e.g., profile ratings)
#' @param evar Explanatory variables in the regression
#' @param int Interaction terms to include in the model
#' @param by Variable to group data by before analysis (e.g., a respondent id)
#' @param reverse Reverse the values of the response variable (`rvar`)
#' @param data_filter Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")
#' @param envir Environment to extract data from
#'
#' @return A list with all variables defined in the function as an object of class conjoint
#'
#' @examples
#' conjoint(mp3, rvar = "Rating", evar = "Memory:Shape") %>% str()
#'
#' @seealso \code{\link{summary.conjoint}} to summarize results
#' @seealso \code{\link{plot.conjoint}} to plot results
#'
#' @export
conjoint <- function(dataset, rvar, evar,
                     int = "", by = "none",
                     reverse = FALSE, data_filter = "",
                     envir = parent.frame()) {
  vars <- c(rvar, evar)
  if (by != "none") vars <- c(vars, by)
  df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
  dataset <- get_data(dataset, vars, filt = data_filter, envir = envir)
  radiant.model::var_check(evar, colnames(dataset)[-1], int) %>%
    (function(x) {
      vars <<- x$vars
      evar <<- x$ev
      int <<- x$intv
    })

  ## in case : was used to select a range of variables
  # evar <- colnames(dataset)[-1]
  if (!is.empty(by, "none")) {
    evar <- base::setdiff(evar, by)
    vars <- base::setdiff(vars, by)
    bylevs <- dataset[[by]] %>%
      as_factor() %>%
      levels()
    model_list <- vector("list", length(bylevs)) %>% set_names(bylevs)
  } else {
    bylevs <- "full"
    model_list <- list(full = list(model = NA, coeff = NA, tab = NA))
  }

  formula <- paste(rvar, "~", paste(vars, collapse = " + ")) %>% as.formula()

  for (i in seq_along(bylevs)) {
    if (!by == "none") {
      cdat <- filter(dataset, .data[[by]] == bylevs[i]) %>%
        select_at(.vars = base::setdiff(colnames(dataset), by))
    } else {
      cdat <- dataset
    }

    if (reverse) {
      cdat[[rvar]] <- cdat[[rvar]] %>%
        (function(x) (max(x) + 1) - x)
    }

    model <- sshhr(lm(formula, data = cdat))
    coeff <- tidy(model) %>%
      na.omit() %>%
      as.data.frame()
    tab <- the_table(coeff, cdat, evar)

    coeff$sig_star <- sig_stars(coeff$p.value) %>%
      format(justify = "left")
    colnames(coeff) <- c("label", "coefficient", "std.error", "t.value", "p.value", "sig_star")
    hasLevs <- sapply(select(dataset, -1), function(x) is.factor(x) || is.logical(x) || is.character(x))
    if (sum(hasLevs) > 0) {
      for (j in names(hasLevs[hasLevs])) {
        coeff$label %<>% gsub(paste0("^", j), paste0(j, "|"), .) %>%
          gsub(paste0(":", j), paste0(":", j, "|"), .)
      }
      rm(j, hasLevs)
    }
    model_list[[bylevs[i]]] <- list(model = model, coeff = coeff, tab = tab)
  }

  ## creating PW and IW data.frames
  if (!is.empty(by, "none")) {
    cn <- gsub("\\|", "_", model_list[[1]]$coeff$label) %>%
      gsub("[^A-z0-9_\\.]", "", .)

    PW <- matrix(NA, nrow = length(bylevs), ncol = length(cn) + 2) %>%
      as.data.frame(stringsAsFactors = FALSE) %>%
      set_colnames(c(by, ".Rsq", cn))
    PW[[by]] <- bylevs

    for (i in seq_along(bylevs)) {
      PW[i, 2] <- glance(model_list[[bylevs[i]]]$model)$r.squared
      PW[i, 3:ncol(PW)] <- model_list[[bylevs[i]]]$coeff$coefficient
    }

    ## creating IW data.frame
    cn <- model_list[[1]]$tab$IW$Attribute %>%
      gsub("[^A-z0-9_\\.]", "", .)

    IW <- matrix(NA, nrow = length(bylevs), ncol = length(cn) + 2) %>%
      as.data.frame(stringsAsFactors = FALSE) %>%
      set_colnames(c(by, ".Rsq", cn))
    IW[[by]] <- bylevs

    for (i in seq_along(bylevs)) {
      IW[i, 2] <- glance(model_list[[bylevs[i]]]$model)$r.squared
      IW[i, 3:ncol(IW)] <- model_list[[bylevs[i]]]$tab$IW$IW
    }
    rm(cn)
  }

  rm(model, coeff, tab, envir)

  as.list(environment()) %>% add_class("conjoint")
}

#' Summary method for the conjoint function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{conjoint}}
#' @param show Level in by variable to analyze (e.g., a specific respondent)
#' @param mc_diag Shows multicollinearity diagnostics.
#' @param additional Show additional regression results
#' @param dec Number of decimals to show
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- conjoint(mp3, rvar = "Rating", evar = "Memory:Shape")
#' summary(result, mc_diag = TRUE)
#'
#' @seealso \code{\link{conjoint}} to generate results
#' @seealso \code{\link{plot.conjoint}} to plot results
#'
#' @importFrom car vif
#'
#' @export
summary.conjoint <- function(object, show = "", mc_diag = FALSE,
                             additional = FALSE, dec = 3, ...) {
  cat("Conjoint analysis\n")
  cat("Data                 :", object$df_name, "\n")
  if (!is.empty(object$data_filter)) {
    cat("Filter               :", gsub("\\n", "", object$data_filter), "\n")
  }
  if (object$by != "none") {
    cat("Show                 :", object$by, "==", show, "\n")
  }
  rvar <- if (object$reverse) paste0(object$rvar, " (reversed)") else object$rvar
  cat("Response variable    :", rvar, "\n")
  cat("Explanatory variables:", paste0(object$evar, collapse = ", "), "\n\n")

  if (object$by == "none" || is.empty(show) || !show %in% names(object$model_list)) {
    show <- names(object$model_list)[1]
  }

  tab <- object$model_list[[show]]$tab
  cat("Conjoint part-worths:\n")
  tab$PW[, 1:2] %<>% format(justify = "left")
  print(format_df(tab$PW, dec), row.names = FALSE)
  cat("\nConjoint importance weights:\n")
  tab$IW[, 1:2] %<>% format(justify = "left")
  print(format_df(tab$IW, dec), row.names = FALSE)
  cat("\nConjoint regression results:\n\n")

  coeff <- object$model_list[[show]]$coeff
  coeff$label %<>% format(justify = "left")
  if (!additional) {
    coeff[, 2] %<>% (function(x) sprintf(paste0("%.", dec, "f"), x))
    print(dplyr::rename(coeff[, 1:2], `  ` = "label"), row.names = FALSE)
    cat("\n")
  } else {
    if (all(coeff$p.value == "NaN")) {
      coeff[, 2] %<>% (function(x) sprintf(paste0("%.", dec, "f"), x))
      print(dplyr::rename(coeff[, 1:2], `  ` = "label"), row.names = FALSE)
      cat("\nInsufficient variation in explanatory variable(s) to report additional statistics")
      return()
    } else {
      p.small <- coeff$p.value < .001
      coeff[, 2:5] %<>% format_df(dec)
      coeff$p.value[p.small] <- "< .001"
      print(dplyr::rename(coeff, `  ` = "label", ` ` = "sig_star"), row.names = FALSE)
    }

    model <- object$model_list[[show]]$model

    if (nrow(model$model) <= (length(object$evar) + 1)) {
      return("\nInsufficient observations to estimate model")
    }

    ## adjusting df for included intercept term
    df_int <- if (attr(model$terms, "intercept")) 1L else 0L

    reg_fit <- glance(model) %>% round(dec)
    if (reg_fit["p.value"] < .001) reg_fit["p.value"] <- "< .001"
    cat("\nSignif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\n")
    cat("R-squared:", paste0(reg_fit$r.squared, ", "), "Adjusted R-squared:", reg_fit$adj.r.squared, "\n")
    cat("F-statistic:", reg_fit$statistic, paste0("df(", reg_fit$df - df_int, ",", reg_fit$df.residual, "), p.value"), reg_fit$p.value)
    cat("\nNr obs:", format_nr(reg_fit$df + reg_fit$df.residual, dec = 0), "\n\n")

    if (anyNA(model$coeff)) {
      cat("The set of explanatory variables exhibit perfect multicollinearity.\nOne or more variables were dropped from the estimation.\n")
    }
  }

  if (mc_diag) {
    if (length(object$evar) > 1) {
      cat("Multicollinearity diagnostics:\n")
      car::vif(object$model_list[[show]]$model) %>%
        (function(x) if (!dim(x) %>% is.null()) x[, "GVIF"] else x) %>% # needed when factors are included
        data.frame(
          VIF = .,
          Rsq = 1 - 1 / .,
          stringsAsFactors = FALSE
        ) %>%
        round(dec) %>%
        .[order(.$VIF, decreasing = T), ] %>%
        (function(x) if (nrow(x) < 8) t(x) else x) %>%
        print()
    } else {
      cat("Insufficient number of attributes selected to calculate\nmulticollinearity diagnostics")
    }
  }
}

#' Predict method for the conjoint function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{conjoint}}
#' @param pred_data Provide the dataframe to generate predictions. The dataset must contain all columns used in the estimation
#' @param pred_cmd Command used to generate data for prediction
#' @param conf_lev Confidence level used to estimate confidence intervals (.95 is the default)
#' @param se Logical that indicates if prediction standard errors should be calculated (default = FALSE)
#' @param interval Type of interval calculation ("confidence" or "prediction"). Set to "none" if se is FALSE
#' @param dec Number of decimals to show
#' @param envir Environment to extract data from
#' @param ... further arguments passed to or from other methods
#'
#' @seealso \code{\link{conjoint}} to generate the result
#' @seealso \code{\link{summary.conjoint}} to summarize results
#' @seealso \code{\link{plot.conjoint}} to plot results
#'
#' @examples
#' result <- conjoint(mp3, rvar = "Rating", evar = "Memory:Shape")
#' predict(result, pred_data = mp3)
#'
#' @importFrom radiant.model predict_model
#'
#' @export
predict.conjoint <- function(object, pred_data = NULL, pred_cmd = "",
                             conf_lev = 0.95, se = FALSE,
                             interval = "confidence", dec = 3,
                             envir = parent.frame(), ...) {
  if (is.character(object)) {
    return(object)
  }
  if (!isTRUE(se)) {
    interval <- "none"
  } else if (isTRUE(interval == "none")) {
    se <- FALSE
  }

  ## ensure you have a name for the prediction dataset
  if (is.data.frame(pred_data)) {
    df_name <- deparse(substitute(pred_data))
  } else {
    df_name <- pred_data
  }

  pfun <- function(model, pred, se, conf_lev) {
    pred_val <-
      try(
        sshhr(
          predict(model, pred, interval = ifelse(se, interval, "none"), level = conf_lev)
        ),
        silent = TRUE
      )

    if (!inherits(pred_val, "try-error")) {
      if (se) {
        pred_val %<>% data.frame(stringsAsFactors = FALSE) %>% mutate(diff = .[, 3] - .[, 1])
        ci_perc <- ci_label(cl = conf_lev)
        colnames(pred_val) <- c("Prediction", ci_perc[1], ci_perc[2], "+/-")
      } else {
        pred_val %<>% data.frame(stringsAsFactors = FALSE) %>% select(1)
        colnames(pred_val) <- "Prediction"
      }
    }

    pred_val
  }

  if (is.empty(object$by, "none")) {
    object$model <- object$model_list[["full"]]$model
    predict_model(object, pfun, "conjoint.predict", pred_data, pred_cmd, conf_lev, se, dec, envir = envir) %>%
      set_attr("radiant_interval", interval) %>%
      set_attr("radiant_pred_data", df_name)
  } else {
    predict_conjoint_by(object, pfun, pred_data, pred_cmd, conf_lev, se, dec, envir = envir) %>%
      set_attr("radiant_interval", interval) %>%
      set_attr("radiant_pred_data", df_name)
  }
}

#' Predict method for the conjoint function when a by variables is used
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{conjoint}}
#' @param pfun Function to use for prediction
#' @param pred_data Name of the dataset to use for prediction
#' @param pred_cmd Command used to generate data for prediction
#' @param conf_lev Confidence level used to estimate confidence intervals (.95 is the default)
#' @param se Logical that indicates if prediction standard errors should be calculated (default = FALSE)
#' @param dec Number of decimals to show
#' @param envir Environment to extract data from
#' @param ... further arguments passed to or from other methods
#'
#' @seealso \code{\link{conjoint}} to generate the result
#' @seealso \code{\link{summary.conjoint}} to summarize results
#' @seealso \code{\link{plot.conjoint}} to plot results
#'
#' @importFrom radiant.model predict_model
#'
#' @export
predict_conjoint_by <- function(object, pfun, pred_data = NULL, pred_cmd = "",
                                conf_lev = 0.95, se = FALSE, dec = 3,
                                envir = parent.frame(), ...) {
  if (is.character(object)) {
    return(object)
  }
  ## ensure you have a name for the prediction dataset
  if (is.data.frame(pred_data)) {
    attr(pred_data, "radiant_pred_data") <- deparse(substitute(pred_data))
  }

  pred <- list()
  bylevs <- object$bylevs

  for (i in seq_along(bylevs)) {
    object$model <- object$model_list[[bylevs[i]]]$model
    pred[[i]] <- predict_model(object, pfun, "conjoint.predict", pred_data, pred_cmd, conf_lev, se, dec, envir = envir)

    ## when se is true reordering the columns removes attributes for some reason
    if (i == 1) att <- attributes(pred[[1]])

    if (is.character(pred[[i]])) {
      return(pred[[i]])
    }
    pred[[i]] %<>%
      (function(x) {
        x[[object$by]] <- bylevs[i]
        x
      }) %>%
      (function(x) x[, c(object$by, head(colnames(x), -1))])
  }

  pred <- bind_rows(pred)
  att$row.names <- 1:nrow(pred)
  att$vars <- att$names <- colnames(pred)
  attributes(pred) <- att
  add_class(pred, "conjoint.predict.by") %>%
    add_class("conjoint.predict")
}

#' Print method for predict.conjoint
#'
#' @param x Return value from prediction method
#' @param ... further arguments passed to or from other methods
#' @param n Number of lines of prediction results to print. Use -1 to print all lines
#'
#' @importFrom radiant.model print_predict_model
#'
#' @export
print.conjoint.predict <- function(x, ..., n = 20) {
  print_predict_model(x, ..., n = n, header = "Conjoint Analysis")
}

#' Plot method for the conjoint function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param x Return value from \code{\link{conjoint}}
#' @param plots Show either the part-worth ("pw") or importance-weights ("iw") plot
#' @param show Level in by variable to analyze (e.g., a specific respondent)
#' @param scale_plot Scale the axes of the part-worth plots to the same range
#' @param shiny Did the function call originate inside a shiny app
#' @param custom Logical (TRUE, FALSE) to indicate if ggplot object (or list of ggplot objects) should be returned. This option can be used to customize plots (e.g., add a title, change x and y labels, etc.). See examples and \url{https://ggplot2.tidyverse.org/} for options.
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- conjoint(mp3, rvar = "Rating", evar = "Memory:Shape")
#' plot(result, scale_plot = TRUE)
#' plot(result, plots = "iw")
#'
#' @seealso \code{\link{conjoint}} to generate results
#' @seealso \code{\link{summary.conjoint}} to summarize results
#'
#' @importFrom rlang .data
#'
#' @export
plot.conjoint <- function(x, plots = "pw", show = "", scale_plot = FALSE,
                          shiny = FALSE, custom = FALSE, ...) {
  if (x$by == "none" || is.empty(show) || !show %in% names(x$model_list)) {
    show <- names(x$model_list)[1]
  }

  the_table <- x$model_list[[show]]$tab
  plot_ylim <- the_table$plot_ylim
  plot_list <- list()

  if ("pw" %in% plots) {
    PW.df <- the_table[["PW"]]

    lab <- if (x$by == "none") "" else paste0("(", show, ")")

    for (var in x$evar) {
      PW.var <- PW.df[PW.df[["Attributes"]] == var, ]

      # setting the levels in the same order as in the_table. Without this
      # ggplot would change the ordering of the price levels
      PW.var$Levels <- factor(PW.var$Levels, levels = PW.var$Levels, ordered = FALSE)

      p <- ggplot(PW.var, aes(x = .data$Levels, y = .data$PW, group = 1)) +
        geom_line(color = "blue", linetype = "dotdash", linewidth = .7) +
        geom_point(color = "blue", size = 4, shape = 21, fill = "white") +
        labs(title = paste("Part-worths for", var, lab), x = "") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))

      if (scale_plot) {
        p <- p + ylim(plot_ylim[var, "Min"], plot_ylim[var, "Max"])
      }
      plot_list[[var]] <- p
    }
  }

  if ("iw" %in% plots) {
    IW.df <- the_table[["IW"]]
    lab <- if (x$by == "none") "" else paste0(" (", show, ")")
    plot_list[["iw"]] <- ggplot(IW.df, aes(x = .data$Attributes, y = .data$IW, fill = .data$Attributes)) +
      geom_bar(stat = "identity", alpha = 0.5) +
      theme(legend.position = "none") +
      labs(title = paste0("Importance weights", lab))
  }

  if (length(plot_list) > 0) {
    if (custom) {
      if (length(plot_list) == 1) plot_list[[1]] else plot_list
    } else {
      patchwork::wrap_plots(plot_list, ncol = min(length(plot_list), 2)) %>%
        (function(x) if (isTRUE(shiny)) x else print(x))
    }
  }
}

#' Function to calculate the PW and IW table for conjoint
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param model Tidied model results (broom) output from \code{\link{conjoint}} passed on by summary.conjoint
#' @param dataset Conjoint data
#' @param evar Explanatory variables used in the conjoint regression
#'
#' @examples
#' result <- conjoint(mp3, rvar = "Rating", evar = "Memory:Shape")
#' the_table(tidy(result$model_list[[1]][["model"]]), result$dataset, result$evar)
#'
#' @seealso \code{\link{conjoint}} to generate results
#' @seealso \code{\link{summary.conjoint}} to summarize results
#' @seealso \code{\link{plot.conjoint}} to plot results
#'
#' @export
the_table <- function(model, dataset, evar) {
  if (is.character(model)) {
    return(list("PW" = "No attributes selected."))
  }

  attr <- select_at(dataset, .vars = evar) %>%
    mutate_if(is.logical, as.factor) %>%
    mutate_if(is.character, as.factor)

  isFct <- sapply(attr, is.factor)
  if (sum(isFct) < ncol(attr)) {
    return(list("PW" = "Only factors can be used.", "IW" = "Only factors can be used."))
  }
  bylevs <- lapply(attr[, isFct, drop = FALSE], levels)
  vars <- colnames(attr)[isFct]

  nlevs <- sapply(bylevs, length)
  PW.df <- data.frame(rep(vars, nlevs), unlist(bylevs), stringsAsFactors = FALSE)
  colnames(PW.df) <- c("Attributes", "Levels")
  PW.df$PW <- 0

  ## Calculate PW and IW's when interactions are present
  ## http://www.slideshare.net/SunnyBose/conjoint-analysis-12090511
  rownames(PW.df) <- paste(PW.df[["Attributes"]], PW.df[["Levels"]], sep = "")

  coeff <- model$estimate
  PW.df[model$term[-1], "PW"] <- coeff[-1]

  minPW <- PW.df[tapply(1:nrow(PW.df), PW.df$Attributes, function(i) i[which.min(PW.df$PW[i])]), ]
  maxPW <- PW.df[tapply(1:nrow(PW.df), PW.df$Attributes, function(i) i[which.max(PW.df$PW[i])]), ]
  rownames(minPW) <- minPW$Attributes
  rownames(maxPW) <- maxPW$Attributes

  rangePW <- data.frame(cbind(maxPW[vars, "PW"], minPW[vars, "PW"]), stringsAsFactors = FALSE)
  rangePW$Range <- rangePW[[1]] - rangePW[[2]]
  colnames(rangePW) <- c("Max", "Min", "Range")
  rownames(rangePW) <- vars

  ## for plot range if standardized
  maxlim <- rangePW[["Max"]] > abs(rangePW[["Min"]])
  maxrange <- max(rangePW[["Range"]])
  plot_ylim <- rangePW[c("Min", "Max")]

  plot_ylim[maxlim, "Max"] <- plot_ylim[maxlim, "Max"] + maxrange - rangePW$Range[maxlim]
  plot_ylim[!maxlim, "Min"] <- plot_ylim[!maxlim, "Min"] - (maxrange - rangePW$Range[!maxlim])
  plot_ylim <- plot_ylim * 1.01 ## expanded max to avoid hiding max points in plot

  IW <- data.frame(vars, stringsAsFactors = FALSE)
  IW$IW <- rangePW$Range / sum(rangePW$Range)
  colnames(IW) <- c("Attributes", "IW")

  PW.df[["Attributes"]] <- as.character(PW.df[["Attributes"]])
  PW.df[["Levels"]] <- as.character(PW.df[["Levels"]])
  PW.df <- rbind(PW.df, c("Base utility", "~", coeff[1]))
  PW.df[["PW"]] <- as.numeric(PW.df[["PW"]])

  PW.df[["PW"]] <- round(PW.df[["PW"]], 3)
  IW[["IW"]] <- round(IW[["IW"]], 3)

  list("PW" = PW.df, "IW" = IW, "plot_ylim" = plot_ylim)
}

#' Store method for the Multivariate > Conjoint tab
#'
#' @details Store data frame with PWs or IWs in Radiant r_data list if available
#'
#' @param dataset Dataset
#' @param object Return value from conjoint
#' @param name Variable name(s) assigned to predicted values
#' @param ... further arguments passed to or from other methods
#'
#' @export
store.conjoint <- function(dataset, object, name, ...) {
  if (missing(name)) {
    object$tab
  } else {
    stop(
      paste0(
        "This function is deprecated. Use the code below for part worths instead:\n\n",
        name, " <- ", deparse(substitute(object)), "$PW\nregister(\"",
        name, ")\n\n",
        "This function is deprecated. Use the code below for importance weights instead:\n\n",
        name, " <- ", deparse(substitute(object)), "$IW\nregister(\"",
        name, ")"
      ),
      call. = FALSE
    )
  }
}

##' Store predicted values generated in predict.conjoint
#'
#' @details See \url{https://radiant-rstats.github.io/docs/multivariate/conjoint.html} for an example in Radiant
#'
#' @param dataset Dataset to add predictions to
#' @param object Return value from model predict function
#' @param name Variable name(s) assigned to predicted values
#' @param ... Additional arguments
#'
#' @examples
#' conjoint(mp3, rvar = "Rating", evar = "Memory:Shape") %>%
#'   predict(mp3) %>%
#'   store(mp3, ., name = "pred_pref")
#'
#' @export
store.conjoint.predict <- function(dataset, object, name = "prediction", ...) {
  radiant.model:::store.model.predict(dataset, object, name = name, ...)
}
radiant-rstats/radiant.multivariate documentation built on Nov. 29, 2023, 9:52 p.m.