R/functions.R

Defines functions better_names paste_noNA column_combiner column_to_numeric factor_na_drop factor_cleaner factor_orderer fo factor_conflator column_orderer col_regularizer factor_swap bp wc grouper dt_sentiment_formatter val_dat val_demos reversable_gauge average_weighted_by_user rating_diff_value rating_diff_box icon_input color_input break_level_maker num_overview_plotter factor_overview_plotter yes_factor_overview_plotter add_overview_plot_default_options percents intents_by_awareness_bar spc sc cc conflicting_term_hierarchies row_percents conflicting_evaluations

Documented in average_weighted_by_user better_names bp break_level_maker cc color_input conflicting_evaluations factor_orderer factor_overview_plotter factor_swap fo icon_input intents_by_awareness_bar num_overview_plotter percents rating_diff_box rating_diff_value row_percents spc val_dat wc yes_factor_overview_plotter

# Functions for processing the popchips dataframe in popchips.R

#' Function for getting useable names.
#'
#' Function for getting useable names.
#' @param x dataframe, to be given better names
#' @return x but with column names standardized
better_names <- function(x) {
  # names(x) <- gsub('[[:punct:]]', '', names(x))
  names(x) <- gsub("\\s+", " ", names(x))
  return (x)
}
# Function to paste ignoring NAs
paste_noNA <- function(x, sep=", ") {
  gsub(", ", sep, toString(x[!is.na(x) & x!="" & x!="NA"]))
}
# Function to combine two columns into a third, drop now redundant columns
column_combiner <- function(df, col1, col2, combo) {
  df[[combo]] <- apply(df[ , c(col1, col2)], 1, paste_noNA, sep='')
  df <- df[!str_detect(names(df), paste0('', col1, '|', col2))]
  return(df)
}
# Improvement for x columns
# column_combiner <- function(df, col_vec, combo) {
#   df[[combo]] <- apply(df[ , col_vec], 1, paste_noNA, sep='')
#   df <- select(df, -col_vec)
#   return(df)
# }
# gatherer <- function(x, new_col = "dsf", gathered_cols) {
#   x %>%
#     gather(key = "brand", new_col, gathered_cols, na.rm = TRUE) %>%
#     select(-brand)
# }
# Function to convert to numeric
column_to_numeric <- function(df, columns) {
  df[ , columns] <- as.numeric(as.character(unlist(df[ , columns])))
  return(df)
}
# Function to replace NAs in a given column with a string
factor_na_drop <- function(df, column, replacement) {
  df[[column]][is.na(df[[column]])] <- replacement
  return(df)
}
# Function to replace variant string with definitive string
factor_cleaner <- function(df, column, bad_factor, replacement) {
  df[[column]] <-str_replace_all(df[[column]], bad_factor, replacement)
  return(df)
}
#' Function to reorder factor presentation.
#'
#' Function to reorder factor presentation.
#' @export
#' @param df dataframe, dataframe with factors being reordered.
#' @param column numeric (or string), identifies column containing factors to reorder.
#' @param column_order vector, identifies factors to 'push' to beginning of factor levels.
#' @return A dataframe like \code{df}, but with \code{column}'s factor levels reordered to put \code{column_order} at the beginning.
factor_orderer <- function(df, column, column_order) {
  for (i in length(column_order):1) {
    df[[column]] <- stats::relevel(df[[column]], column_order[i])
  }
  return(df)
}

#' Factor ordering for already-ordered factors; must specify all factors
#'
#' Factor ordering for already-ordered factors; must specify all factors
#' @export
#' @param df dataframe, dataframe with factors being reordered.
#' @param columns numeric (or string), identifies columns containing factors to reorder.
#' @param column_order vector, identifies factors in new order.
#' @return A dataframe like \code{df}, but with \code{column}'s factor levels reordered according to \code{column_order}.
fo <- function(df, columns, column_order) {
  for (i in columns) {
    df[[i]] <- factor(df[[i]], levels = column_order)
  }
  return(df)
}
# Function to reduce factor levels
factor_conflator <- function(df, column, factors) {
  df[[column]] <- factor(df[[column]])
  df[[column]] <- addNA(df[[column]])
  levels(df[[column]]) <- factors
  return(df)
}
# Function to reorder columns
column_orderer <- function(df, column_order) {
  return(df[ , column_order])
}

# Functions for processing pepsi data

# Function for getting column names into a format that pivot_longer() can deal with
col_regularizer <- function(x, conditions, replacements,
                            products = NULL, reverse = FALSE) {
  # Check whether replacements and conditions and of the same length
  len <- length(conditions)
  if (len != length(replacements)) {
    stop("Conditions and replacements are of unequal length")
  }
  # If products are empty, then replacement strings replace columns names
  # directly and without adding a suffix
  if (missing(products)) {
    for (i in 1:len) {
      x[grep(conditions[i], x)] <- replacements[i]
    }
    # Otherwise, go through every product, and each condition for each product
    # and replace column names accordingly.
    # `reversed` parameter controls whether the condition is expected before
    # the product name -- reversed == TRUE when product comes first.
  } else {
    for (p in products) {
      for (i in 1:len) {
        condition_string <- ifelse(reverse == FALSE,
                                   paste0(conditions[i], ".*", p),
                                   paste0(p, ".*", conditions[i]))
        x[grep(tolower(condition_string), tolower(x))] <- paste0(replacements[i], "_", p)
      }
    }
  }
  return(x)
}

#' For swapping out one factor for another.
#'
#' For swapping out one factor for another (e.g. numerical code to actual sentences in response).
#' @export
#' @param df dataframe, dataframe with factors being swapped.
#' @param columns vector, identifies columns containing factors to swap.
#' @param original_factors vector, gives factors to swap.
#' @param new_factors vector, gives replacements for original_factors.
#' @param addNA logical or string, if not NULL, replaces NAs with string.
#' @return A dataframe like \code{df}, but with \code{original_factors} in \code{columns} replaced by \code{new_factors} (and NAs replaced with \code{addNA} if \code{addNA} used).
factor_swap <- function(df, columns, original_factors = NA, new_factors = NA,
                        addNA = NULL) {
  for (c in columns) {
    df[[c]] <- factor(df[[c]])
    df[[c]] <- plyr::mapvalues(df[[c]], from = original_factors, to = new_factors,
                               warn_missing = FALSE)
    if (!missing(addNA)) {
      df[[c]] <- forcats::fct_explicit_na(df[[c]], addNA)
    }
  }
  return(df)
}
# factor_swap <- function(df, columns, original_factors = NA, new_factors = NA,
#                         addNA = FALSE) {
#   for (c in columns) {
#     df[[c]] <- factor(df[[c]])
#     df[[c]] <- plyr::mapvalues(df[[c]], from = original_factors, to = new_factors)
#     if (!missing(addNA)) {
#       df[[c]] <- addNA(df[[c]])
#       df[[c]] <- plyr::mapvalues(df[[c]], from = NA, to = addNA)
#     }
#   }
#   return(df)
# }

# Helper functions within the app (server.R)

#' For making bar plots with multiple traces.
#'
#' For making bar plots with multiple traces.
#' @export
#' @param t dataframe containing data for plot.
#' @param title_string string, title for plot.
#' @param col_vec vector containing colours for plot.
#' @param orient string, "h" or "v", whether the bars are horizontal or vertical.
#' @return  A plotly bar plot.
bp <- function(t, title_string, col_vec, orient = 'h') {
  # col_vec --> vector of colours for graph
  # Determining how to access temporary dtaframe
  t_cols <- 2:(length(t)-1)

  plt <- t %>%
    dplyr::mutate(primary_col = factor(primary_col, levels = factor(primary_col))) %>%
    plotly::plot_ly(type = "bar") %>%
    plotly::layout(
      barmode = "stack", font = list(family = font_fam, color = font_color),
      showlegend = FALSE, title = title_string, orientation = orient,
      xaxis = list(title = ""), yaxis = list(title =""),
      paper_bgcolor = box_color, plot_bgcolor = box_color)
  for (i in t_cols) {
    df_trace <- data.frame(x = t[[i]], y = t$primary_col, nm = names(t)[i])
    plt <- plt %>%
      plotly::add_trace(plt, data = df_trace, x = ~x, y = ~y, name = ~nm,
                        hoverlabel = list(namelength = -1),
                        hoverinfo = "text", text = ~paste0(nm, ": ", x),
                        marker = list(color = col_vec[i-1],
                                      line = list(color = font_color, width = 1)))
  }
  return(plt)
}

#' For duplicating ggwordcloud plots.
#'
#' For duplicating ggwordcloud plots.
#' @seealso \code{\link{wc_plotter}}
#' @param x dataframe, contains data for word cloud.
#' @param font_size, font size for word cloud.
#' @return A ggplot2 plot.
wc <- function(x, font_size) {
  ggplot2::ggplot(x, ggplot2::aes(size = n, family = font_fam, fontface = "bold")) +
    ggwordcloud::geom_text_wordcloud_area(rm_outside = TRUE) +
    ggplot2::scale_size_area(max_size = font_size) +
    ggplot2::theme(
      panel.background = ggplot2::element_rect(fill = box_color),
      plot.background = ggplot2::element_rect(fill = box_color, color = box_color))
}

# For standard grouping by n
grouper <- function(x, group_col) {
  quocol <- rlang::enquo(group_col)
  x %>%
    dplyr::group_by(!!quocol) %>%
    dplyr::summarize(n = n()) %>%
    dplyr::arrange(desc(n), !!quocol)
}

# Format pos/ neg setiment format for data tables
dt_sentiment_formatter <- function(x, sent = "sentiment") {
  x %>%
    DT::datatable() %>%
    DT::formatRound(sent, 2) %>%
    DT::formatStyle(
      sent, color = DT::styleInterval(c(-0.001, 0.001),
                                      c(neg_color, box_color, pos_color)))
}

#' Ensure demographics filters haven't filtered out all relevant data.
#'
#' Ensure demographics filters haven't filtered out all relevant data.
#' @export
#' @param x dataframe, previously filtered; hopefully not filtered to 0 rows.
val_dat <- function(x) {
  shiny::validate(shiny::need(nrow(x) > 0,
                              'All reviews filtered out; please include elements from each filter.'))
}
val_demos <- function(x) {
  shiny::validate(shiny::need(length(x) == 2,
                              "Please select another set of demographic variables (even if you don't intent to filter by them)."))
}

# Generic reversable gauge
reversable_gauge <- function(gauge_value, threshold,
                             label_text = '', symbol = '',
                             min_val = 0, max_val = 100, reversed = FALSE,
                             colours = c("success", "warning", "danger")) {
  print(threshold)
  if (length(threshold) == 1) {
    if (reversed == FALSE) {
      hi <- c(threshold+0.05, max_val)
      lo <- c(min_val, threshold-0.05)
    } else {
      lo <- c(threshold+0.05, max_val)
      hi <- c(min_val, threshold-0.05)
    }
    sec <- flexdashboard::gaugeSectors(success = hi,
                                       warning = c(threshold-0.05, threshold+0.05),
                                       danger = lo,
                                       colors = colours)
  } else {
    if (reversed == FALSE) {
      hi <- c(threshold[2], max_val)
      lo <- c(min_val, threshold[1])
    } else {
      lo <- c(threshold[2], max_val)
      hi <- c(min_val, threshold[1])
    }
    sec <- flexdashboard::gaugeSectors(success = hi,
                                       warning = c(threshold[1], threshold[2]),
                                       danger = lo,
                                       colors = colours)
  }

  gauge <- flexdashboard::gauge(
    gauge_value, min = min_val, max = max_val, symbol = symbol,
    label = label_text, sectors = sec
  )
  return(gauge)
}

#' For getting average by user.
#'
#' Some users give more than one rating.
#' This gloms all ratings by a given user (determined by Email)
#' together and then gives the average fraction of users who have
#' rated positively in at least one rating.
#' @export
#' @param df dataframe, data of interest
#' @param pcol numeric, the number of df's column containing the question of interest.
#' @param per_options vector, all the strings in df[[pcol]] that are counted when the percentage is calculated.
#' @param ignore_options string (or vector of strings), the factors to ignore during calculations.
# #' @param neg_options vector, all the strings in df[[pcol]] that are considered 'bad' responses.
#' @param dropNA logical, if TRUE, applies tidyr::drop_na(df, pcol).
#' @return A numeric value, the fraction of users who have given positive responses.
average_weighted_by_user <- function(df, pcol,
                                     per_options = "Yes",
                                     ignore_options = "Decline to state",
                                     dropNA = TRUE) {
  if (dropNA == TRUE) {
    value <- tidyr::drop_na(df, pcol)
  } else {value <- df}
  value <- value %>% dplyr::rename(primary_col = pcol)
  for (i in ignore_options) {
    value <- value %>% dplyr::filter(primary_col != i)
  }

  if (class(df[[pcol]]) == "numeric") {
    value <- value %>%
      dplyr::group_by(Email) %>% # At one point, grouped by Product too; cut
      dplyr::summarize(avg = mean(primary_col, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      dplyr::summarize(avg = mean(avg, na.rm = TRUE)) %>%
      round(2)
    value <- value[[1]]
  } else {
    value <- value %>%
      cc() %>%
      dplyr::filter(primary_col %in% per_options) %>%
      dplyr::select(`per`) %>%
      round(2)
    value <- value[[1]]
  }
  return(value)
}

#' For getting the difference between rating and benchmark.
#'
#' For getting the difference between rating and benchmark.
#' @param x dataframe
#' @param target_column numeric (or string), identifying column of interest.
#' @param warning_range numeric, benchmark to compare rating to; if larger than rating, negative result
#' @return A numeric value, positive if rating is greater than benchmark.
rating_diff_value <- function(x, target_column, warning_range) {
  val <- average_weighted_by_user(x, target_column) - warning_range
  return(val)
}
#' For creating the valueBox() that compares rating to benchmark.
#'
#' For creating the valueBox() that compares rating to benchmark.
#' @export
#' @param x numeric, value to show off in the box.
#' @param reversed logical, determines whether positive or negative numbers are considered 'good' / green.
#' @return A shinydashboard::box() object.
rating_diff_box <- function(x, label = "Rating vs. benchmark", reversed = FALSE,
                            symbol = '') {
  y <- ifelse(reversed == TRUE, -x, x)
  if (is.na(x)) {
    val <- "None of this flavor."
  } else { val <- paste0(round(x, 2), symbol)}
  shinydashboard::valueBox(
    val, label,
    icon = icon_input(x), width = NULL,
    color = color_input(y))
}
#' For swapping between up-arrows and down-arrows.
#'
#' For swapping between up-arrows and down-arrows.
#' @export
#' @param x numeric, tested against 0
#' @return An appropriate icon() arrow.
icon_input <- function(x) {
  if (length(x) == 1) {
    greater <- ifelse(x > 0, TRUE, FALSE)
    if (isTRUE(greater)) {
      return(shiny::icon("arrow-circle-up"))
    } else {
      return(shiny::icon("arrow-circle-down"))
    }
  } else if (length(x) == 2) {
    lesser1 <- ifelse(x[1] < 0, TRUE, FALSE)
    greater2 <- ifelse(x[2] > 0, TRUE, FALSE)
    if (isTRUE(greater2)) {
      return(shiny::icon("arrow-circle-up"))
    } else if (isTRUE(lesser1)) {
      return(shiny::icon("arrow-circle-down"))
    } else { # In between or error:
      return(shiny::icon("asterisk"))
    }
  }

}
#' For swapping between green and red colors.
#'
#' For swapping between green and red colors.
#' @export
#' @param x numeric, tested against 0
#' @return An appropriate colour string

color_input <- function(x) {
  # greatereq <- ifelse(x >= 0, TRUE, FALSE)
  # if (is.na(greatereq)) {return("aqua")}
  # if (isTRUE(greatereq == TRUE)) {
  #   return("green")
  # } else {
  #   return("red")
  # }
  if (length(x) == 1) {
    greater <- ifelse(x > 0, TRUE, FALSE)
    if (isTRUE(greater)) {
      return("green")
    } else {
      return("red")
    }
  } else if (length(x) == 2) {
    lesser1 <- ifelse(x[1] < 0, TRUE, FALSE)
    greater2 <- ifelse(x[2] > 0, TRUE, FALSE)
    if (isTRUE(greater2)) {
      return("green")
    } else if (isTRUE(lesser1 == TRUE)) {
      return("red")
    } else { # In between or error:
      return("yellow")
    }
  }
}

#' Helper for overview plotters. Generates breaks.
#'
#' Helper for overview plotters. Generates breaks.
#' @inheritParams factor_overview_plotter
break_level_maker <- function(break_values) {
  break_levels <- c()
  for (i in 1:(length(break_values)-1)) {
    break_levels <- c(break_levels,
                      paste0("(", break_values[i], ",", break_values[i+1], "]"))
  }
  return(break_levels)
}

#' For making faceted plots with numeric data (eg. 1 through 5).
#'
#' For making faceted plots with numeric data (eg. 1 through 5).
#' @export
#' @seealso \code{\link{factor_overview_plotter}}
#' @inheritParams factor_overview_plotter
num_overview_plotter <- function(x, title_string, warning,
                                 demo_one, demo_two,
                                 break_values = c(0, 10, 30, 1000),
                                 reversed_values = FALSE) { # reversed values not implemented
  break_levels <- break_level_maker(break_values)
  y <- x %>%
    dplyr::group_by(!!sym(demo_one), !!sym(demo_two), Product) %>%
    dplyr::summarize(Reviews = cut(n(), breaks = break_values, labels = break_levels),
                     Benchmark = ifelse(
                       (mean(target_column, na.rm = TRUE) - warning >= 0),
                       TRUE, FALSE)) %>%
    dplyr::mutate(Reviews = factor(Reviews, levels = break_levels)) %>%
    dplyr::right_join(x %>% dplyr::select(!!rlang::sym(demo_one), !!rlang::sym(demo_two), Product, target_column)) %>%
    fo("Benchmark", c(TRUE, FALSE))
  p <- y %>%
    ggplot2::ggplot(ggplot2::aes(Product, target_column, fill = Benchmark,
                                 color = Benchmark, alpha = Reviews)) +
    ggplot2::geom_bar(stat = "summary", fun.y = "mean")
  return(add_overview_plot_default_options(p, title_string, demo_one, demo_two))
}

#' For making faceted plots with factor data (eg. "Yes" / "No").
#'
#' For making faceted plots with factor data (eg. "Yes" / "No").
#' @seealso overview
#' @param x dataframe, column of interest named \'target_column\'.
#' @param title_string string, title of plot.
#' @param warning numeric, cutoff between \'positive\' and \'negative\' values.
#' @param demo_one string, (numeric probably works too?) identifies 1st demographic column in x to filter by.
#' @param demo_two string, (numeric probably works too?) identifies 2nd demographic column in x to filter by.
#' @param foptions vector, strings identifying which factors/strings in target_column count as \'positive\'.
#' @param break_values vectors, numeric values sample sizes into appropriate categories.  Ideally statistically relevant.
#' @param reversed_values logical, indicates whether to flip values (e.g. y \%>\% mutate(Percent = 100 - Percent) ).
#' @return A ggplo2 plot.
factor_overview_plotter <- function(x, title_string, warning,
                                    demo_one, demo_two,
                                    foptions = aware_options,
                                    break_values = c(0, 10, 30, 1000),
                                    reversed_values = FALSE) {
  # Email used to identify unique reviewers
  break_levels <- break_level_maker(break_values)
  y <- x %>%
    dplyr::mutate(terms = ifelse(target_column %in% foptions, 1, 0)) %>% # Count as aware if any answer was yes
    dplyr::group_by(Product, Email) %>%
    dplyr::summarize(tally = sum(terms)) %>%
    dplyr::mutate(target_column = ifelse(tally > 0, "Yes", "No")) %>% # Count as aware if any answer was yes
    dplyr::left_join(x %>% select(!!rlang::sym(demo_one), !!rlang::sym(demo_two), Email),
                     by = "Email") %>%
    dplyr::group_by(!!rlang::sym(demo_one), !!rlang::sym(demo_two), Product) %>%
    dplyr::summarize(Reviews = cut(n(), breaks = break_values, labels = break_levels),
                     Benchmark = ifelse(
                       (sum(target_column == "Yes")/n() * 100 - warning >= 0),
                       TRUE, FALSE),
                     n = n(), Percent = sum(target_column == "Yes")/n()*100)
  if (isTRUE(reversed_values)) {
    y <- y %>% mutate(Percent = 100 - Percent) %>% mutate(Benchmark = !Benchmark)
  }
  y <- y %>%
    dplyr::mutate(Reviews = factor(Reviews, levels = break_levels)) %>%
    fo("Benchmark", c(TRUE, FALSE))

  p <- y %>%
    ggplot2::ggplot(ggplot2::aes(stringr::str_wrap(Product, 8), Percent, fill = Benchmark,
                                 color = Benchmark, alpha = Reviews)) +
    ggplot2::geom_bar(stat = "identity")
  return(add_overview_plot_default_options(p, title_string, demo_one, demo_two))
}

#' factor_overview_plotter with default foptions set to "Yes" rather than aware_options.
#'
#' factor_overview_plotter with default foptions set to "Yes" rather than aware_options.
#' @export
#' @inheritParams factor_overview_plotter
#' @return A ggplo2 plot.
yes_factor_overview_plotter <- function(x, title_string, warning,
                                        demo_one, demo_two,
                                        foptions = c("Yes"), # Only change from f_o_p
                                        break_values = c(0, 10, 30, 1000),
                                        reversed_values = FALSE) {
  return(factor_overview_plotter(x, title_string, warning, demo_one, demo_two,
                                 foptions, break_values, reversed_values))
}

# Setting default options for various overview plots
add_overview_plot_default_options <- function(plot, title_string, d1, d2,
                                              title_size = 22) {
  r <- plot +
    ggplot2::scale_x_discrete(labels=product_labels) +
    ggplot2::facet_grid(dplyr::vars(!!rlang::sym(d1)), dplyr::vars(!!rlang::sym(d2))) +
    ggplot2::scale_fill_manual(values=c(pos_color, neg_color)) +
    ggplot2::scale_color_manual(values=c(pos_color, neg_color), guide = FALSE) +
    ggplot2::scale_alpha_discrete(range = c(0.2, 1), drop = FALSE) +
    ggtitle(title_string) +
    guides(fill=guide_legend(title="Over Benchmark")) +
    theme(plot.title = element_text(size=title_size),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          axis.text.x = element_text(angle = 30),
          legend.position="right")
  return(r)
}
# add_overview_plot_default_options <- function(plot, title_string, d1, d2) {
#   quod1 <- rlang::enquo(d1)
#   quod2 <- rlang::enquo(d2)
#   r <- plot +
#     facet_grid(Age ~ Gender
#       # !!quod1 ~ !!quod2
#                # , labeller=labeller(`Ethnic Origin` = eo_labels)
#                ) +
#     scale_fill_manual(values=c(pos_color, neg_color)) +
#     scale_color_manual(values=c(pos_color, neg_color), guide = FALSE) +
#     scale_alpha_discrete(range = c(0.2, 1)) +
#     ggtitle(title_string) +
#     guides(fill=guide_legend(title="Over Benchmark")) +
#     theme(axis.title.x=element_blank(),
#           axis.title.y=element_blank(),
#           axis.text.x = element_text(angle = 75),
#           legend.position="right")
#   return(r)
# }


# Helpers for generalizing stacked bar graphs
#' Getting percentages of total values for each row, column.
#'
#' Getting percentages of total values for each row, column.  Also renaming original columns for easy access.
#' @param df dataframe
#' @param cols vector labelling columns of interest
#' @param total numeric, total number of responses.
percents <- function(df, cols, total) {
  t <- df
  for (i in 1:length(cols)) {
    per_ <- paste0("per_", i)
    s_ <- paste0("s", i)
    exp <- rlang::sym(cols[i])
    t <- t %>%
      dplyr::mutate(!!per_ := (!!exp) / total * 100) %>%
      dplyr::rename(!!s_:=(!!exp))
  }
  return(t)
}

#' Plotly bar chart for intents by brand awareness.
#'
#' Plotly bar chart for intents by brand awareness.  Uses \code{\link{percents}}.
#' Future improvement: aggregate intents by user; get user intents rather than response intents.
#' @param dt dataframe, its primary column of interest must already be renamed primary_col and its secondary column of interest -- the column by which the bars will be stacked -- sec_col.
#' @param title_string string, for plot title.
#' @param legend_string string, for plot legend title.
#' @param col_vec vector of strings labelling colours.
#' @param dropSecNA logical, whether to drop NA/ "Decline to state" factors from sec_col.
#' @param legend_position list with two named numerics (x and y), position of legend on plot.
#' @return A plotly bar plot.
intents_by_awareness_bar <- function(dt, title_string, legend_string, col_vec,
                                     dropSecNA = TRUE,
                                     legend_position = list(x = 0.05, y = 0.95)) {
  # Drop rows with NA values, "Decline to state" in sec_col
  if (isTRUE(dropSecNA)) {
    dt <- dt %>%
      tidyr::drop_na(sec_col) %>%
      dplyr::filter(sec_col != "Decline to state")
  }
  # Getting total number of responses
  total <- dt %>% dplyr::tally() ; total <- total[[1]]
  # Getting temporary dataframe with responses shown by primary_col and sec_col
  t <- dt %>%
    group_by_primary_sec() %>%
    select(-total)
  # # Reordering
  # %>% select(primary_col, contains("Yes,"), everything())
  # Getting the actual response phrases (factors of sec_col)
  sec_opts <- t %>%
    dplyr::select(-primary_col) %>%
    names()
  # Adding percentages
  t <- percents(t, sec_opts, total)
  # Plotting graph
  plot <- t %>%
    # Starting with empty bar graph
    plotly::plot_ly(type = 'bar') %>%
    # Adding layout
    plotly::layout(
      title = title_string,
      legend = legend_position,
      annotations = list(yref='paper', xref="paper",
                         y=legend_position[["y"]]+0.04,
                         x=legend_position[["x"]]-0.02,
                         font = list(size = 13),
                         text = legend_string, showarrow = FALSE),
      xaxis = list(title=""), yaxis = list(title="Count"),
      barmode = "stack",
      hovermode = "compare",
      font = list(family = font_fam, color = font_color),
      paper_bgcolor = box_color,
      plot_bgcolor = box_color)
  # Determining how to access temporary dtaframe
  t_cols <- 2:length(t); or_cols <- t_cols[1:(length(t_cols)/2)]
  # Looping through temporary dataframe to add 'traces' to the empty bar graph
  for (i in or_cols) {
    df_trace <- data.frame(y=t[[i]], p=t[[i+length(or_cols)]],
                           x=t$primary_col)
    plot <- plotly::add_trace(
      plot, data = df_trace, x = ~x, hoverinfo = "name+text", y = ~y, #type = 'bar',
      name = sec_opts[i-1], marker = list(color= col_vec[i-1]),
      text = ~paste0('Count: ', y, '\nPercent: ', round(p, 2), '%'))
  }
  return(plot)
}

#' Plotly pie chart, simple, but nicely formatted.
#'
#' Plotly pie chart, simple, but nicely formatted.
#' @param x dataframe
#' @param title_string string, title of plot
#' @param color_vec vector, colours for plot
#' @param show_legend logical, whether to show legend in plot
#' @param dropNA logical, whether to drop null values before making graph.
#' @return Plotly pie chart.
spc <- function(x, title_string, color_vec, show_legend = FALSE, dropNA = TRUE) {
  ### Column of interest to be renamed primary_col beforehand
  # scnt <- sc(x)
  scnt <- cc(x) # Use this to be more accurate.
  if (isTRUE(dropNA)) {scnt <- tidyr::drop_na(scnt)}
  scnt %>%
    plotly::plot_ly(
      labels = ~primary_col, values = ~n, type = 'pie', rotation = 0,
      direction = "clockwise", sort = FALSE, textposition = 'inside',
      textinfo = 'label+percent+value',
      insidetextfont = list(family = font_fam, color = font_color),
      hoverinfo = 'label+text',
      text = ~paste('Count:', n),
      marker = list(colors = color_vec,
                    line = list(color = font_color, width = 1)),
      showlegend = show_legend) %>%
    plotly::layout(
      title = title_string,
      font = list(family = font_fam, color = font_color),
      hovermode = "compare", #position = "top centre",
      paper_bgcolor = box_color, plot_bgcolor = box_color)
}

# Simple count-retrieval
sc <- function(x) {
  ### column of interest must be named primary_col beforehand
  x %>%
    dplyr::group_by(primary_col) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    dplyr::mutate(per = n/sum(n) * 100)
}
#' Complicated count-retrieval.
#'
#' Complicated count-retrieval. Counts primary_col responses; if a user has give more than one response, counts them as "Mixed".
#' @param x dataframe, column of interest named primary_col.
#' @return A dataframe with all distinct variants of primary_col (and maybe "Mixed" too) and the number and % of responses of that variant.
cc <- function(x) {
  ### column of interest must be named primary_col beforehand
  p_levels <- c(x[["primary_col"]] %>% factor() %>% levels(), "Mixed")
  dist <- x %>%
    dplyr::group_by(Email) %>%
    dplyr::summarize(tally = dplyr::n_distinct(primary_col)) %>% # Get uniques per person
    ungroup()
  x <- x %>%
    dplyr::left_join(dist, by = c("Email" = "Email")) %>% # Join them only main df
    dplyr::mutate(primary_col = ifelse(tally > 1, "Mixed", as.character(primary_col))) %>% # Replace varying responses with "Mixed".
    dplyr::group_by(primary_col, Email) %>% # Eliminate duplicate counts from individual users
    dplyr::count(primary_col) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(primary_col) %>%
    dplyr::count() %>% # Count how many are left afterwards
    dplyr::ungroup() %>%
    dplyr::mutate(per = n/sum(n) * 100) %>%
    dplyr::mutate(primary_col = factor(primary_col, levels = p_levels))
}

# For summarizing conflicting evaluations
# Helper function to evaluate conflicting term hierarchies
conflicting_term_hierarchies <- function(conflicting_terms, ordered_hierarchy) {
  # ordered_hierarchy is the order if which the conflicting_terms overrule each other
  # Ie. If 1 shows up, that's what the user is classified as;
  # 2 overrules anything but but 1, 3 anything but 1 and 2, etc.
  if (length(conflicting_terms) != length(ordered_hierarchy)) {stop("Error; (length(conflicting_terms) != length(ordered_hierarchy)")}
  ordered_terms <- conflicting_terms[ordered_hierarchy]
}
#' For getting sums by row.
#'
#' For getting sums by row.
#' @export
#' @param df dataframe
#' @param cols vector of strings, column names.
#' @param sify logical, whether to rename col into s_xs.
#' @return A dataframe...
row_percents <- function(df, cols, sify = TRUE) {
  for (i in 1:length(cols)) {
    per_ <- paste0("per_", i)
    s_ <- paste0("s", i)
    exp <- rlang::sym(cols[i])
    df <- df %>%
      dplyr::mutate(!!per_ := (!!exp) / sec_tally * 100)
    if (isTRUE(sify)) {
      df <- df %>%
        dplyr::rename(!!s_:=(!!exp))
    }
  }
  return(df)
}

#' Pick an evaluation when users have >1 conflicting answers.
#'
#' Pick an evaluation when users have >1 conflicting answers (eg. users evaluating multiple products who have said yes and no when asked whether they've used it before).
#' @param x dataframe
#' @param primary_col numeric, column of interest.
#' @param conflicting_terms vector, strings possible answers from column of interest.
#' @param ordered_hierarchy vector, numeric order in which to accept conflicting_terms.
#' @param sec_col string, (or numeric) denoting column to divide into separate rows/bars by.
#' @param dropNA logical, whether to drop NA values from output.
#' @return A dataframe tallying the frequency of responses by product.
conflicting_evaluations <- function(x, primary_col, conflicting_terms,
                                    ordered_hierarchy, sec_col = "Product",
                                    dropNA = TRUE) {
  ### terms: column with conflicting evaluations
  ### Email: column used to identify individuals with conflicting evaluations for a given Flavor
  ordered_terms <- conflicting_term_hierarchies(conflicting_terms, ordered_hierarchy)
  x <- x %>%
    dplyr::rename(terms = primary_col) %>%
    dplyr::rename(sec = sec_col) %>%
    dplyr::group_by(terms, sec, Email) %>%
    dplyr::count()
  if (isTRUE(dropNA)) {x <- tidyr::drop_na(x)}
  x <- x %>%
    dplyr::ungroup() %>%
    tidyr::spread(terms, n, fill = 0) %>%
    dplyr::mutate(terms = NA)
  for (i in (length(x)-1):3) {
    x <- x %>% dplyr::mutate(terms = ifelse(x[[i]] > 0, names(x)[i], terms))
  }
  x <- x %>%
    dplyr::group_by(terms, sec) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    tidyr::spread(terms, n, fill = 0) %>%
    dplyr::mutate(sec_tally = rowSums(.[2:length(.)])) %>%
    dplyr::select(sec, c(ordered_terms), everything()) %>%
    row_percents(ordered_terms) %>%
    dplyr::select(sec, matches("s[0-9]"), contains("per"), everything()) %>%
    # dplyr::mutate(sec = factor(sec, levels = rev(factor(sec))))
    dplyr::arrange(rev(sec))
  return(x)
}


#' For plotting awareness by product.
#'
#' For plotting awareness by product.
#' @export
#' @seealso \code{\link{conflicting_evaluations}}
#' @param adf dataframe, ideally the output of conflcting_evaluations().
#' @param title_string string, title of plot.
#' @param ordered_terms vector, ordered factors for column of interest.
#' @param color_vec vector, strings denoting colours for all bar colour options (eg. "Yes", "No", "Brand but not product", etc.).
#' @param pc_end logical, if TRUE, tacks on total, sum(per_x) to end of bars; otherwise sec_tally.
#' @return A plotly bar plot with parallel horizontal bars.
horizontal_bars_plot <- function(adf, title_string, ordered_terms, color_vec,
                                 pc_end = FALSE) {
  plot <- plotly::plot_ly(
    adf, type = 'bar', orientation = 'h', hoverinfo = "text",
    marker = list(line = list(color = font_color, width = 1))) %>%
    plotly::layout(
      title = title_string, barmode = "stack", hovermode = "y",
      yaxis = list(title = ""),
      xaxis = list(title = "", showgrid = FALSE, showline = FALSE,
                   showticklabels = FALSE, zeroline = FALSE),
      font = list(family = font_fam, color = font_color),
      # position = "top centre",
      showlegend = FALSE,
      paper_bgcolor = box_color, plot_bgcolor = box_color)
  adf_cols <- 1:length(adf); or_cols <- adf_cols[2:(length(adf_cols)/2)]
  rows <- adf %>% tally() %>% `[[`(1)
  df_cum <- data.frame(x = rep(0, rows), p = rep(0, rows), y = rep(0, rows))
  for (i in or_cols) {
    df_trace <- data.frame(x=adf[[i]], p=adf[[i+length(or_cols)]], y=adf[[1]],
                           l = ordered_terms[i-1])
    plot <- plotly::add_trace(
      plot, data = df_trace,
      x = ~p, hoverinfo = "text", #name+
      y = ~y, #type = 'bar',
      name = ordered_terms[i-1],
      marker = list(color= color_vec[i-1]),
      text = ~paste0(l, '\nCount: ', x, '\nPercent: ', round(p, 2), '%'),
      hoverlabel = list(namelength = -1)) %>%
      plotly::add_annotations(
        xref = ordered_terms[i-1], yref = 'sec',
        x = df_cum$p + (df_trace$p / 2), y = df_trace$y,
        text = paste0(round(df_trace$p, 2), "%"),
        font = list(family = font_fam, color = font_color), showarrow = FALSE)
    df_cum$p = df_cum$p + df_trace$p

  }
  if (isTRUE(pc_end)) {
    plot <- plot %>%
      plotly::add_annotations(
        x = df_cum$p + 5, y = df_trace$y,
        text = paste0(adf$total, "\n/ ", round(df_cum$p), "%"),
        font = list(family = font_fam, color = font_color), showarrow = FALSE)
  } else {
    plot <- plot %>%
      plotly::add_annotations(
        xref = ordered_terms[length(ordered_terms)], yref = 'sec',
        x = 105, y = adf$sec, text = adf$sec_tally,
        font = list(family = font_fam, color = font_color), showarrow = FALSE)
  }
  return(plot)
}
#' Finds the mode of a vector.
#'
#' Finds the mode of a vector.
#' @param x vector
#' @return The first-occurring modal value.
md <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
#' Groups data by a primary_col and a sec_col.
#'
#' Counts primary_col, sec_col responses; if a user has give more than one response, counts them as "Mixed".
#' @export
#' @seealso \code{\link{understanding_by_awareness}} uses group_by_primary_sec()'s output as its primary input.
#' @param x dataframe containing primary_col and sec_col.
#' @param arrange_by_total logical, whether to arrange rows by total number in each.
#' @param mode_by_user logical, whether to consolidate multiple responses from the same user -- if responses vary, they are replaced with "Mixed".
#' @return A dataframe with all distinct variants of primary_col and sec_col (and maybe "Mixed" for either or both too) and the number of responses of that combo.
group_by_primary_sec <- function(x, arrange_by_total = FALSE,
                                 mode_by_user = TRUE) {
  ### primary column of interest must be named primary_col beforehand
  ### secondary column of interest must be named sec_col beforehand
  if (isTRUE(mode_by_user)) { # Eliminate multiple answers per user.
    pcol_levels <- c(x[["primary_col"]] %>% factor() %>% levels(), "Mixed")
    scol_levels <- c(x[["sec_col"]] %>% factor() %>% levels(), "Mixed")
    dist_p <- x %>%
      dplyr::group_by(Email) %>%
      dplyr::summarize(tally = dplyr::n_distinct(primary_col)) %>% # Get uniques per person
      ungroup()
    p <- x %>%
      dplyr::left_join(dist_p, by = c("Email" = "Email")) %>% # Join them on main df
      dplyr::mutate(p = ifelse(tally > 1, "Mixed", as.character(primary_col))) %>% # Replace varying responses with "Mixed".
      dplyr::group_by(p, Email) %>% # Eliminate duplicate counts from individual users
      dplyr::count(p) %>%
      dplyr::ungroup()
    dist_s <- x %>%
      dplyr::group_by(Email) %>%
      dplyr::summarize(tally = dplyr::n_distinct(sec_col)) %>% # Get uniques per person
      ungroup()
    s <- x %>%
      dplyr::left_join(dist_s, by = c("Email" = "Email")) %>% # Join them on main df
      dplyr::mutate(s = ifelse(tally > 1, "Mixed", as.character(sec_col))) %>% # Replace varying responses with "Mixed".
      dplyr::group_by(s, Email) %>% # Eliminate duplicate counts from individual users
      dplyr::count(s) %>%
      dplyr::ungroup()
    x <- x %>%
      dplyr::left_join(p, by = "Email") %>%
      dplyr::left_join(s, by = "Email") %>%
      dplyr::mutate(primary_col = ifelse(is.na(primary_col), NA, as.character(p))) %>%
      dplyr::mutate(sec_col = ifelse(is.na(sec_col), NA, as.character(s))) %>%
      dplyr::mutate(primary_col = as.factor(p)) %>%
      dplyr::mutate(sec_col = as.factor(s)) %>%
      dplyr::mutate(primary_col = factor(primary_col, levels = pcol_levels)) %>%
      dplyr::mutate(sec_col = factor(sec_col, levels = scol_levels)) %>%
      dplyr::mutate(primary_col = forcats::fct_explicit_na(primary_col, "Decline to state")) %>%
      dplyr::mutate(sec_col = forcats::fct_explicit_na(sec_col, "Decline to state")) %>%
      dplyr::mutate(primary_col = forcats::fct_drop(primary_col)) %>%
      dplyr::mutate(sec_col = forcats::fct_drop(sec_col)) %>%
      dplyr::group_by(primary_col, sec_col, Email) %>%
      dplyr::count() %>%
      dplyr::ungroup()
  }
  x <- x %>%
    dplyr::group_by(primary_col, sec_col) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    tidyr::spread(sec_col, n, fill = 0, drop = FALSE) %>%
    dplyr::mutate(total = rowSums(dplyr::select(., -primary_col)))
  if (isTRUE(arrange_by_total)) {x <- x %>% dplyr::arrange(total)}
  return(x)
}

#' This function plots the frequency of primary_col stacked by secondary_col.
#'
#' Titled according to its first use, this function plots the frequency of primary_col stacked by secondary_col.
#' @export
#' @seealso \code{\link{group_by_primary_sec}}
#' @param df dataframe, must include columns named primary_col and seconary_col; intended to be the output of \code{\link{group_by_primary_sec}}.
#' @param title_string string, titles plot.
#' @param legend_string string, titles legend.
#' @param color_vec vector, strings labelling each factor of secondary_col.
#' @param legend_position vector, two numeric elements, where the legend goes.
#' @param drop_na logical, whether to drop missing values.
#' @return A plotly vertical bar plot.
understanding_by_awareness <- function(df, title_string, legend_string,
                                       color_vec,
                                       legend_position = c(0.06, 0.94),
                                       drop_na = TRUE) {

  if (drop_na == TRUE) {
    x <-df[stats::complete.cases(df), ]
    x <-  x[, colSums(x != 0) > 0]
  } else {
    x <- df
  }
  sec_opts <- x %>%
    select(-c(primary_col, total)) %>%
    names()
  # Adding percentages
  t <- percents(x, sec_opts, total) # temporary dataframe
  # Determining how to access temporary dtaframe
  t_cols <- 2:length(t); or_cols <- t_cols[1:(length(t_cols)/2)]

  # Plotting graph
  plot <- t %>%
    # Starting with empty bar graph
    plotly::plot_ly(type = 'bar') %>%
    # Adding layout
    plotly::layout(
      title = title_string,
      legend = list(x = legend_position[1], y = legend_position[2]),
      annotations = list(yref='paper', xref="paper",
                         x=legend_position[1] - 0.05,
                         y=legend_position[2] + 0.05,
                         font = list(size = 13),
                         text = legend_string, showarrow = FALSE),
      xaxis = list(title=""), yaxis = list(title="Count"),
      barmode = "stack",
      hovermode = "compare",
      font = list(family = font_fam, color = font_color),
      paper_bgcolor = box_color,
      plot_bgcolor = box_color)
  # Looping through temporary dataframe to add 'traces' to the empty bar graph
  for (i in or_cols) {
    df_trace <- data.frame(y=t[[i]], p=t[[i+length(or_cols)]],
                           x=t$primary_col)
    plot <- plotly::add_trace(
      plot, data = df_trace, x = ~x, hoverinfo = "name+text", y = ~y, #type = 'bar',
      name = sec_opts[i-1], marker = list(color= color_vec[i-1]),
      text = ~paste0(x, '\nCount: ', y, '\nPercent: ', round(p, 2), '%'))
  }
  return(plot)
}



# Processing for free-form quesitons

# # For picking out which terms were used in a free-form response by Flavor.
# # Must follow on with additional data-cleaning
# # CANNOT usefully group reviewers by product since 1 reviewer can review multiple products
# separate_terms_by_product <- function(x, identity_col, target_col) {
#   # terms --> primary_col
#   x %>%
#     rename(primary_col = target_col) %>%
#     mutate(primary_col = tolower(primary_col)) %>%
#     filter(!is.na(primary_col)) %>%
#     select(primary_col, Product) %>%
#     separate_rows(primary_col, # Separate terms out by various markers
#                   sep = "(or )|(,)|( and)|( /)|(/ )|(\\.)|(!)|(:)|(;)", convert = FALSE) %>%
#     mutate(primary_col = trimws(primary_col)) %>% # Omit trailing spaces
#     filter(primary_col != '') # Omit blank terms
# }


#' Separates terms by gender.
#'
#' Separates terms by gender.  Drops NAs, selects gender & target_col, then divides target_col up by various string markers (eg. \'or \', \',\', \' and\', etc.).  Filters out non-sense pieces from broken up terms.
#' @export
#' @param x dataframe, contains columns Gender and a column identified by target_col.
#' @param target_col numeric (or string), identifies column of interest.
#' @return A dataframe with columns primary_col and Gender.
separate_terms_by_gender <- function(x, target_col) {
  x %>%
    rename(primary_col = target_col) %>%
    mutate(primary_col = tolower(primary_col)) %>%
    filter(!is.na(primary_col)) %>%
    pre_cleaner(primary_col) %>%
    select(primary_col, Gender) %>%
    separate_rows(primary_col, # Separate terms out by various markers
                  sep = "(or )|(,)|( and)|( /)|(/ )|(\\.)|(!)|(:)|(;)", convert = FALSE) %>%
    mutate(primary_col = trimws(primary_col)) %>% # Omit trailing spaces
    filter(primary_col != '') %>% # Omit blank terms
    filter(primary_col != 'n/a') %>%
    filter(primary_col != '#n/a') %>%
    filter(primary_col != '?')
}

single_plural_chooser <- function(x) {
  # Choose single or plural depending on which is more common; ties --> no s
  x %>%
    dplyr::mutate(word_no_s = stringr::str_replace(primary_col, pattern = "(s$)", '')) %>% # Get s-less version # No idea why used to have |(') in pattern
    dplyr::mutate(word_s = stringr::str_replace(primary_col, pattern = "(?<!s)$", "s")) %>% # Get version ending in s
    dplyr::mutate(sless = ifelse(primary_col == word_no_s, TRUE, FALSE)) %>% # Get marker for true version
    dplyr::add_count(word_no_s, name = 'n') %>% # Tally count ignoring ending-s status
    dplyr::add_count(word_no_s, sless, name = 'which') %>% # Get count using ending-s status
    dplyr::mutate(choose_sless = ifelse(n-which > n/2, !sless, sless)) %>% # See which ending is more common
    dplyr::mutate(primary_col = ifelse(choose_sless, word_no_s, word_s)) #%>% # Select more common version
  # select(-word_no_s, -word_s, -sless, -n, -which, -choose_sless)
}

#' Functions to clean word variants for particular kinds of questions.
#'
#' Functions to clean word variants for particular kinds of questions.
#' @name cleaner_function
#' @param x dataframe
#' @param target_col numeric, identifies which column of dataframe contains question of interest.
#' @return Dataframe with column cleaned.
NULL

#' @rdname cleaner_function
#' @export
pre_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_to_lower(!!quocol)) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'smart.*final', 'smart & final'))
}

#' @rdname cleaner_function
#' @export
snack_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_to_lower(!!quocol)) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = ' bar[s]?', ' bars')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'cliff bars', 'clif bars')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*protein bars.*', 'protein bars')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*granola bars.*', 'granola bars')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*almonds.*', 'almonds')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'trailmix', 'trail mix')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*trail mix.*', 'trail mix')) %>%
    dplyr::mutate(!!quocol := stringr::str_to_title(!!quocol))
}
#' @rdname cleaner_function
#' @export
grocery_store_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'stater brothers', 'stater bros')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '7/11', '7-eleven')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '7-11', '7-eleven')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '711', '7-eleven')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '7/11', '7-eleven')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '7 eleven', '7-eleven')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '\u2019', "'")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'pavillions', 'pavilions')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'von\'?s', "vons")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'whole?.?food\'?s.*', "whole foods")) %>% # ANYTHING after whole foods gets eaten!
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.alph\'?s?.*', "ralph's")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*joe\'?s?.*', "trader joe's")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'food4less', "food 4 less")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'food for less', "food 4 less")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*superking.*', "super king")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'super king$', "super king markets")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'super king market$', "super king markets")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'smart.*final', "smart & final")) %>%
    # dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'smart&', "smart &")) %>%
    dplyr::mutate(!!quocol := stringr::str_to_title(!!quocol))
}
#' @rdname cleaner_function
#' @export
energy_drink_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'redbull', 'red bull')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*coffee.*', 'coffee')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*starbuck.*', 'starbucks')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*tea.*', 'tea')) %>%
    dplyr::mutate(!!quocol := stringr::str_to_title(!!quocol))
}
#' @rdname cleaner_function
#' @export
workout_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*weight.*', 'weightlifting')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*sports.*', 'sports')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*running.*', 'running')) %>%
    dplyr::mutate(!!quocol := stringr::str_to_title(!!quocol))
}

#' @rdname cleaner_function
#' @export
subscription_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'imperfect.*', 'imperfect produce')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'post(\\s)*mate.*', 'postmates')) %>%
    dplyr::mutate(!!quocol := stringr::str_to_title(!!quocol))
}


#' @rdname cleaner_function
#' @export
coffee_cleaner <- function(x, target_col) {
  quocol <- rlang::enquo(target_col)
  x %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'red(\\s)*bull.*', 'red bull')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = '.*starbuck.*', 'starbucks')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'ground(\\s)*work.*', 'groundworks')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'blue bottled', 'blue bottle')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'ice coffee', 'iced coffee')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = ".*don't have one", "none")) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'rockstar.*', 'rockstar')) %>%
    dplyr::mutate(!!quocol := stringr::str_replace(!!quocol, pattern = 'monster energy.*', 'monster')) %>%
    dplyr::mutate(!!quocol := stringr::str_to_title(!!quocol))
}

#' Filter out stopwords, assign polarity to terms from target_column.
#'
#' Filter out stopwords, assign polarity to terms from target_column.  Makes use of \code{\link{wc_plotter}}.
#' @seealso \code{\link{wc_plotter}}
#' @param x dataframe with column of interest.
#' @param target_column number of the column in x containing data being assigned polarity.
#' @param stopwords_dict dictionary of stopwords e.g. \code{tibble::tibble(stopwords::stopwords(source="stopwords-iso"))}
#' @return A dataframe containing 5 columns including the polarity of target_column's words.
assign_polarity <- function(x, target_column, stopwords_dict = stop_d) {
  targ <- x %>% select(target_column) # targ for temporary tibble; must access with [[]]
  targ <- targ[[1]] %>%
    as.character() %>%
    sentimentr::get_sentences() %>%
    sentimentr::extract_sentiment_terms() %>%
    attributes()
  targ$counts %>%
    dplyr:: mutate(valence = ifelse(polarity > 0, "positive",
                                    ifelse(polarity < 0, "negative", 'neutral'))) %>%
    dplyr::anti_join(stopwords_dict) %>%
    dplyr::filter(!is.na(words)) %>%
    dplyr::arrange(desc(n)) %>%
    dplyr::mutate(terms = str_to_title(words))
}

# Get average sentiment and product for each response
sentiment_prod_response <- function(x, target_column) {
  x %>%
    rename("Q" = target_column) %>%
    mutate("response" = Q) %>%
    select(Q, response, Product) %>%
    sentimentr::get_sentences() %$% # Like a with()
    sentimentr::sentiment_by(., by=c('Product', 'response')) %>%
    filter(word_count > 0) %>%
    filter(!is.na(response)) %>%
    filter(response != "n/a") %>%
    rename(sentiment = ave_sentiment) %>%
    select(response, Product, sentiment) %>%
    arrange(desc(sentiment))
}

#' Select appropriate colors depending on 'valence' values in selection.
#'
#' Select appropriate colors depending on 'valence' values in selection. Takes a named vector containing colours and assigns them in the correct order to factor levels from a dataframe.
#' @param valence_df dataframe containing valence data; (truncated, arranged) output from \code{\link{assign_polarity}}.
#' @param valence_col string, selects column with strings / factors labelling valences to derive colors from.
#' @param avc vector, named with valences, contains strings defining colours to use for each.
#' @return A named vector with avc's elements in the order matching their levels in valence_df.
valence_colorer <- function(valence_df, valence_col = "valence", avc = all_valence_colors) {
  # all_valence_colors must be a named vector with colors for each valence.
  valences <- valence_df[[valence_col]] %>% factor() %>% levels()
  v_colors <- c()
  for (i in valences) {
    if (i %in% names(avc)) {
      v_colors <- c(v_colors, i)
    }
  }
  return(avc[v_colors])
}

#' Function that generates wordclouds.
#'
#' Function that generates wordclouds.  Makes use of \code{\link{valence_colorer}}, \code{\link{wc}}.
#' @seealso \code{\link{assign_polarity}}, \code{\link{valence_colorer}}
#' @param df_reactive dataframe, output from \code{\link{assign_polarity}}.
#' @param cutoff numeric, number of words to include in wordcloud.
#' @param size numeric, size of text for wordcloud.
#' @return A plot.
wc_plotter <- function(df_reactive, cutoff, size) {
  # df_reactive is presumed the output of assign_polarity()
  trms <- df_reactive %>%
    dplyr::top_n(cutoff, n) %>%
    dplyr::arrange(valence)
  v_col <- valence_colorer(trms)
  plt <- trms %>%
    wc(size) +
    aes(label = terms, color = valence) +
    scale_color_manual(values=v_col)
  return(plt)
}
# Broken with non-pepsi data; needs fixing
# # Get counts for specific terms, broken down by Flavor
# specific_term_counts_by_product <- function(x, specific_terms) {
#   # specific_terms originally an output of grouper() %>% top_n()
#   x %>%
#     filter(terms %in% specific_terms$terms) %>%
#     group_by(terms, Product) %>%
#     summarize(n = n()) %>%
#     ungroup() %>%
#     spread(Product, n, fill = 0, drop = FALSE) %>%
#     mutate(total = rowSums(
#       select(., c(`Rockstar XDurance (Both)`, `Rockstar XDurance (Cotton Candy)`,
#                   `Rockstar XDurance (Peach & Iced Tea)`, `Game Fuel`,
#                   `Starbucks Tripleshot Caffe Mocha`)))) %>%
#     arrange(total)
# }

# vertical 3-layer stacked single-bar bar chart
vertical_three_layer_bar <- function(x, color_vec) {
  # Primary column of interest must be named primary_col beforehand
  targ <- sc(x)
  x <- targ %>% dplyr::select(primary_col)
  x <- x[[1]]
  y <- targ %>% dplyr::select(n)
  y <- y[[1]]
  plotly::plot_ly(
    y = y[1], type = "bar",
    marker = list(color = color_vec[1], line = list(color = font_color, width = 1)),
    hoverinfo = "skip") %>%
    plotly::add_trace(y=y[2],marker=list(color=color_vec[2],
                                         line=list(color=font_color,width=1))) %>%
    plotly::add_trace(y=y[3],marker=list(color=color_vec[3],
                                         line=list(color=font_color,width=1))) %>%
    plotly::layout(
      barmode = "stack", title = "Preferred Flavor",
      hovermode = "compare", font = list(family = font_fam, color = font_color),
      xaxis = list(title = "", showticklabels = FALSE),
      yaxis = list(title =""), showlegend = FALSE,
      paper_bgcolor = box_color, plot_bgcolor = box_color) %>%
    plotly::add_annotations(
      x = 0, y = y[1]/2,
      text = paste(x[1],paste0(y[1]," / ",round(y[1]/sum(y)*100,2),"%"),sep='\n'),
      font = list(family = font_fam, color = font_color), showarrow = FALSE) %>%
    plotly::add_annotations(
      x = 0, y = y[1] + y[2]/2,
      text = paste(x[2],paste0(y[2]," / ",round(y[2]/sum(y)*100,2),"%"),sep='\n'),
      font = list(family = font_fam, color = font_color), showarrow = FALSE) %>%
    plotly::add_annotations(
      x = 0, y = y[1] + y[2] + y[3]/2,
      text = paste(x[3],paste0(y[3]," / ",round(y[3]/sum(y)*100,2),"%"),sep='\n'),
      font = list(family = font_fam, color = font_color), showarrow = FALSE)
}

# Colour datatable text rows by Product
product_format <- function(x, product_col, color_vec) {
  DT::formatStyle(x, "Product", target = 'row',
                  color = DT::styleEqual(unique(product_col), color_vec))
}

# For checking whether all variants of a product are displayed
check_var <- function(x, product, variants) {
  counts <- str_detect(x, product)
  count <- sum(counts)
  if (count == 0) {
    ret <- NULL
  } else if (count == variants) {
    ret <- product
  } else if (count > variants) {
    ret <- paste0(product, " variants exceed expected: ", variants)
  } else {
    ret <- paste0(product, " (some)")
  }
  return(ret)
}
#' For displaying which products are selected; avoid overcluttering.
#'
#' For displaying which products are selected; avoid overcluttering.
#' @param x dataframe
#' @param products_short vector of strings, a truncated list of productds avoiding within-brand repetition.
#' @param p_var vector of numerics, the number of different variants each products_short element represents.
#' @param color_cutoff numeric, threshold for pos/neg colors.
#' @return HTML code to display the product names succinctly.
display_prod <- function(x, products_short, p_var, color_cutoff) {
  # products_short is a truncated list of products avoiding within-brand repetition
  # p_var is the number of different flavours/ variants of a products are in the data
  txt <- ''
  for (i in 1:length(products_short)) {
    txt <- paste(txt, check_var(x, products_short[i], p_var[i]), sep = "<br/>")
  }
  return(shiny::HTML(txt))
}

# Tallies factors for multiple columns, gives counts for each in one dataframe.
tallyer <- function(df, orderby, ...) {
  # orderby the name of the column to order by in a quosure --
  #   q*, * being the column # or totals
  # columns listed in ...
  quos <- rlang::enquos(...)
  cnt <- 1
  for (i in quos) {
    # Naming each tally q*
    nm <- paste0("top", quo_name(cnt))
    b <- df %>%
      group_by(!!i) %>%
      count() %>%
      rename(!!nm := n) %>%
      rename(primary_col := !!i) %>%
      ungroup()
    # Joining each tally together
    if (cnt > 1) {a <- full_join(a, b, by = "primary_col")} else {a <- b}
    cnt <- cnt + 1
  }
  a <- a %>%
    # Replacing NAs with 0.
    replace(., is.na(.), 0)  %>%
    # Add totals
    dplyr::mutate(totals = rowSums(select(., contains("top"))))
  # Order
  for (i in rev(orderby)) {
    a <- a %>% arrange(!!i)
  }
  return(a)
}



#' For filtering using quosures.
#'
#' For filtering using quosures.
#' @export
#' @param x dataframe, to be filtered
#' @param target_col numeric, the column number of the dataframe to be filtered
#' @param values vector, the values that target_col should contain
#' @return dataframe, like x but with target_col filtered for values.
filter_by <- function(x, target_col, values) {
  quo_col <- dplyr::quo(target_col)
  x %>% dplyr::filter_at(vars(!!quo_col), any_vars(. %in% values))
}

#' shinydashboard::box() but with different default settings.
#'
#' shinydashboard::box() but with different default settings
#' @export
#' @inheritParams shinydashboard::box
#' @return A shinydashboard::box().
box_d <- function(..., title = NULL, footer = NULL, status = "primary",
                  solidHeader = TRUE, background = NULL, width = 12, height = NULL,
                  collapsible = TRUE, collapsed = FALSE) {
  shinydashboard::box(..., title = title, footer = footer, status = "primary",
                      solidHeader = solidHeader, background = background, width = width,
                      height = height, collapsible = collapsible, collapsed = collapsed)
}


#' Takes dataframe, prepares it for horizontal_bars_plot().
#'
#' Takes dataframe, prepares a column formed as a concatenated checklist for graphing with horizontal_bars_plot().
#' @export
#' @seealso \code{\link{horizontal_bars_plot}}
#' @param x dataframe
#' @param pcol string or numeric, column of primary interest; ends up being named sec at end of function.
#' @param scol string or numeric, column of seconday interest; how the bar graph's colors will be stacked.
#' @param pcol_strings vector of 5 strings (in future, generalize this; not sure how yet), pcol's 'pure' values.
#' @return A list containing a dataframe ready for horizontal_bars_plot() and a vector containing strings of the factors sec_col (including "Mixed" if relevant).
checkbox_to_hbp <- function(x, pcol = lookfor_col, scol = "Product",
                            pcol_strings = lookfor_strings) {
  qs <- c()
  for (i in 1:length(pcol_strings)) {
    qs <- c(qs, rlang::sym(pcol_strings[i]))
  }

  x <- x %>%
    dplyr::rename(primary_col = pcol) %>%
    dplyr::rename(sec_col = scol) %>%
    concat_by_user() %>%
    spread_by_strings(pcol_strings) %>%
    col_sep()
  rowcount <- nrow(x)
  ordered_products <- x %>%
    dplyr::select(sec_col) %>%
    `[[`(1) %>%
    factor() %>%
    levels() %>%
    forcats::fct_relevel("Mixed", after = Inf) %>%
    levels()
  x <- x %>%
    dplyr::add_count() %>%
    dplyr::group_by(sec_col) %>%
    dplyr::summarize(!!qs[[1]] := sum(!!qs[[1]]),
                     !!qs[[2]] := sum(!!qs[[2]]),
                     !!qs[[3]] := sum(!!qs[[3]]),
                     !!qs[[4]] := sum(!!qs[[4]]),
                     !!qs[[5]] := sum(!!qs[[5]])) %>%
    tidyr::gather(sec, count, !!qs[[1]]:!!qs[[length(qs)]]) %>%
    tidyr::spread(sec_col, count) %>%
    select(sec, ordered_products) %>%
    mutate(total = rowSums(.[2:length(.)])) %>%
    mutate(sec_tally = rowcount) %>%
    dplyr::select(sec, ordered_products, everything()) %>%
    row_percents(ordered_products, sify = FALSE) %>%
    dplyr::select(sec, ordered_products, contains("per"), everything()) %>%
    dplyr::arrange(desc(sec))
  return(list(dtfrm = x, ordered_products = ordered_products))
}

# Helpers for checkbox_to_hbp().

#' Get new column for each strings value.
#'
#' Get new logical/ boolean columns for checking whether each value of strings is found in primary_col.
#' @param x dataframe with column of interest named primary_col.
#' @param strings vector of strings to check against primary_col.
#' @return A dataframe with a new column for each element of strings.
spread_by_strings <- function(x, strings) {
  for (i in strings) {
    quocol <- rlang::enquo(i)
    col_name <- rlang::quo_name(quocol)
    x <- x %>%
      dplyr::mutate(!!col_name := stringr::str_detect(primary_col, i))
  }
  return(x)
}

#' Concatenates primary_col for each individual user (as defined by Email).
#'
#' Concatenates primary_col for each individual user (as defined by Email).
#' @param x dataframe with column of interest primary_col and users identified by Email.
#' @param cllps string, divided concatenated primary_col entries.  Vary if your primary_col might actually have a " | ".
#' @return A dataframe with grouped by Email with primary_col concatenated.
concat_by_user <- function(x, cllps = " | ") {
  x %>%
    dplyr::group_by(Email) %>%
    dplyr::summarize(primary_col = paste(primary_col, collapse = cllps),
                     sec_col = paste(sec_col, collapse = cllps))
}

#' Takes a vector (ideally of strings), leaves it be if it only has one element; otherwise replaces it with "Mixed".
#'
#' Takes a vector (ideally of strings), leaves it be if it only has one element; otherwise replaces it with "Mixed".
#' @param x vector (ideally of strings).
#' @return An element of the vector (or "Mixed").
more_than_one_unique <- function(x) {
  ifelse(length(unique(x)) > 1, "Mixed", unique(x))
}

#' Splits a string column into a vectors, checks whether those vectors are unique.
#'
#' Splits a string column into a vectors, checks whether those vectors are unique.  If so, leaves be; if not, replaces with "Mixed".
#' @param x dataframe
#' @param clmn string naming the column that will be split into vectors and checked for uniqueness.
#' @param sep string, used to separate into clmn into vectors.
#' @return A dataframe with clmn squashed down into one string (as opposed to multiple concatenatd by sep).
col_sep <- function(x, clmn = "sec_col", sep = " | ") {
  quocol <- rlang::enquo(clmn)
  col_name <- rlang::quo_name(quocol)
  split_by_sep <- x %>%
    select(!!col_name) %>%
    `[[`(1) %>%
    strsplit(sep, fixed = TRUE)
  x <- x %>% dplyr::mutate(!!col_name := unlist(lapply(split_by_sep, more_than_one_unique)))
  return(x)
}
IskanderBlue/morseldash documentation built on Oct. 30, 2019, 7:24 p.m.