R/preparedata.R

Defines functions isRawDataQTable qTableAttributesToRemove unclassQTable updateQStatisticsInfo addStatTesting containsQTable convertScatterMultYvalsToDataFrame extractRegressionScatterData checkRegressionOutput tidyLabels hasUserSuppliedRownames rawDataLooksCrosstabbable PrepareForCbind getFullRowNames setAxisTitles useFirstColumnAsLabel isListOrRaggedArray setWeight prepareForSpecificCharts convertPercentages transformTable RearrangeRowsColumns asPercentages checkForNegPercent scatterVariableIndices rmScatterDefaultNames tidyScatterDefaultNames scatterDefaultNames checkNumberOfDataInputs processPastedData isQTableWithMultStatistic processInputData isDistribution coerceToDataFrame aggregateDataForCharting crosstabOneVariable isScatter isTableList unlistTable replaceDimNames PrepareData

Documented in coerceToDataFrame PrepareData PrepareForCbind

#' PrepareData
#'
#' Prepares input data for charting.
#' @param chart.type Character; chart type to be plotted.
#' @param subset subset An optional vector specifying a subset of
#'     observations to be used in the fitting process, or, the name of
#'     a variable in \code{data}. It may not be an expression.
#' @param weights An optional vector of sampling weights, or, the name
#'     of a variable in \code{data}. It may not be an expression.
#' @param input.data.table Array; typically a table of some kind,
#'     which is then processed using
#'     \code{\link[flipTables]{AsTidyTabularData}}.
#' @param input.data.tables List of array; each component is assumed
#'     to be a Qtable and will be processed using.
#'     \code{\link[flipTables]{AsTidyTabularData}}
#' @param input.data.raw List, containing variables or data.frames or Regression outputs from flipRegression.
#'     In the case of multiple Regression outputs, the labels default to the R name of the Regression output.
#' @param input.data.pasted List of length six; the first component of
#'     which is assumed to be from a user-entered/pasted table; will
#'     be processed by \code{\link{ParseUserEnteredTable}}.
#' @param input.data.other A PickAny Multi Q variable.
#' @param data.source Where multiple data inputs are provided, a text
#'     string can be provided to disambiguate. Refer to the source
#'     code for a precise understanding of how this works (it is not
#'     obvious and is not likely to be of any use for most cases, so
#'     should usually be left as a \code{NULL}).
#' @param signif.append Append attributes used to show statistical test for significance.
#' @param signif.symbol Character; Symbol used on chart to indicate significance. This can "Arrow" or "Caret".
#' @param signif.symbol.size Numeric; size of symbol in pixels.
#' @param signif.p.cutoffs Numeric; vector of p-values used to determine color of symbols.
#'     These values should be supplied in decreasing order. The colors used will correspond
#'     to the smallest cutoff larger than the p-value of that cell.
#' @param signif.colors.pos Character; vector of colors, of the same length as \code{signif.p.cutoffs}.
#' @param signif.colors.neg Character; vector of colors, of the same length as \code{signif.p.cutoffs}.
#' @param signif.colors.on.font Boolean; whether signif colors should also affect data label font colors.
#' @param first.aggregate Logical; whether or not the input data needs
#'     to be aggregated in this function. A single variable is
#'     tabulated, 2 variables are crosstabbed if \code{group.by.last} is selected,
#'     and otherwise the mean is computed. If \code{input.data.raw} contains
#'     two an 'X' variable and a 'Y' variable in the first two elements of the list,
#'     the data is automatically aggregated and crosstabbed.
#' @param scatter.input.columns.order (deprecated) Use \code{scatter.mult.yvals} instead.
#' @param scatter.mult.yvals Logical; When \code{chart.type} is "Scatter',
#'     a \code{TRUE} value indicates that columns of input.data.table or input.data.pasted
#'     should be considered multiple series instead of different attributes (default).
#' @param group.by.last Logical; \code{TRUE} and \code{first.aggregate} and there is data
#'     in either of \code{input.data.table} or \code{input.data.pasted}, the data is aggregated
#'     using the last variable
#' @param tidy Logical; whether or not the input data needs to be
#'     aggregated in this function (e.g., if an x and y variable have
#'     been provided, a contingency table is used to aggregate. This
#'     defaults to \code{TRUE}. It aggressively seeks to turn the data
#'     into a named vector or a matrix using
#'     \code{\link[flipTables]{TidyTabularData}}. This is not applied
#'     when \code{data.input.tables} are provided, or when the chart
#'     type is any of \code{"Scatter"}, \code{"Bean"},
#'     \code{"Histogram"}, \code{"Density"}, \code{"Box"}, or
#'     \code{"Violin"}.
#' @param tidy.labels Logical; whether to remove common prefixes from the
#'     labels of the input data.
#' @param transpose Logical; should the resulting matrix (of created)
#'     be transposed?
#' @param row.names.to.remove Character vector or delimited string of
#'     row labels specifying rows to remove from the returned table;
#'     default is \code{c("NET", "SUM")}
#' @param column.names.to.remove Character vector or delimited string
#'     of column labels specifying columns to remove from the returned
#'     table; default is \code{c("NET", "SUM")}.
#' @param split Character delimiter to split
#'     \code{row.names.to.remove} and \code{col.names.to.remove}
#'     on. Default is to split on either of \code{","} or \code{";"}.
#'     Assumed to be a regular expression; see \code{\link{strsplit}}.
#' @param hide.empty.rows.and.columns Logical; if \code{TRUE} empty
#'     rows and columns will be removed from the data.  Empty here
#'     meaning that a row or column contains all \code{NA} values, or
#'     in the case of percentages, that a row or column contains only
#'     0's. Retained for backwards-compatibility but is superseded by
#'     \code{hide.empty.rows} and \code{hide.empty.columns}.
#' @param hide.empty.rows Logical; hide rows with only NAs or 0's (percentages).
#' @param hide.empty.columns Logical; hide columns with only NAs or 0's (percentages).
#' @param select.rows String; Comma separated list of rows, by name or index
#'     to select from input table. If blank (default), then all rows are selected.
#' @param select.columns String; Comma separated list of columns, by name or index
#'     to select from input table. If blank (default), then all columns are selected.
#' @param auto.order.rows Logical; Automatically order rows by correspondence analysis.
#' @param sort.rows Logical; whether to sort the rows of the table. This operation is
#'     performed after row selection. (Ignored if \code{auto.order.rows} is true).
#' @param sort.rows.column String; If \code{sort.rows} is true, this column
#'     (specified by name or index) is used for sorting the rows. If not specified,
#'     the column with the largest \code{Column n} or the right-most column
#'     will be used for sorting.
#' @param sort.rows.exclude String; If \code{sort.rows} is \code{TRUE}, then rows
#'      in \code{sort.rows.exclude} will be excluded from sorting and
#'      appended at the bottom of the table.
#' @param sort.rows.decreasing Logical; Whether rows should be sorted in decreasing order.
#' @param auto.order.columns Logical; Automatically order columns by correspondence analysis.
#' @param sort.columns Logical; whether to sort the columns of the table.
#'      This operation is performed after column selection (Ignored if
#'      \code{auto.order.columns} is true.
#' @param sort.columns.row String; If \code{sort.columns} is true, this row
#'      (specified by name or index) is used for sorting the columns. If not specified,
#'      the row with the largest \code{n} or the bottom row
#'      will be used for sorting.
#' @param sort.columns.exclude String; If \code{sort.columns} is \code{TRUE}, then columns
#'      in \code{sort.columns.exclude} will be excluded from sorting and
#'      appended at the right of the table.'
#' @param sort.columns.decreasing Logical; Whether columns should be sorted in decreasing order.
#' @param hide.output.threshold Integer; If sample size ('Column n' or 'n') is provided
#'      then each cell in the input table will be checked to ensure
#'      'n' or 'Column n' is larger than specified threshold, otherwise an error
#'      message is given.
#' @param hide.values.threshold Integer; If sample size ('Column n' or 'n') is provided
#'      then each cell in the input table will be checked to ensure
#'      'n' or 'Column n' is larger than specified threshold,
#'      otherwise the cell will be set to \code{NA}.
#' @param hide.rows.threshold Integer; If sample size ('Column n' or 'n')
#'      is provided, then rows and with sample sizes smaller than threshold
#'      will be removed from table. Vectors will be treated as 1-d matrices
#' @param hide.columns.threshold Integer; If sample size ('Column n' or 'n')
#'      is provided, then columns with sample sizes smaller than threshold
#'      will be removed from table. Vectors will not be affected.
#' @param first.k.rows Integer; Number of rows to select from the top of the input table. This occurs after select and sort.
#' @param last.k.rows Integer; Number of rows to select from the bottom of the input table. This occurs after select and sort.
#' @param first.k.columns Integer; Number of columns to select from the left of the input table. This occurs after select and sort.
#' @param last.k.columns Integer; Number of columns to select from the right of the input table. This occurs after select and sort.
#' @param reverse.rows Logical; Whether to reverse order of rows. This operation is
#'      performed after row selection and sorting.
#' @param reverse.columns Logical; Whether to reverse order of columns. This operation
#'      is peformed after column selection and sorting.
#' @param show.labels Logical; If \code{TRUE}, labels are used for
#'     names in the data output if raw data is supplied.
#' @param as.percentages Logical; If \code{TRUE}, aggregate values in the
#' output table are given as percentages summing to 100. If \code{FALSE},
#' column sums are given.
#' @param hide.percent.symbol Percentage data is shown without percentage symbols and the symbol
#'  is also removed from the statistic attribute.
#' @param categorical.as.binary If data is aggregated and this is true, then categorical variables will be converted into indicator variables for each level in the factor.
#' @param date.format One of \code{"Automatic", "US", "International" or "No date formatting"}.
#' This is used to determine whether strings which are interpreted as dates
#' in the (row)names will be read in the US (month-day-year) or the
#' International (day-month-year) format. By default US format is used
#' if it cannot be deduced from the input data.
#' @param values.title The title for the values axis of a chart (e.g.,
#'     the y-axis of a column chart or the x-axis of a bar chart).
#' @param column.labels A comma separated list of names to replace the default column names
#'      of \code{pd$data}. This is applied after all other data manipulations
#' @param row.labels A comma separated list of names to replace the default row names
#'      of \code{pd$data}. This is applied after all other data manipulations
#' @details It is assumed that only one of \code{input.data.pasted},
#'     \code{input.data.table}, \code{input.data.tables},
#'     \code{input.data.other}, \code{input.data.raw} is non-NULL.
#'     They are checked for nullity in that order.
#' @importFrom flipU ConvertCommaSeparatedStringToVector
#' @importFrom flipTransformations ParseUserEnteredTable
#'     SplitVectorToList
#' @importFrom flipTables TidyTabularData RemoveRowsAndOrColumns SelectRows SelectColumns SortRows SortColumns ReverseRows ReverseColumns HideOutputsWithSmallSampleSizes HideValuesWithSmallSampleSizes HideRowsWithSmallSampleSizes HideColumnsWithSmallSampleSizes AutoOrderRows AutoOrderColumns ConvertQTableToArray
#' @importFrom flipData TidyRawData
#' @importFrom flipFormat Labels Names ExtractCommonPrefix
#' @importFrom flipStatistics Table WeightedTable
#' @importFrom flipU IsQTable
#' @importFrom verbs Sum
#' @importFrom stats setNames
#' @return A list with components \itemize{ \item \code{data} - If
#'     possible, a named vector or matrix, or if that is not posible
#'     or a data.frame is requested, a data.frame.  \item
#'     \code{weights} - Numeric vector of user-supplied weights.
#'     \item \code{values.title} - Character string to be used for the
#'     y-axis title; will only be a non-empty string if some
#'     aggregation has been performed on \code{data} \item
#'     \code{scatter.variable.indices} A named vector indicating which
#'     columns in \code{data} should be plotted in a scatterplot as
#'     \code{x}, \code{y}, \code{sizes}, and \code{colors}. Is
#'     \code{NULL} if \code{chart.type} does not contain
#'     \code{"Scatter"} or \code{"Bubble"}. \code{NA} is used when the
#'     data does not exist.  }
#' @export
#' @seealso \code{\link[flipTables]{AsTidyTabularData}},
#'     \code{\link[flipData]{TidyRawData}},
#'     \code{\link[flipTransformations]{ParseUserEnteredTable}}
PrepareData <- function(chart.type,
                        subset = TRUE,
                        weights = NULL,
                        input.data.table = NULL,
                        input.data.tables = NULL,
                        input.data.raw = NULL,
                        input.data.pasted = NULL,
                        input.data.other = NULL,
                        data.source = NULL,
                        signif.append = FALSE,
                        signif.symbol = "Arrow",
                        signif.symbol.size = 12,
                        signif.p.cutoffs = c(0.5, 0.2, 0.1, 0.05, 0.01, 0.005, 0.001, 1e-04, 1e-05, 1e-06),
                        signif.colors.pos = rep("#0000FF", 10),
                        signif.colors.neg = rep("#FF0000", 10),
                        signif.colors.on.font = FALSE,
                        first.aggregate = NULL,
                        scatter.input.columns.order = NULL,
                        scatter.mult.yvals = FALSE,
                        group.by.last = FALSE,
                        tidy = TRUE,
                        tidy.labels = FALSE,
                        transpose = FALSE,
                        select.rows = NULL,
                        first.k.rows = NA,
                        last.k.rows = NA,
                        select.columns = NULL,
                        first.k.columns = NA,
                        last.k.columns = NA,
                        auto.order.rows = FALSE,
                        sort.rows = FALSE,
                        sort.rows.exclude = c("NET", "SUM", "Total"),
                        sort.rows.column = NULL,
                        sort.rows.decreasing = FALSE,
                        auto.order.columns = FALSE,
                        sort.columns = FALSE,
                        sort.columns.exclude = c("NET", "SUM", "Total"),
                        sort.columns.row = NULL,
                        sort.columns.decreasing = FALSE,
                        hide.output.threshold = 0,
                        hide.values.threshold = 0,
                        hide.rows.threshold = 0,
                        hide.columns.threshold = 0,
                        reverse.rows = FALSE,
                        reverse.columns = FALSE,
                        row.names.to.remove = c("NET", "SUM", "Total"),
                        column.names.to.remove = c("NET", "SUM", "Total"),
                        split = "[;,]",
                        hide.empty.rows.and.columns = TRUE,
                        hide.empty.rows = hide.empty.rows.and.columns,
                        hide.empty.columns = hide.empty.rows.and.columns,
                        hide.percent.symbol = FALSE,
                        as.percentages = FALSE,
                        categorical.as.binary = NULL,
                        date.format = "Automatic",
                        show.labels = TRUE,
                        column.labels = "",
                        row.labels = "",
                        values.title = "")
{

    # Scenarios to address
    # - User provides a single numeric variable and wants to plot a bar for each value.
    # - User provides a single categorical variable and wants to plot a bar for each value.
    # - User provides two numeric variables and wants to plot a stacked bar plot of the unique values.
    # - User provides two numeric variables and wants to plot a stacked column chart of the crosstab.
    # - Data is in a weird format (e.g., JSON) for Venn diagram.
    # - User wants to treat variables or variable sets NOT as 'raw' data. E.g., performing a correspondence analysis of raw data.
    # - User wants to treat pasted data as raw data.
    # - User wants to treat otherData as raw data
    # - Scatterplots of raw data, where separate drop boxes have been used as inputs.
    # - Scatterplots of raw data, where a table has been used as an input.
    # - Scatterplots of raw data, where pasted data has been used as an input.
    # - Venn diagrams of JSON.
    # - Venn diagrams of multiple binary variables
    # - Histogram, Density, Bean, Violin, and Box plots of numeric variables
    # - Histogram, Density, Bean, Violin, and Box plots of an x and a y variable, where the histograms are conditional on the X.
    # - Aggregation by crosstabbing
    # - Sankey requires a data.frame
    # - means of multiple variables of raw data if aggregating
    ## Other things for the future...
    # - Taking the average of multiple numeric variables.
    # - Frequencies of multiple categorical variables (Pick One - Multi)

    #### This function does the following things:
    # 0. Checks if an input contains a subscripted Q Table and removes attributes
    #    if the Viz output was created before the release of Q Table subscripting.
    # 1. Converts the data inputs into a single data object called 'data'.
    # 2. Filters the data and/or removes missing values
    # 3. Aggregate the data if so required.
    # 4. Tailoring the data for the chart type.
    # 5. Transformations of the tidied data (sorting, transposing, removing rows).

    # This function needs to be frequently understood and generalized
    # by multiple people. Consequently, the goal has been to write the code in such a
    # way as to make it as easy to read and maintain as possible. In particular,
    # many obvious ways to make this code more efficent have been ignored in the interests
    # of making it easy to read (and in recognition that the efficiency gains would be trivial anyway).

    ###########################################################################
    # 0. Check subscripted QTables unclassed and attr removed for legacy outputs.
    ###########################################################################

    allow.qtable.fun <- get0("allowQTables", mode = "function", envir = .GlobalEnv, ifnotfound = function() FALSE)
    allow.qtables <- allow.qtable.fun() || get0("ALLOW.QTABLE.CLASS", ifnotfound = FALSE, envir = .GlobalEnv)

    if (!allow.qtables)
    {
        input.data.table <- unclassQTable(input.data.table)
        input.data.tables <- unclassQTable(input.data.tables)
        input.data.raw <- unclassQTable(input.data.raw)
        input.data.other <- unclassQTable(input.data.other)
    }

    ###########################################################################
    # 1. Converts the data inputs into a single data object called 'data'.
    ###########################################################################
    data.source.index <- if (is.null(data.source)) NULL else
        switch(data.source,
                "Link to a table" = 1,
                "Link to a table in 'Pages'" = 1,
                "Link to multiple tables" = 2,
                "Link to multiple tables in 'Pages'" = 2,
                "Link to a variable" = 3,
                "Link to a variable in 'Data'" = 3,
                "Link to variables" = 3,
                "Link to variables in 'Data'" = 3,
                "Question Type: Pick Any" = 3,
                "Variable Set: Binary - Multi" = 3,
                "Question Type: Number - Multi" = 3,
                "Variable Set: Numeric - Multi" = 3,
                "Type or paste in data" = 4,
                "Use an existing R Output" = 5,
                "Use an existing R Output in 'Pages'" = 5,
                "Link to questions" = 3,
                "Link to variable sets in 'Data'" = 3,
                "Link to a question" = 3,
                "Link to a variable in 'Data'" = 3,

                       { # Default
                           warning("'", data.source, "' is not a recognized data source.")
                           3
                       }
                )
    # Convert lists of NULLs into single NULLs.
    if (all(sapply(input.data.raw, is.null)))
        input.data.raw <- NULL
    # Ignore colors/sizes/labels if x and y are not supplied
    if (length(input.data.raw) >= 2 && all(vapply(input.data.raw[1:2], is.null, logical(1L))))
        input.data.raw <- NULL
    if (all(vapply(input.data.pasted, is.null, logical(1L))))
        input.data.pasted <- NULL
    # Check that there is no ambiguity regarding which input to use.
    checkNumberOfDataInputs(data.source.index, input.data.table, input.data.tables,
                            input.data.raw, input.data.pasted, input.data.other)
    # Assign the data to 'data'
    data <- processInputData(input.data.table, subset, weights)
    if (is.null(data))
        data <- input.data.tables
    if (is.null(data))
        data <- coerceToDataFrame(input.data.raw, chart.type)
    if (is.null(data))
        data <- input.data.other
    if (is.null(data))
        data <- processPastedData(input.data.pasted,
                                  warn = tidy,
                                  date.format, subset, weights)

    # Sanitize a data.frame containing a (likely subscripted) QTable
    if (is.data.frame(data) && any(qtable.elements <- vapply(data, IsQTable, logical(1L))))
    { # Prevent subscripting and avoid using table names as legend titles
        .sanitizeQTable <- function(x) {
            x <- unclass(x)
            if (!isScatter(chart.type))
                attr(x, "name") <- NULL
            x
        }
        data[qtable.elements] <- lapply(data[qtable.elements], .sanitizeQTable)
    }

    # Replacing variable names with variable/question labels if appropriate
    if (is.data.frame(data))
        names(data) <- if (show.labels) Labels(data) else Names(data)
    chart.title <- attr(data, "title")

    ###########################################################################
    # 2. Filters the data and/or removes missing values
    ###########################################################################
    if (isScatter(chart.type) && !is.null(input.data.raw) && containsQTable(input.data.raw))
        subset <- TRUE
    filt <- length(subset) > 1 && NROW(subset) == NROW(data)
    if (!is.null(input.data.raw) || filt || NROW(weights) == NROW(data))
    {
        missing <- if (chart.type %in% c("Venn", "Sankey") && !any(checkRegressionOutput(input.data.raw)))
            "Exclude cases with missing data" else "Use partial data"
        n <- NROW(data)
        if (invalid.joining <- !is.null(attr(data, "InvalidVariableJoining")))
        {
            if (!isDistribution(chart.type) && length(subset) > 1 || NROW(weights) > 1)
                warning("The variables have been automatically spliced together without ",
                        "any knowledge of which case should be matched with which. ",
                        "This may cause the results to be misleading.")
        }
        # As we can potentially use the variable in two different ways, we suppress the warning
        if (isScatter(chart.type))
        {
            # Make sure column names are unique otherwise TidyData will remove
            # them WITHOUT warning
            data <- suppressWarnings(TidyRawData(data, subset = subset,
                    weights = weights, missing = missing, error.if.insufficient.obs = FALSE,
                    remove.missing.levels = FALSE))
        }
        if (!isScatter(chart.type))
            data <- TidyRawData(data, subset = subset, weights = weights,
                        missing = missing, error.if.insufficient.obs = FALSE,
                        remove.missing.levels = isDistribution(chart.type))
        if (invalid.joining)
            attr(data, "InvalidVariableJoining") <- TRUE
        n.post <- NROW(data)
        if (missing == "Exclude cases with missing data" && n.post < n)
            warning("After removing missing values and/or filtering, ", n.post,
                    " observations remain.")
        weights <- setWeight(data, weights)
    }
    if (filt)
        attr(data, "assigned.rownames") <- FALSE


    ###########################################################################
    # 3. Aggregate the data if so required.
    ###########################################################################
    crosstab <- !(chart.type %in% c("Scatter", "Venn") || isDistribution(chart.type)) &&
                 (rawDataLooksCrosstabbable(input.data.raw) || group.by.last)
    if (is.null(first.aggregate))
        first.aggregate <- crosstab
    if ((chart.type %in% c("Scatter", "Venn") || isDistribution(chart.type)) &&
        first.aggregate)
    {
        warning("Data is not aggregated for this chart type.")
        first.aggregate <- FALSE
    }
    if (crosstab || first.aggregate)
    {
        #crosstab <- NCOL(data) == 2 || group.by.last
        if (crosstab && !is.null(attr(data, "InvalidVariableJoining")))
            warning("The variables being crosstabbed have different lengths; ",
                    "it is likely that the crosstab is invalid.")
        data <- aggregateDataForCharting(data, weights, chart.type,
                    crosstab, categorical.as.binary, as.percentages)
        if (crosstab)
            group.by.last <- TRUE
    }

    ###########################################################################
    # 4. Tailoring the data for the chart type.
    ###########################################################################
    multiple.tables <- isTableList(input.data.table) || isTableList(input.data.tables)
    data <- prepareForSpecificCharts(data, multiple.tables, input.data.raw, chart.type,
                                     weights, show.labels, scatter.mult.yvals)
    weights <- setWeight(data, weights)
    scatter.mult.yvals <- isTRUE(attr(data, "scatter.mult.yvals"))

    ###########################################################################
    # 5. Transformations of the tidied data (e.g., sorting, transposing, removing rows).
    ###########################################################################
    original.dim.names <- dimnames(data)
    original.is.multistat <- isQTableWithMultStatistic(data)
    if (isTRUE(transpose) && isScatter(chart.type))
    {
        warning("Data was not transposed. This option is incompatible with Scatter charts")
        transpose <- FALSE
    }

    # Add info about significance arrows - this needs to occur here
    # so that the stat testing info makes use of RearrangeRowsColumn
    has.statistics.testing.info <- !is.null(attr(input.data.table, "QStatisticsTestingInfo", exact = TRUE))
    if (has.statistics.testing.info && signif.append)
        data <- addStatTesting(data, attr(data, "QStatisticsTestingInfo"), signif.p.cutoffs,
                    signif.colors.pos, signif.colors.neg, signif.colors.on.font, signif.symbol, signif.symbol.size)
    if (has.statistics.testing.info && !signif.append)
        attr(data, "QStatisticsTestingInfo") <- NULL

    # Do not drop 1-column table to keep name for legend
    drop <- (tidy && (chart.type %in% c("Pie", "Donut") ||
            !any(nchar(select.columns), na.rm = TRUE) &&
            !any(nchar(column.labels), na.rm = TRUE)))
    data <- transformTable(data, chart.type, multiple.tables, tidy, drop,
                   is.raw.data = !is.null(input.data.raw) || !is.null(input.data.pasted) || !is.null(input.data.other),
                   hide.output.threshold, hide.values.threshold, hide.rows.threshold, hide.columns.threshold,
                   transpose, group.by.last || first.aggregate,
                   hide.empty.rows, hide.empty.columns, date.format)

    # Sort must happen AFTER tidying
    data <- RearrangeRowsColumns(data,
                                 multiple.tables =  multiple.tables,
                                 select.rows, first.k.rows, last.k.rows,
                                 select.columns, first.k.columns, last.k.columns,
                                 row.names.to.remove, column.names.to.remove, split,
                                 auto.order.rows, auto.order.columns,
                                 sort.rows, sort.rows.decreasing, sort.rows.column,
                                 sort.rows.exclude, reverse.rows,
                                 sort.columns, sort.columns.decreasing, sort.columns.row,
                                 sort.columns.exclude, reverse.columns)



    # Calculate percentages after all the select/hide operations are completed
    data <- convertPercentages(data, as.percentages, hide.percent.symbol, chart.type, multiple.tables)

    # Update QStatisticsTestingInfo to match data manipulations
    # This is not used by R-viz or PPT, only for Excel exporting
    if (!is.null(attr(input.data.table, "QStatisticsTestingInfo", exact = TRUE)) && signif.append)
        data <- updateQStatisticsInfo(data, original.dim.names, original.is.multistat, transpose)

    if (any(nchar(column.labels)))
        data <- replaceDimNames(data, 2, column.labels)
    if (any(nchar(row.labels)))
        data <- replaceDimNames(data, 1, row.labels)

    if (scatter.mult.yvals)
        data <- convertScatterMultYvalsToDataFrame(data, input.data.raw, show.labels, date.format)


    ###########################################################################
    # Finalizing the result.
    ###########################################################################


    if (tidy.labels)
        data <- tidyLabels(data, chart.type)
    if (isScatter(chart.type)) # to remove span NETS
        data <- RemoveRowsAndOrColumns(data,
                row.names.to.remove = row.names.to.remove,
                column.names.to.remove = column.names.to.remove, split = split)
    if (filt && !is.null(attr(subset, "label")) && !is.null(input.data.raw) && NCOL(data) == 1 &&
        chart.type %in% c("Table", "Area", "Bar", "Column", "Line", "Radar", "Palm", "Time Series"))
    {
        # Do not drop 1-column table (from aggregated data) to keep name for legend
        data <- CopyAttributes(as.matrix(data), data)
        colnames(data) <- attr(subset, "label")
        drop <- FALSE
    }
    data <- setAxisTitles(data, chart.type, drop, values.title)
    values.title <- attr(data, "values.title")
    categories.title <- attr(data, "categories.title")
    attr(data, "values.title") <- NULL
    attr(data, "categories.title") <- NULL
    if (multiple.tables)
    {
        for (i in seq_along(data))
        {
            attr(data[[i]], "values.title") <- NULL
            attr(data[[i]], "categories.title") <- NULL
            if (NCOL(data[[i]]) > 2)
                attr(data[[i]], "statistic") <- NULL
        }
    }
    if (isScatter(chart.type) && !is.null(input.data.raw))
        data <- rmScatterDefaultNames(data)
    if (scatter.mult.yvals)
        attr(data, "scatter.mult.yvals") <- TRUE

    # Do not re-assign scatter variable indices if it already
    # exists (this is sometimes set in ExtractChartData
    # for some S3 classes) unless specifically requested
    if (isScatter(chart.type) && !scatter.mult.yvals &&
        (is.null(attr(data, "scatter.variable.indices")) ||
         any(nchar(select.columns), na.rm = TRUE)))
        attr(data, "scatter.variable.indices") <- scatterVariableIndices(input.data.raw, data, show.labels)

    # This is a work around bug RS-3402
    # This is now fixed in Q 5.2.7+, but we retain support for older versions
    # by converting to a matrix if necessary
    if (chart.type == "Table" && !is.null(attr(data, "statistic")) &&
        (is.null(dim(data)) || length(dim(data)) == 1))
    {
        tmp <- attr(data, "statistic")
        data <- as.matrix(data)
        attr(data, "statistic") <- tmp
    }

    # Modify multi-stat QTables so they are 3 dimensional arrays
    # and statistic attribute from the primary statistic
    # This is needed to correctly export chart to powerpoint and
    # R GUI code checks the statistic attribute to determine axis formatting
    if (!tidy && is.array(data) && !is.null(attr(data, "questions")) &&
        is.null(attr(data, "statistic")))
    {
        qattr <- attr(data, "questions")
        data <- ConvertQTableToArray(data)
        attr(data, "questions") <- qattr
    }
    if (sort.rows)
        attr(data, "sorted.rows") <- TRUE
    if (!is.null(input.data.table))
        attr(data, "footerhtml") <- attr(input.data.table, "footerhtml", exact = TRUE)

    list(data = data,
         weights = weights,
         values.title = values.title,
         categories.title = categories.title,
         chart.title = chart.title,
         chart.footer = attr(data, "footer", exact = TRUE),
         scatter.variable.indices = attr(data, "scatter.variable.indices"))
}

replaceDimNames <- function(x, dim, labels)
{
    if (length(dim(x)) < dim)
        x <- CopyAttributes(as.matrix(x), x)

    new.labels <- paste0(dimnames(x)[[dim]], rep("", dim(x)[dim])) # get length right
    tmp.labels <- ConvertCommaSeparatedStringToVector(labels)
    tmp.len <- min(length(tmp.labels), length(new.labels))
    new.labels[1:tmp.len] <- tmp.labels[1:tmp.len]
    dimnames(x)[[dim]] <- new.labels
    return(x)
}


#' Handle input of table or tables
#' @noRd
#' @description This function allows a list of tables to be supplied
#'  via the \code{input.data.table} argument in the same way as
#'  \code{input.data.tables}.
#' @param x Input data which may be a matrix or list of matrix
unlistTable <- function(x)
{
    if (is.null(x))
        return(x)
    if (is.list(x) && !is.data.frame(x) && length(x) == 1)
        return(x[[1]])
    x
}

isTableList <- function(x)
{
    inherits(x, "list") && !is.data.frame(x) && is.list(x) && length(x) > 1 &&
    (is.matrix(x[[1]]) || is.data.frame(x[[1]]) || is.numeric(x[[1]]))
}

isScatter <- function(chart.type)
{
    grepl("Scatter|Bubble", chart.type)
}

#' @importFrom verbs Sum
crosstabOneVariable <- function(x, group, weights = NULL,
        categorical.as.binary = FALSE, as.percentages = FALSE)
{
    data <- data.frame(x = x, y = group)
    data$w <- if (is.null(weights)) rep.int(1L, NROW(data)) else weights

    if (is.numeric(x) || !categorical.as.binary)
    {
        data$x <- AsNumeric(data$x, binary = FALSE)
        if (!is.null(weights))
        {
            data$xw <- data$x * weights
            out <- Table(xw ~ y, data = data, FUN = sum) / Table(w ~ y, data = data, FUN = sum)

        } else
            out <- Table(x ~ y, data = data, FUN = mean)
        attr(out, "statistic") <- "Average"
        return(out)
    }
    out <- Table(w ~ x + y, data = data, FUN = sum)
    if (as.percentages)
    {
        out <- out / Sum(data$w * !is.na(data$x), remove.missing = FALSE) * 100
        attr(out, "statistic") <- "%"
    } else
        attr(out, "statistic") <- "Counts"
    out
}





#' Aggregrate Raw Data For Charting
#' @param data \code{data.frame} containing raw data
#' @param weights numeric vector of weights
#' @param chart.type character; type of chart to be plotted
#' @param crosstab Aggregate using a contingency table.
#' @param categorical.as.binary Whether to convert factors to indicator variables
#' @param as.percentages Whether to return percentages instead of counts.
#'     This is only used if the chart.type is "Heat". The difference between these
#'     calculations is this percentage uses the number of observations in the dataframe
#'     as the denomicator. For bar/column charts, it is computing row percentages.
#' @return aggregated data
#' @noRd
#' @importFrom flipStatistics Table WeightedTable
#' @importFrom flipTransformations AsNumeric
aggregateDataForCharting <- function(data, weights, chart.type, crosstab,
        categorical.as.binary, as.percentages)
{
    if (chart.type != "Heat")
        as.percentages <- FALSE

    # In tables that show aggregated tables, only the x-axis title is
    # taken from dimnames. But both names should be set in case
    # the table is transposed
    if (NCOL(data) == 1)
    {
        out <- as.matrix(WeightedTable(unlist(data), weights = weights))
        names(dimnames(out)) <- c(names(data)[1], "")
        attr(out, "statistic") <- "Count"
    }
    else if (crosstab)
    {
        if (is.null(categorical.as.binary))
            categorical.as.binary <- TRUE

        data <- as.data.frame(data)
        tmp.names <- names(data)
        k <- NCOL(data)
        group.var <- data[, k]

        if (k <= 2)
        {
            out <- crosstabOneVariable(data[, 1], group.var, weights,
                        categorical.as.binary, as.percentages)
            if (attr(out, "statistic") == "Average")
                attr(out, "categories.title") <- tmp.names[2]
            else
                names(dimnames(out)) <- tmp.names
        }
        else
        {
            res <- lapply(data[, -k], crosstabOneVariable, group = group.var,
                        weights = weights, categorical.as.binary = categorical.as.binary,
                        as.percentages = as.percentages)
            out <- do.call("rbind", res)

            if (chart.type == "Heat")
                names(dimnames(out)) <- c("", attr(group.var, "question", exact = TRUE))
            else
                names(dimnames(out)) <- c("", tmp.names[2])

            attr.list <- lapply(res, attr, "statistic", exact = TRUE)
            if (all(attr.list == attr.list[[1]]))
                attr(out, "statistic") <- setNames(attr.list[[1]], names(attr.list[1]))
        }
    }
    else # first.aggregate
    {
        if (is.null(categorical.as.binary))
            categorical.as.binary <- FALSE

        if (categorical.as.binary)
        {
            tmp.dat <- data
            tmp.names <- Names(data)
            tmp.numeric <- sapply(data, is.numeric)
        }
        if (is.data.frame(data))
            data <- AsNumeric(data, binary = categorical.as.binary)
        if (!is.null(weights))
        {
            xw <- sweep(data, 1, weights, "*")
            sum.xw <- apply(xw, 2, sum, na.rm = TRUE)
            w <- matrix(weights, nrow(data), ncol(data))
            w[is.na(data)] <- 0
            sum.w <- apply(w, 2, sum)
            out <- sum.xw / sum.w
        } else
           out <- apply(data, 2, mean, na.rm = TRUE)

        if (categorical.as.binary && any(!tmp.numeric))
        {
            ind <- which(!tmp.numeric)
            for (ii in ind)
            {
                tmp.pos <- grep(paste0("^", tmp.names[ii]), names(out))
                names(out)[tmp.pos] <- levels(tmp.dat[[ii]])
            }
        }
        out <- as.matrix(out)

        # If ANY of the variables have been converted to percentages
        # label 'statistic' attribute to prevent mixed summary
        # statistics from being summed
        if (categorical.as.binary && any(!tmp.numeric))
            attr(out, "statistic") <- "%"
        else if (!categorical.as.binary || all(tmp.numeric))
            attr(out, "statistic") <- "Average"
    }
    attr(out, "assigned.rownames") <- TRUE
    out
}

#' coerceToDataFrame
#'
#' @description Takes various formats of data (in particular, lists of variables and
#' data.frames, and forces them to become a data frame. Where the coercion
#' involves creating rows in the data frame that are unlikely to be from the same analysis unit, a warning
#' is provided.
#' @param x Input data which may be a list of variables or dataframe
#' @param chart.type For any value except \code{"Scatter"}, x$Y will be
#'      ignored if x$X contains more than one variable
#' @param remove.NULLs Logical; whether to remove null entries
#' @importFrom flipTables TidyTabularData
#' @return A \code{\link{data.frame}})
#' @importFrom stats sd
#' @importFrom flipChartBasics MatchTable
#' @importFrom flipFormat TidyLabels
#' @importFrom flipU MakeUniqueNames
coerceToDataFrame <- function(x, chart.type = "Column", remove.NULLs = TRUE)
{
    if (is.null(x))
        return(x)
    if (is.data.frame(x))
        return(x)
    if (is.list(x) && length(x) == 1 && is.matrix(x[[1]])) # List only contains a matrix
    {
        tmp.names <- getFullRowNames(x[[1]])
        x <- as.data.frame(x[[1]])
        rownames(x) <- tmp.names
        return(x)
    }
    if (is.character(x))
    {
        x <- TidyTabularData(x)
        rownames(x) <- getFullRowNames(x)
        return(as.data.frame(x))
    }

    # For plotting regression output in a scatterplot, coerce regression object to chart data
    if (any(reg.outputs <- checkRegressionOutput(x)) && isScatter(chart.type) && is.list(x))
    {
        if (reg.outputs[1])
            x[[1]] <- extractRegressionScatterData(x[[1]])
        if (reg.outputs[2])
        {
            # Always expect names attributes of the models or table to be passed by Q/Displayr
            # However, catch case where names arent provided in the Y element of input.data.raw
            reg.names <- if (!is.null(names(x[[2]]))) names(x[[2]]) else LETTERS[seq_along(x[[2]])]
            x[[2]] <- mapply(extractRegressionScatterData,
                             x = x[[2]], y.axis = TRUE, name = reg.names, SIMPLIFY = FALSE)
        }

    }


    # if labels are present in raw data, extract and store for later
    rlabels <- x$labels
    x$labels <- NULL

    # Dealing with situation where x$X is a list containing only one thing.
    if (is.list(x[[1]]) && length(x[[1]]) == 1)
        x[[1]] <- x[[1]][[1]]

    # For Scatterplot, y-coordinates are entered by a multi comboBox
    # Remove duplicates before rownames are messed up
    if (isScatter(chart.type) && length(x) >= 2 && is.list(x[[2]]))
    {
        if (length(x[[2]]) > 1 || NCOL(x[[2]][[1]]) > 1)
            names(x[[2]]) <- NULL
        for (i in seq_along(x[[2]]))
        {
            # Replace rownames to preserve rowspans and duplicated labels
            y.rnames <- getFullRowNames(x[[2]][[i]])
            if (!is.null(nrow(x[[2]][[i]])))
                rownames(x[[2]][[i]]) <- MakeUniqueNames(y.rnames)
            else
                names(x[[2]][[i]]) <- MakeUniqueNames(y.rnames)
        }
        # Remap all Y elements to common array and keep attributes
        if (!is.null(unlist(lapply(x[[2]], rownames))) && length(x[[2]]) >= 2 && any(reg.outputs))
        {
            y.all.rownames <- unique(unlist(lapply(x[[2]], getFullRowNames)))
            base.values <- rep(NA, length(y.all.rownames))
            x[[2]] <- lapply(seq_along(x[[2]]), function(i) {
                vals <- base.values
                indices <- match(names(x[[2]][[i]]), y.all.rownames, nomatch = 0)
                vals[indices] <- x[[2]][[i]]
                names(vals) <- y.all.rownames
                CopyAttributes(vals, x[[2]][[i]])
            })
        }
        x[[2]] <- data.frame(x[[2]], check.names = FALSE, check.rows = FALSE,
                             fix.empty.names = FALSE, stringsAsFactors = FALSE)
        ind.autonames <- grep("^structure\\(|^c\\(", colnames(x[[2]]), perl = TRUE)
        for (ii in ind.autonames)
        {
            tmp.name <- attr(x[[2]][, ii], "name")
            colnames(x[[2]])[ii] <- if (!is.null(tmp.name)) tmp.name else " "
        }
    }

    if (!isScatter(chart.type) && (length(x) == 1 && is.list(x) && (is.matrix(x[[1]]) || !is.atomic(x[[1]]))))
    {
        x <- x[[1]]
        if (is.null(rlabels) && !is.atomic(x))
        {
            rlabels <- x$labels
            x$labels <- NULL
        }
    }

    # Checking to see if all the elements of x are single variables.
    all.variables <- all(sapply(x, NCOL) == 1)
    # Remove entries in the list which are null
    if (remove.NULLs)
        x <- Filter(Negate(is.null), x)
    x.rows <- sapply(x, function(m) NROW(as.data.frame(m)))
    k <- length(x.rows)
    extra.cols <- NULL
    if (isScatter(chart.type))
    {
        # Trim Y if sizes or color variable is provided
        if (NCOL(x$Y) > 1 && (!is.null(x$Z1) || !is.null(x$Z2) || !is.null(x$groups)))
        {
            warning("Only the first column of '", scatterDefaultNames(2),
                    "' variables is used'")
            extra.cols <- x$Y[, -1, drop = FALSE]
            x$Y <- x$Y[, 1, drop = FALSE]
        }
        for (i in 1:k)
        {
            tmp.names <- getFullRowNames(x[[i]])
            if (!is.null(names(x)) && names(x)[i] != "Y" && NCOL(x[[i]]) > 1)
            {
                warning("Only the first column of '", scatterDefaultNames(i),
                        "' variables is used")
                x[[i]] <- x[[i]][, 1, drop = FALSE]
            }
            if (!is.null(nrow(x[[i]])))
                rownames(x[[i]]) <- tmp.names
        }
    }

    # Extracting variable names
    if (isScatter(chart.type))
        nms <- unlist(lapply(1:k, function(i) {
            if (length(dim(x[[i]])) < 2) tidyScatterDefaultNames(names(x)[i])
            else                         colnames(x[[i]]) }))
    else
        nms <- if (all.variables) names(x) else unlist(lapply(x, names)) # i.e. 'X', 'Y', 'labels'

    # Check for row names to match on
    x.all.rownames <- NULL
    removed.rownames <- NULL
    if (isScatter(chart.type) && length(x) > 1)
    {
        # Check for row names to match on
        x.all.rownames <- getFullRowNames(x[[1]])
        for (i in 2:k)
        {
            if (length(x.all.rownames) == 0)
                x.all.rownames <- getFullRowNames(x[[i]])
            else
            {
                tmp.names <- getFullRowNames(x[[i]])
                if (length(tmp.names) > 0)
                {
                    removed.rownames <- unique(c(setdiff(x.all.rownames, tmp.names),
                                                 setdiff(tmp.names, x.all.rownames)))
                    x.all.rownames <- intersect(x.all.rownames, tmp.names)
                }

            }
        }

        # This is only rearranging the tables into the right order/dimensions
        # Note that we don't use MergeTables because this forces tables into the same type
        if (length(x.all.rownames) > 0)
        {
            for (i in 1:k)
                x[[i]] <- MatchTable(x[[i]], ref.names = x.all.rownames,
                                as.matrix = FALSE, trim.whitespace = FALSE,
                                silent.remove.duplicates = TRUE)

            if (!is.null(extra.cols))
                extra.cols <- MatchTable(extra.cols, ref.names = x.all.rownames,
                                as.matrix = FALSE, trim.whitespace = FALSE,
                                silent.remove.duplicates = TRUE)

            if (length(x.all.rownames) < max(x.rows))
            {
                discarded.rows <- if (length(removed.rownames) == 0) NULL else {
                    paste0(": ", paste0(removed.rownames, collapse = ", "))
                }
                if (any(reg.outputs))
                    base.warning <- paste0("Y input coefficients that did not appear in the list of X input ",
                                           "coefficients were discarded")
                else
                {
                    # Suppress warnings when removed rows are named "NET"
                    # This happens often when inputs are BANNERS
                    if (length(removed.rownames) > 0)
                        removed.rownames <- removed.rownames[trimws(removed.rownames) != "NET"]
                    base.warning <- "Rows that did not occur in all of the input tables were discarded"
                }

                if (length(removed.rownames) > 0)
                    warning(base.warning, discarded.rows)
            }
            if (length(rlabels) > 0)
                warning("The 'Labels' variable has been ignored. Using row names of ",
                        "'X-coordinates' and 'Y-coordinates' instead")
            rlabels <- x.all.rownames
        }
    }

    if (any(reg.outputs) && length(x.all.rownames) == 0 && length(x) > 1)
    {
        x.names <- paste0(sQuote(names(x[[1]])), collapse = ", ")
        y.names <- paste0(sQuote(rownames(x[[2]])), collapse = ", ")
        stop("The X coordinate and Y coordinate inputs don't have any variables with matching names. ",
             "Please ensure that there is matching input for both the X and Y coordinate input. ",
             "The X coordinate input has names: ", x.names, ". ",
             "The Y coordinate input has names: ", y.names, ".")
    }

    num.obs <- sapply(x, NROW)
    if (isScatter(chart.type) && is.null(x.all.rownames) &&
        length(unique(num.obs[num.obs > 0])) > 1)
    {
        # If data is aggregated (e.g. the mean of each variable) then
        # the length can differ
        names(num.obs) <- sapply(names(num.obs), tidyScatterDefaultNames)
        ind.diff <- which(num.obs > 0 & num.obs != num.obs[1])
        stop("Variables for '", paste(names(num.obs)[ind.diff], collapse = "', '"),
            "' differ in length from variables for '", names(num.obs)[1], "'. ",
            "Check that all variables are from the same data set.")
    }

    # Splicing together elements of the input list if lengths vary
    # Note that elements of x can contain lists of variables
    invalid.joining <- FALSE
    if (!isScatter(chart.type) && (NCOL(x) > 1 || is.list(x) && length(x) > 1))
    {
        if (invalid.joining <- sd(x.rows) != 0)
        {
            k <- length(x.rows)
            out <- matrix(NA, max(x.rows), k)
            for (i in 1:k)
                out[1:x.rows[i], i] <- x[[i]]
            x <- out
        }
    }
    x <- data.frame(x, stringsAsFactors = FALSE, check.names = FALSE)
    names(x) <- MakeUniqueNames(nms)
    if (!is.null(extra.cols))
        x <- data.frame(x, extra.cols, stringsAsFactors = FALSE, check.names = FALSE)

    # Set rownames
    if (!is.null(rlabels) && nrow(x) == length(rlabels))
         rownames(x) <- MakeUniqueNames(as.character(rlabels))
    if (invalid.joining)
        attr(x, "InvalidVariableJoining") <- TRUE
    return(x)
}


isDistribution <- function(chart.type)
{
    grepl("Bean|Box|Histogram|Density|Violin", chart.type)
}

#' @importFrom flipStatistics ExtractChartData
#' @importFrom verbs FlattenQTable
processInputData <- function(x, subset, weights)
{
    if (is.null(x))
        return(x)

    if (length(subset) > 1)
    {
        msg <- paste("Filters have been applied to this visualization. They have been ignored.",
                "To apply filters you need to instead filter the source data that is being visualized.")
        tb.desc <- attr(x, "basedescription")
        if (is.null(tb.desc) || tb.desc$FilteredProportion == 0)
            warning(msg)
        else if ((mean(subset) * 100) + tb.desc$FilteredProportion != 100)
            warning(msg)
    }
    if (length(weights) > 0)
    {
        msg <- paste("Weights have been applied to this visualization. They have been ignored.",
                "To apply weights you need to instead weight the source data that is being visualized.")
        if (is.null(attr(x, "basedescription")) || is.null(attr(x, "weight.name")))
            warning(msg)
        else if (!isTRUE(attr(weights, "name") == attr(x, "weight.name")))
            warning(msg)
    }

    # Simplify input if only a single table has been specified
    if ("list" %in% class(x) && is.list(x) && !is.data.frame(x))
    {
        if (length(x) == 1)
            x <- x[[1]]
    }

    # Try to use S3 method to extract data
    x <- ExtractChartData(x)
    n.dim <- length(dim(x)) - isQTableWithMultStatistic(x)
    if (n.dim >= 2)
        x <- FlattenQTable(x)

    if (hasUserSuppliedRownames(x))
        attr(x, "assigned.rownames") <- TRUE

    return(x)
}


isQTableWithMultStatistic <- function(x)
{
    !is.null(attr(x, "questions")) && !is.null(attr(x, "name")) && is.null(attr(x, "statistic"))
}

processPastedData <- function(input.data.pasted, warn, date.format, subset, weights)
{
    if (length(subset) > 1)
        warning("Filters have been applied to this visualization. They have been ignored. ",
            "To apply filters you need to instead filter the source data that is being visualized.")
    if (length(weights) > 0)
        warning("Weights have been applied to this visualization. They have been ignored. ",
            "To apply weights you need to instead weight the source data that is being visualized.")

    us.format <- switch(date.format, US = TRUE, International = FALSE, Automatic = NULL, "No date formatting")
    want.data.frame <- length(input.data.pasted) > 1L && isTRUE(input.data.pasted[[2]])
    processed <- tryCatch(ParseUserEnteredTable(input.data.pasted[[1]],
                                  want.data.frame = want.data.frame,
                                  want.factors = FALSE, #input.data.pasted[[2]], #charts has no concept of factors
                                  want.col.names = input.data.pasted[[3]],
                                  want.row.names = input.data.pasted[[4]],
                                  us.format = us.format,
                                  warn = warn),
             error = function(e) {input.data.pasted[[1]]})
    if (!is.null(processed) && length(input.data.pasted) > 3)
        attr(processed, "assigned.rownames") <- input.data.pasted[[4]]
    if (!is.null(processed) && want.data.frame)
        attr(processed, "assigned.rownames") <- TRUE
    if (!is.null(attr(processed, "row.column.names")))
        names(dimnames(processed)) <- attr(processed, "row.column.names")
    return(processed)
}

#' @importFrom verbs Sum
checkNumberOfDataInputs <- function(data.source.index, table, tables, raw, pasted, other)
{
    data.provided <- !vapply(list(table, tables, raw, pasted, other), is.null, logical(1L))
    n.data <- sum(data.provided)
    if (n.data == 0)
        stop("No data has been provided.")
    else if (is.null(data.source.index))
    {
        if (n.data > 1)
            stop("There are ", n.data, " data inputs. One and only one data argument may be supplied.")

    } else if (!data.provided[data.source.index])
        stop("The data provided does not match the 'data.source.index'.")
}

# For error messages, etc
scatterDefaultNames <- function(i)
{
    return(switch(i,
         "X",
         "Y",
         "Sizes",
         "Colors",
         "Groups"))
}

tidyScatterDefaultNames <- function(x)
{
    return(switch(x,
        X = "X coordinates",
        Y = "Y coordinates",
        Z1 = "Sizes",
        Z2 = "Colors"))
}

rmScatterDefaultNames <- function(data)
{
    # Remove default names so they are not shown in the axis
    if (is.data.frame(data) && !is.null(colnames(data)))
    {
        if (colnames(data)[1] == "X coordinates")
            colnames(data)[1] <- " "
        if (NCOL(data) >= 2 && colnames(data)[2] == "Y coordinates")
            colnames(data)[2] <- "  "
    }
    return(data)
}


scatterVariableIndices <- function(input.data.raw, data, show.labels)
{
    # Use ExtractChartData to convert any raw Regression input
    if (any(reg.outputs <- checkRegressionOutput(input.data.raw)))
    {
        if (reg.outputs[1])
            input.data.raw[[1]] <- extractRegressionScatterData(input.data.raw[[1]])
        if (reg.outputs[2])
            input.data.raw[[2]] <- lapply(input.data.raw[[2]], extractRegressionScatterData, y.axis = TRUE)
    }

    # Creating indices in situations where the user has provided a table.
    len <- length(input.data.raw)
    indices <- c(x = 1,
                 y = 2,
                 sizes = if (NCOL(data) >= 3) 3 else NA,
                 colors = if (NCOL(data) >= 4) 4 else NA,
                 groups = NCOL(data))
    if (is.null(input.data.raw) || is.data.frame(input.data.raw) || is.list(input.data.raw) && len == 1)
        return(indices)

    .getColumnIndex <- function(i)
    {
        if (i > len)
            return(NA)
        if (raw.is.null[i])
            return(NA)
        ind <- cumsum(!raw.is.null)[i]
        lst <- input.data.raw[[i]]
        if (is.null(lst))
            return(NA)
        nms <- names(data)

        # If inputs are variables, match on label/variable name to avoid problems with duplicates
        # This should not be applied on tables which do not necessarily have unique names
        if (!is.null(attr(lst, "label")) && is.null(attr(lst, "questions")))
        {
            nm <- if (show.labels) Labels(lst) else Names(lst)
            if (is.null(nm) || length(nm) != 1)
                return(ind)
            pos <- match(nm, nms)
            if (!is.na(pos))
                return(pos)
        }
        return(ind)
    }


    # Indices corresponding to selections in input.raw.data
    raw.is.null <- sapply(input.data.raw, is.null)
    indices["x"] <- .getColumnIndex(1)
    indices["y"] <- .getColumnIndex(2)
    indices["sizes"] <- .getColumnIndex(3)
    indices["colors"] <- .getColumnIndex(4)
    indices["groups"] <- .getColumnIndex(5)
    indices
}

checkForNegPercent <- function(x)
{
    ind.negative <- which(x < 0)
    if (length(ind.negative) > 0)
    {
        warning("Percentages calculated ignoring negative values.")
        x[ind.negative] <- 0
    }
    return(x)
}


asPercentages <- function(data)
{
    if (length(dim(data)) == 2 && is.null(attr(data, "statistic")) &&
        length(attr(data, "questions")) == 2 && attr(data, "questions")[2] == "SUMMARY")
    {
        # 1-dimensional table with multiple statistics
        data[,1] <- checkForNegPercent(data[,1])
        data[,1] <- prop.table(data[,1]) * 100
    }
    else if (length(dim(data)) > 2)
    {
        # 2-dimensional table with statistics
        data[,,1] <- checkForNegPercent(data[,,1])
        if (NCOL(data) == 1)
            data[,,1] <- suppressWarnings(prop.table(data[,,1])) * 100
        else
            data[,,1] <- prop.table(suppressWarnings(TidyTabularData(data)), 1) * 100
        dimnames(data)[[3]][1] <- "%"
    }
    else if (NCOL(data) > 1)
    {
        # 2-dimensional table without statistics
        data <- checkForNegPercent(data)
        data <- prop.table(data, 1) * 100
        attr(data, "statistic") <- "Row %"
    }
    else
    {
        # 1-dimensional table without statistics
        data <- checkForNegPercent(data)
        data <- prop.table(data) * 100
        attr(data, "statistic") <- "%"
    }
    data
}

RearrangeRowsColumns <- function(data,
                                 multiple.tables,
                                 select.rows, first.k.rows, last.k.rows,
                                 select.columns, first.k.columns, last.k.columns,
                                 row.names.to.remove, column.names.to.remove, split,
                                 auto.order.rows, auto.order.columns,
                                 sort.rows, sort.rows.decreasing, sort.rows.column,
                                 sort.rows.exclude, reverse.rows,
                                 sort.columns, sort.columns.decreasing, sort.columns.row,
                                 sort.columns.exclude, reverse.columns)
{
    if (multiple.tables)
    {
        for(i in seq_along(data))
            data[[i]] = RearrangeRowsColumns(data[[i]], FALSE,
                                 select.rows, first.k.rows, last.k.rows,
                                 select.columns, first.k.columns, last.k.columns,
                                 row.names.to.remove, column.names.to.remove, split,
                                 auto.order.rows, auto.order.columns,
                                 sort.rows, sort.rows.decreasing, sort.rows.column,
                                 sort.rows.exclude, reverse.rows,
                                 sort.columns, sort.columns.decreasing, sort.columns.row,
                                 sort.columns.exclude, reverse.columns)
        return(data)
    }

    # Select first so that sorting only occurs in rows/columns of interest
    data <- SelectRows(data, select = select.rows)
    data <- SelectColumns(data, select = select.columns)

    if (auto.order.rows)
    {
        data <- try(AutoOrderRows(data))
        if (inherits(data, "try-error"))
            stop("Could not perform correspondence analysis on table. Try hiding empty rows.")
    }
    else if (sort.rows)
        data <- SortRows(data, sort.rows.decreasing, sort.rows.column, sort.rows.exclude)
    if (reverse.rows)
        data <- ReverseRows(data)

    if (auto.order.columns)
    {
        data <- try(AutoOrderColumns(data))
        if (inherits(data, "try-error"))
            stop("Could not perform correspondence analysis on table. Try hiding empty columns.")
    }
    else if (sort.columns)
        data <- SortColumns(data, sort.columns.decreasing, sort.columns.row, sort.columns.exclude)
    if (reverse.columns)
        data <- ReverseColumns(data)

    # Keep hidden rows/columns until after sorting
    # Sort is often performed on the NET values
    data <- RemoveRowsAndOrColumns(data,
                row.names.to.remove = row.names.to.remove,
                column.names.to.remove = column.names.to.remove, split = split)

    # Keep last to retain order from sorting
    data <- SelectRows(data, first.k = first.k.rows, last.k = last.k.rows)
    data <- SelectColumns(data, first.k = first.k.columns, last.k = last.k.columns)

}

#' @importFrom flipTables RemoveRowsAndOrColumns HideEmptyRows HideEmptyColumns
#' @importFrom flipTime AsDate AsDateTime IsDateTime
#' @importFrom flipU CopyAttributes
#' @importFrom verbs Sum
transformTable <- function(data,
                           chart.type,
                           multiple.tables,
                           tidy,
                           drop,
                           is.raw.data,
                           hide.output.threshold,
                           hide.values.threshold,
                           hide.rows.threshold, hide.columns.threshold,
                           transpose,
                           first.aggregate,
                           hide.empty.rows, hide.empty.columns,
                           date.format,
                           table.counter = 1)
{
    if (multiple.tables)
    {
        for (i in seq_along(data))
            data[[i]] = transformTable(data[[i]],
                                       chart.type,
                                       FALSE,
                                       FALSE,
                                       FALSE,
                                       is.raw.data,
                                       0, 0, 0, 0, # sample size not used
                                       transpose,
                                       first.aggregate,
                                       hide.empty.rows, hide.empty.columns,
                                       date.format,
                                       i)
        return(data)
    }

    if (hide.empty.rows)
        data <- if (isListOrRaggedArray(data)) lapply(data, HideEmptyRows)
                else HideEmptyRows(data)

    if (hide.empty.columns)
    {
        if (isScatter(chart.type))
            old.names <- colnames(data)
        data <- if (isListOrRaggedArray(data)) lapply(data, HideEmptyColumns)
                else HideEmptyColumns(data)
    }

    # Switching rows and columns
    # This is the first operation performed to ensure that both
    # hide.rows.threshold and row.names.to.remove refer to rows AFTER tranposing
    if (isTRUE(transpose))
    {
        if (length(dim(data)) > 2)
            new.data <- aperm(data, c(2, 1, 3))
        else
            new.data <- t(data)
        data <- CopyAttributes(new.data, data)
        attr(data, "questions") <- rev(attr(data, "questions"))
    }

    # Checking sample sizes (if available)
    # This needs to happen after row/columns have been (de)selected
    if (any(as.integer(hide.output.threshold), na.rm = TRUE))
        data <- HideOutputsWithSmallSampleSizes(data, hide.output.threshold)
    if (any(as.integer(hide.values.threshold), na.rm = TRUE))
        data <- HideValuesWithSmallSampleSizes(data, hide.values.threshold)
    if (any(as.integer(hide.rows.threshold), na.rm = TRUE))
        data <- HideRowsWithSmallSampleSizes(data, hide.rows.threshold)
    if (any(as.integer(hide.columns.threshold), na.rm = TRUE))
        data <- HideColumnsWithSmallSampleSizes(data, hide.columns.threshold)

    # Set axis names before dropping dimensions (but AFTER transpose)
    data <- setAxisTitles(data, chart.type, drop)
    if (chart.type == "Scatter" && is.null(dim(data)))
    {
        tmp.names <- names(data)
        dim(data) <- c(length(data), 1)
        if (!is.null(tmp.names))
            rownames(data) <- tmp.names
    }

    # Convert to matrix to avoid state names from being turned into numeric values
    # when TidyTabularData is called
    if (gsub(" ", "", chart.type) == "GeographicMap" && is.data.frame(data))
        data <- CopyAttributes(as.matrix(data), data)

    # This must happen after sample sizes have been used
    # (only first statistic is retained after tidying)
    if (tidy && !chart.type %in% c("Venn", "Sankey", "Heat") &&
        !isScatter(chart.type) && !isDistribution(chart.type))
            data <- tryCatch(TidyTabularData(data), error = function(e) { data })


    if (!grepl("^No date", date.format) && date.format != "Automatic")
    {
        input.us.format <- !grepl("International", date.format)
        output.format.str <- if (!grepl("International", date.format)) "%b %d %Y" else "%d %b %Y"
        if (!is.null(rownames(data)) && IsDateTime(rownames(data)))
        {
            tmp.dates <- try(suppressWarnings(AsDate(rownames(data), us.format = input.us.format)), silent = TRUE)
            if (inherits(tmp.dates, "try-error"))
                tmp.dates <- suppressWarnings(AsDate(rownames(data)))
            rownames(data) <- format(tmp.dates, output.format.str)
        }
        else if (IsDateTime(names(data)))
        {
            tmp.dates <- try(suppressWarnings(AsDate(names(data), us.format = input.us.format)), silent = TRUE)
            if (inherits(tmp.dates, "try-error"))
                tmp.dates <- suppressWarnings(AsDate(names(data)))
            names(data) <- format(tmp.dates, output.format.str)
        }
    }
    return(data)
}

convertPercentages <- function(data, as.percentages, hide.percent.symbol, chart.type,
                               multiple.tables, table.counter = 1)
{
    if (multiple.tables)
    {
        for (i in seq_along(data))
            data[[i]] <- convertPercentages(data[[i]], as.percentages, hide.percent.symbol,
                            chart.type, FALSE, i)
        return(data)
    }

    ### If data is already percentages in Qtable then divide by 100
    ### Note that R outputs and pasted data will already be in decimals
    #stat <- attr(data, "statistic")
    #qst <- attr(data, "questions")
    #if (!is.null(stat) && !is.null(qst) && grepl("%)?$", stat))
    #    data <- data / 100

    # Convert to percentages - this must happen AFTER transpose and RemoveRowsAndOrColumns
    if (as.percentages && chart.type != "Venn")
    {
        percentages.warning <- paste0("The data has not been converted to percentages/proportions. ",
        "To convert to percentages, first convert to a more suitable type (e.g., create a table).")
        if (!is.numeric(data) && !is.data.frame(data) &&
            (is.null(attr(data, "questions")) || chart.type %in% c("Pie", "Donut", "Heat")))
            warning(percentages.warning)
        else if (chart.type %in% c("Pie", "Donut"))
        {
            data <- data / Sum(data) * 100
            attr(data, "statistic") <- "%"
        }
        else if (chart.type == "Heat" && isTRUE(grepl("%$", attr(data, "statistic"))))
            data <- data
        else
            data <- asPercentages(data) # converts character QTables to numeric

        if (isTRUE(attr(data, "values.title") == "n") || isTRUE(attr(data, "values.title") == "Count"))
            attr(data, "values.title") <- "%"
    }

    if (hide.percent.symbol)
    {
        if (isTRUE(grepl("%", attr(data, "statistic"))))
            attr(data, "statistic") <- "Percent"
        else if (!is.null(attr(data, "questions")) && !is.null(attr(data, "name")) &&
                  is.null(attr(data, "statistic")))
        {
            dlen <- length(dim(data))
            primary.stat <- dimnames(data)[[dlen]][1]
            if (grepl("%", primary.stat))
                dimnames(data)[[dlen]][1] <- gsub("%", "Percent", primary.stat)
        }
    }
    return(data)
}

#' @importFrom flipTables TidyTabularData
#' @importFrom flipTransformations AsNumeric
#' @importFrom flipU MakeUniqueNames
#' @importFrom verbs SumEachRow
prepareForSpecificCharts <- function(data,
                                     multiple.tables,
                                     input.data.raw,
                                     chart.type,
                                     weights,
                                     show.labels,
                                     scatter.mult.yvals)
{
    if (!isDistribution(chart.type) && chart.type != "Table" && !is.null(input.data.raw) &&
        is.list(input.data.raw$X) && length(input.data.raw$X) > 10 && !inherits(input.data.raw$X, "Regression"))
        warning("With a large number of variables, it may be better to first create ",
                 "a table and then create a visualization using the table.")

    # Multiple tables
    if (multiple.tables)
    {
        data <- lapply(data, TidyTabularData)
        # flipStandardCharts::Scatterplot takes an array input, with column numbers indicating how to plot.
        if (isScatter(chart.type))
            attr(data, "scatter.variable.indices") <- c(x = 1, y = 2, sizes = 3, colors = 4)
    }
    else if (chart.type == "Table" || chart.type == "Heat")
    {
        # Do nothing
    }
    else if (chart.type == "Venn")
    {
        missing.data.rows <- SumEachRow(as.matrix(is.na(data))) > 0
        if (any(missing.data.rows))
        {
            data <- data[!missing.data.rows, ]
            warning(Sum(missing.data.rows), " case(s) with missing data have been removed.")
        }
    }
    else if (chart.type == "Sankey")
    {
        data <- coerceToDataFrame(data)
    }
    # Scatterplots
    else if (isScatter(chart.type))
    {
        if (isTRUE(scatter.mult.yvals) ||
            (is.list(input.data.raw$Y) && length(input.data.raw$Y) > 1))
        {
            # Tag data for reformatting but this is preformed later after
            # Row/column manipulations
            attr(data, "scatter.mult.yvals") <- TRUE

        } else if (NCOL(input.data.raw$Y[[1]]) > 1 && is.null(input.data.raw$Z1) &&
            is.null(input.data.raw$Z2) && is.null(input.data.raw$groups))
        {
            if (!(isQTableWithMultStatistic(input.data.raw$Y[[1]]) &&
                  length(dim(input.data.raw$Y[[1]])) < 3))
                attr(data, "scatter.mult.yvals") <- TRUE

            if (isQTableWithMultStatistic(input.data.raw$Y[[1]]))
            {
                if (length(dim(input.data.raw$Y[[1]])) < 3)
                    attr(data, "ycol") <- 1
                else
                    attr(data, "ycol") <- NCOL(input.data.raw$Y[[1]])
            }

        } else
        {
            if (!is.data.frame(data) && !is.matrix(data))
                data <- TidyTabularData(data)

            # Removing duplicate columns
            if (length(dim(data)) == 2 && any(d <- duplicated(names(data))))
                data <- data[, !d]

            # flipStandardCharts::Scatterplot takes an array input, with column numbers indicating how to plot.
            if (is.null(attr(data, "scatter.variable.indices")))
                attr(data, "scatter.variable.indices") <- scatterVariableIndices(input.data.raw, data, show.labels)
        }
    }
    # Charts that plot the distribution of raw data (e.g., histograms)
    else if (isDistribution(chart.type))
    {
        # input.data.raw could be NULL and the result below be a logical of zero length.
        if (is.null(input.data.raw))
            input.data.raw <- list(NULL)
        len <- Sum(!vapply(input.data.raw, is.null, FALSE))
        if (len > 1L)  # variables from multiple GUI controls
        {
            if (NCOL(input.data.raw[[1]]) > 1 && (NCOL(input.data.raw[[2]]) == 1 || len > 2))
                stop("If using a grouping variable, you may only have one additional variable.")
            # Splitting the first variable by the second
            else if (#!is.null(input.data.raw[[2]]) &&
                NCOL(input.data.raw[[1]]) == 1 && NCOL(input.data.raw[[2]]) == 1)
            {
                if (!is.null(weights))
                    weights <- SplitVectorToList(weights, data[[2]])
                data <- SplitVectorToList(data[[1]], data[[2]])
                attr(data, "weights") <- weights
            }
        }
        else # Coercing data to numeric format, if required
            data <- AsNumeric(data, binary = FALSE)
        #if (!is.list(data))
        #    data <- list(data)
    }
    else
    {
        # Set rownames before TidyTabularData so that factor are not converted to numeric
        tmp.stat <- attr(data, "statistic")
        data <- useFirstColumnAsLabel(data,
            allow.numeric.rownames = chart.type %in% c("Area", "Bar", "Column", "Line", "Stream"))
        attr(data, "statistic") <- tmp.stat
    }
    data
}


setWeight <- function(x, weights)
{
    if (!is.null(w <-  attr(x, "weights")))
        return(w)
    weights
}

#' Check for object of class list or a \emph{ragged} array
#' @noRd
isListOrRaggedArray <- function(x)
    inherits(x, "list") || (inherits(x, "array") && !inherits(x, "ts") &&
                            !all(vapply(x, length, 1L) == 1))


#' @noRd
useFirstColumnAsLabel <- function(x, remove.duplicates = TRUE,
    allow.numeric.rownames = TRUE, allow.duplicate.rownames = TRUE)
{
    if (length(dim(x)) != 2 || ncol(x) == 1)
        return(x)
    if (NROW(x) == 1) # single row input
        return(x)
    if (hasUserSuppliedRownames(x))
        return(x)

    if (!allow.numeric.rownames && is.numeric(x[,1]))
        return(x)

    # Catch Q Tables which have numeric row names but are
    # not raw data tables. It is not appropriate to use
    # the first column as a label in this case because
    # it contains a statistic.
    if (allow.numeric.rownames
        && IsQTable(x)
        && !isRawDataQTable(x))
        return(x)

    # What to do with duplicate rownames?
    ind.dup <- duplicated(x[,1])

    # Duplicated numeric vectors are most likely data variables, not rownames
    if (any(ind.dup) && is.numeric(x[,1]))
        return(x)

    # For duplicated character vectors, we remove duplicates
    if (any(ind.dup))
    {
        if (!allow.duplicate.rownames) # scatterplot
        {
            warning("First column was not used as labels ",
                    "because it contains duplicated values: ",
                    paste(unique(x[ind.dup,1]), collapse=", "))
            return(x)
        }

        # If too many duplicates, then assume it is not expected to be a rowname
        # The exception is when the rownames are QDates
        is.date <- is.factor(x[,1]) &&
            all(!is.na(suppressWarnings(AsDate(levels(x[,1]), on.parse.failure = "silent"))))
        if ((!is.date) && mean(ind.dup, na.rm = T) > 0.9) # too many duplicates
            return(x)
        wmsg <- if (IsDateTime(x[,1])) ". Check aggregation level of date variable '"
                else                   ". Consider aggregating on '"

        warning("Duplicated entries in '", colnames(x)[1], "': ",
            paste(unique(x[ind.dup,1]), collapse = ", "),
            wmsg, colnames(x)[1], "'.")
        if (remove.duplicates)
        {
            warning("Only the first unique entry is shown.")
            x <- x[!ind.dup, ]
        }
        else
            return(x)
    }
    if (inherits(x[,1], 'Date') || inherits(x[,1], 'POSIXct') ||
        inherits(x[,1], 'POSIXlt') || inherits(x[,1], 'POSIXt'))
        r.tmp <- format(x[,1], "%b %d %Y")
    else if (is.factor(x[,1])) # QDates are also factors
        r.tmp <- make.unique(as.character(x[,1]))
    else
        r.tmp <- make.unique(as.character(x[,1]))

    is.missing <- is.na(r.tmp)
    if (any(is.missing))
        warning("Rows ", paste(which(is.missing), collapse = ","),
                " have been omitted because of missing values.")
    ind <- which(!is.missing)

    c.title <- colnames(x)[1]
    c2.title <- if (NCOL(x) == 2) colnames(x)[2]
    x <- x[ind, -1, drop = FALSE]
    rownames(x) <- r.tmp[ind]
    attr(x, "categories.title") <- c.title
    if (!is.null(c2.title))
        attr(x, "values.title") <- c2.title
    return(x)
}

setAxisTitles <- function(x, chart.type, drop, values.title = "")
{
    if (isScatter(chart.type))
    {
        # Charting functions will automatically use column names
        attr(x, "categories.title") <- ""
        attr(x, "values.title") <- ""

    } else if (chart.type == "Heat")
    {
        # No default axis labels for summary tables
        # Because it depends on the question type used to create the table
        if (length(attr(x, "questions")) == 2 &&
            "SUMMARY" %in% attr(x, "questions"))
        {
            attr(x, "categories.title") <- ""
            attr(x, "values.title") <- ""
        }

        if (is.null(attr(x, "categories.title")))
            attr(x, "categories.title") <- names(dimnames(x))[2]
        if (is.null(attr(x, "categories.title")))
            attr(x, "categories.title") <- attr(x, "questions")[2]

        if (is.null(attr(x, "values.title")))
            attr(x, "values.title") <- names(dimnames(x))[1]
        if (is.null(attr(x, "values.title")))
            attr(x, "values.title") <- attr(x, "questions")[1]
    } else
    {
        # Extract categories.title from aggregated data
        if (is.null(attr(x, "categories.title")))
            attr(x, "categories.title") <- names(dimnames(x))[1]
        # Extract categories.title from Qtables
        if (is.null(attr(x, "categories.title")) && !is.null(attr(x, "questions")))
            attr(x, "categories.title") <- attr(x, "questions")[1]
        if (!is.null(attr(x, "statistic")) && grepl("%$", attr(x, "statistic")))
            attr(x, "values.title") <- "%"
        else if (!is.null(attr(x, "statistic")) && grepl("Percent", attr(x, "statistic")))
            attr(x, "values.title") <- ""
        else if (any(nchar(attr(x, "statistic"))))
            attr(x, "values.title") <- attr(x, "statistic")
        if (is.null(attr(x, "values.title")) && length(dimnames(x)) == 3)
            attr(x, "values.title") <- dimnames(x)[[3]][1]
    }
    if (sum(nchar(values.title)) > 0)
        attr(x, "values.title") <- values.title
    if (is.null(attr(x, "values.title")))
        attr(x, "values.title") <- ""
    if (drop && !is.data.frame(x) && !chart.type %in% c("Scatter", "Heat"))
    {
        # only drop 1 dimension from a 2d matrix
        if (length(dim(x)) == 2 && (dim(x)[2] == 1 || dim(x)[1] == 1)) {
            if (dim(x)[2] == 1) {
                tmp.vec <- x[, 1]
                names(tmp.vec) <- rownames(x)
            }
            else if (dim(x)[1] == 1) {
                tmp.vec <- x[1, ]
                names(tmp.vec) <- colnames(x)
            }
            attr(tmp.vec, "statistic") <- attr(x, "statistic")
            attr(tmp.vec, "questions") <- attr(x, "questions")
            attr(tmp.vec, "categories.title") <- attr(x, "categories.title")
            attr(tmp.vec, "values.title") <- attr(x, "values.title")
            x <- tmp.vec
        }
        else
            x <- CopyAttributes(drop(x), x)
    }
    x
}

getFullRowNames <- function(x)
{
    if (!is.null(attr(x, "span")))
        return(apply(attr(x, "span")$rows, 1, paste, collapse = " - "))
    else if (!is.null(nrow(x)) && hasUserSuppliedRownames(x))
        return(MakeUniqueNames(rownames(x)))
    else if (!is.list(x) && is.null(nrow(x)))
        return(MakeUniqueNames(names(x)))
    else if (is.list(x) && length(x) == 1)
        return(getFullRowNames(x[[1]]))
    else
        return(NULL)
}


#' Helps tidy Q variables and tables
#' @description Inputs supplied via input.data.raw can be in a range of
#'  formats. This function does a minimal job of checking for attribute
#'  and using these as names when appropriate. Currently, it does
#'  two functions. (1) Returns the span instead of the values and
#'  (2) assigns column names to 1-dimensional Q tables. Inputs which
#'  cannot be safely converted to a matrix (e.g. date/time or factors)
#'  are returned as is without any changes.
#'
#' @param x Q table or variable
#' @param use.span Logical; Whether the span categories should be returned
#'  instead of the values in the table. Row names will be preserved.
#'  A warning will be given if this option is selected but no span
#'  attribute is found in \code{x}.
#' @param show.labels This option is only relevant for Q variables.
#'   For tables, the resulting variable will always be named by
#'   by the 'name' attribute, but for variables both the 'label' and
#'   'name' attribute can be used.
#' @param is.scatter.annot.data This condition is applied to input
#'   data expected to be used for annotation data for scatterplots.
#'   it checks that the data is one-dimensional and stops immediately
#'   and gives an error if this condition is not met. This avoid
#'   some nonsense output or misleading error messages that might
#'   be given by PrepareData.
#'
#' @export
PrepareForCbind <- function(x, use.span = FALSE, show.labels = TRUE,
                        is.scatter.annot.data = FALSE)
{
    if (is.null(x))
        return(x)
    if (is.scatter.annot.data && NCOL(x) > 1)
        stop("Annotation data for Scatterplots should be a single-column table ",
             "or variable with the same number of values as the number of ",
             "points in the chart")

    allow.qtables <- get0("ALLOW.QTABLE.CLASS", ifnotfound = FALSE, envir = .GlobalEnv)

    if (!allow.qtables)
        x <- unclassQTable(x)

    if (use.span && is.null(attr(x, "span")))
        warning("Spans were not used as this attribute was not found in the data.")

    new.dat <- NULL
    if (inherits(x, c("POSIXct", "POSIXt", "Date")) || is.factor(x))
    {
        # For variables, this function is not really required
        # and for non-atomic types it results in info being lost
        new.dat <- data.frame(x)

    } else if (use.span && !is.null(attr(x, "span")))
    {
        # Q tables can always be converted to a matrix
        new.dat <- as.matrix(attr(x, "span")$rows[, 1])
        rownames(new.dat) <- if (!is.null(rownames(x))) rownames(x) else names(x)

        # Assign a blank name, so this column is not
        # accidentally used for another variable
        # The space is needed to avoid ugly R defaults
        colnames(new.dat) <- " "
        new.dat <- CopyAttributes(new.dat, x)
        return(new.dat)
    }
    else if (!is.list(x))
    {
        # Avoid trying to convert complex data structures
        # including dataframes which might have different types
        new.dat <- as.matrix(x)

    } else
        new.dat <- x

    # Multi-column tables are generally already correctly named
    if ((is.data.frame(x) || !is.list(x)) && ncol(new.dat) == 1)
    {
        if (!is.null(attr(x, "label")) && show.labels)     # x is a variable
            colnames(new.dat) <- Labels(x)
        else if (!is.null(attr(x, "name")) && length(Names(x)) == 1) # x is a table or a variable
            colnames(new.dat) <- Names(x)
        else
            colnames(new.dat) <- " "
    }
    CopyAttributes(new.dat, x)
}



rawDataLooksCrosstabbable <- function(input.data.raw, data)
{
    if (is.null(input.data.raw))
        return(FALSE)
    if (is.null(input.data.raw))
        return(FALSE)
    not.nulls <- !vapply(input.data.raw, is.null, logical(1L))
    if (length(not.nulls) == 1)
        return(FALSE)
    if (!not.nulls[1] || !not.nulls[2])
        return(FALSE)
    if (length(not.nulls) > 2)
    {
        if (Sum(not.nulls) != 2)
            return(FALSE)
        input.data.raw <- input.data.raw[1:2]
    }
    nms <- names(input.data.raw)
    ncols <- vapply(input.data.raw, NCOL, integer(1L))
    if (any(ncols != 1))
        return(FALSE)
    return(all(nms == c("X", "Y")))
}

hasUserSuppliedRownames <- function(data)
{
    if (is.null(rownames(data)))
        return(FALSE)
    tmp <- attr(data, "assigned.rownames")
    if (isTRUE(tmp))
        return(TRUE)
    if (!is.null(tmp) && !tmp)
        return(FALSE)
    if (length(dim(data)) < 2 && is.null(names(data)))
        return(FALSE)

    # Default row names
    rnames <- gsub("Row ", "", rownames(data))
    if (all(rnames == as.character(1:nrow(data))))
        return(FALSE)

    return(TRUE)
}


#' @importFrom utils tail
tidyLabels <- function(data, chart.type)
{
    tmp <- NULL
    vertical.chart <- isDistribution(chart.type) || chart.type == "Venn"
    if (length(dim(data)) >= 2)
    {
        orig.names <- if (vertical.chart) colnames(data)
                      else                rownames(data)
        if (!IsDateTime(orig.names))
        {
            tmp <- ExtractCommonPrefix(orig.names)
            if (!is.na(tmp$common.prefix))
            {
                warning(sprintf("'%s' has been removed from labels. To turn off de-select 'DATA MANIPULATION > Tidy labels'", tmp$common.prefix))
                if (vertical.chart)
                    colnames(data) <- tmp$shortened.labels
                else
                {
                    rownames(data) <- tmp$shortened.labels
                    if (is.null(attr(data, "categories.title")))
                        attr(data, "categories.title") <- tmp$common.prefix
                }
            }
        }
    }
    else if (!is.null(names(data))) # lists and vectors
    {
        if (!IsDateTime(names(data)))
        {
            tmp <- ExtractCommonPrefix(names(data))
            if (!is.na(tmp$common.prefix))
            {
                warning(sprintf("'%s' has been removed from labels. To turn off de-select 'DATA MANIPULATION > Tidy labels'", tmp$common.prefix))
                names(data) <- tmp$shortened.labels
                if (is.null(attr(data, "categories.title")))
                    attr(data, "categories.title") <- tmp$common.prefix
            }
        }
    }

    # Remove span labels
    if (isScatter(chart.type) &&  !is.null(rownames(data)) &&
        all(grepl(" - ", rownames(data), fixed = TRUE)))
    {
        rownames(data) <- MakeUniqueNames(sapply(rownames(data),
            function(x) tail(strsplit(x, " - ")[[1]], n = 1)))
    }
    data
}


checkRegressionOutput <- function(x)
{
    # First element always a single element
    # Second element is a list of elements
    # Last four elements are Z1, Z2, groups and labels that should never be regression outputs
    return(c(inherits(x$X, "Regression"), any(sapply(x$Y, function(e) inherits(e, "Regression")))))
}

#' @importFrom flipFormat TidyLabels
extractRegressionScatterData <- function(x, y.axis = FALSE, name = NULL)
{
    if (!inherits(x, "Regression"))
        return(x)
    chart.data <- ExtractChartData(x)
    if (!is.null(x$importance))
        names(chart.data) <- TidyLabels(names(chart.data))
    if (y.axis)
    {
        chart.data <- as.array(chart.data)
        attr(chart.data, "name") <- name
    }
    return(chart.data)
}


# This function is used when scatter.mult.yvals = TRUE
# It converts the input data frame which a data series in each column
# into a the standard input format, where the data series
# is indicated by the value in the "Groups" column
# If x-coordinates are supplied in input.data.raw$X, then
# rownames attached to the x-coordinates will be used as
# rownames of the resulting data frame
# Otherwise, the rownames of data will be used as the
# x-coordinates and the rownames of the output data
# will be blank (with spaces as padding for uniqueness)
# This function also updates the attribute "scatter.variable.indices"
# to describe the format of the output data frame

convertScatterMultYvalsToDataFrame <- function(data, input.data.raw, show.labels, date.format)
{
    data.row.labels <- rownames(data)
    n <- nrow(data)
    if (any(reg.outputs <- sapply(input.data.raw$Y, function(e) inherits(e, "Regression"))))
    {
        extracted.data.raw.Y <- input.data.raw$Y
        extracted.data.raw.Y[reg.outputs] <- lapply(input.data.raw$Y[reg.outputs], ExtractChartData)
        regression.names <- names(input.data.raw$Y)
        idx <- which(reg.outputs)
        for(i in seq_along(idx))
            attr(extracted.data.raw.Y[[idx[i]]], "label") <- regression.names[idx[i]]
        y.names <- if (show.labels) Labels(extracted.data.raw.Y) else Names(extracted.data.raw.Y)
    } else
        y.names <- if (show.labels) Labels(input.data.raw$Y) else Names(input.data.raw$Y)

    # Figure out which columns to use as the X and Y coordinates
    if (is.list(input.data.raw$Y) && is.null(input.data.raw$X))
    {
        # No X-coordinates supplied in variables
        m <- length(input.data.raw$Y)
        y.ind <- 1:m
        xvar <- rep(1:n, m)

    } else if (is.null(input.data.raw$Y) && hasUserSuppliedRownames(data))
    {
        # Use rowlabels as X-coordinate if character labels given
        m <- ncol(data)
        if (!is.null(attr(data, "ycol")))
            m <- attr(data, "ycol")
        y.ind <- 1:m
        xvar <- rep(rownames(data), m)
        data.row.labels <- rep("", nrow(data))
    } else
    {
        # Otherwise use first column as X-coordinates
        m <- ncol(data) - 1
        if (!is.null(attr(data, "ycol")))
            m <- attr(data, "ycol") - 1
        y.ind <- (1:m) + 1
        xvar <- rep(data[,1], m)
    }

    if (!hasUserSuppliedRownames(data))
        data.row.labels <- rep("", nrow(data))
    if (length(y.names) < m)
        y.names <- colnames(data)[y.ind]
    if (length(y.names) < m)
        y.names <- paste("Group", 1:m)
    if (any(checkRegressionOutput(input.data.raw)) && length(y.names) >= m)
        y.names <- colnames(data)[y.ind]

    # Data from other statistics is restructured and appended separately
    extravar <- NULL
    if (!is.null(attr(data, "ycol")))
    {
        # Other statistics are in the rest of input.data.raw$Y[[1]]
        # But we need to take from data because we may have removed row/cols
        yvar <- as.vector(unlist(data[,y.ind]))
        y.names <- dimnames(input.data.raw$Y[[1]])[[2]]
        tmp.ind <- charmatch(y.names, colnames(data)[y.ind])
        y.names <- y.names[!is.na(tmp.ind)]
        y.names.patt <- paste(paste0("\\Q", y.names, "\\E"), collapse = "|")
        stat.names <- dimnames(input.data.raw$Y[[1]])[[3]]
        extravar <- matrix(NA, nrow = length(yvar), ncol = length(stat.names) - 1)

        for (i in 2:length(stat.names))
        {
            stat.names.patt <- paste0("\\Q.", stat.names[i], "\\E$") # make patt strict (e.g 'p')!
            tmp.ind <- intersect(grep(stat.names.patt, colnames(data)),
                                 grep(y.names.patt, colnames(data)))
            if (length(tmp.ind) > 0)
                extravar[,i-1] <- unlist(data[,tmp.ind])
        }
        colnames(extravar) <- stat.names[-1]

    } else if (length(dim(data)) >= 3)
    {
        # Other statistics are in the 3rd dimension of table
        yvar <- as.vector(unlist(data[,y.ind,1]))
        extravar <- apply(data[, y.ind, -1, drop = FALSE], 3, unlist)

    } else # simple case with no other statistics
        yvar <- as.vector(unlist(data[,y.ind]))


    # newdata needs to use data rather than input.data.raw
    # otherwise it will not handle filters etc
    newdata <- data.frame(X = xvar,
                          Y = yvar,
                          Groups = factor(rep(y.names, each = n), levels = y.names),
                          stringsAsFactors = FALSE)

    if (length(extravar) > 0)
        newdata <- cbind(newdata, extravar)
    rownames(newdata) <- if (length(unique(data.row.labels)) <= 1) NULL
                         else                                      MakeUniqueNames(rep(data.row.labels, m))
    if (!grepl("^No date", date.format) && date.format != "Automatic")
    {
        if (IsDateTime(as.character(newdata[,1])))
            newdata[,1] <- format(AsDate(as.character(newdata[,1]),
            us.format = !grepl("International", date.format)), "%b %d %Y")
    }

    # Preserve column names where possible
    if (!is.null(input.data.raw$X))
        colnames(newdata)[1] <- colnames(data)[1]
    else if (!is.null(qst <- attr(data, "questions")))
    {
        colnames(newdata)[1] <- qst[1]
        if (length(qst) >= 2)
            colnames(newdata)[3] <- qst[2]
    }
    if (length(dim(data)) == 3)
        colnames(newdata)[2] <- dimnames(data)[[3]][1]
    else if (!is.null(attr(data, "statistic")))
        colnames(newdata)[2] <- attr(data, "statistic")


    data <- newdata
    attr(data, "scatter.variable.indices") <- c(x = 1, y = 2, sizes = 0, colors = 3, groups = 3)
    return(data)
}

#' @importFrom flipU IsQTable
containsQTable <- function(x)
{
    if (is.data.frame(x)) return(FALSE)
    if (!is.list(x)) return(IsQTable(x))
    any(vapply(x, containsQTable, logical(1L)))
}

#' @importFrom abind abind
addStatTesting <- function(x, x.siginfo, p.cutoffs, colors.pos, colors.neg, colors.on.font, symbol, symbol.size)
{
    arrow.dir <- x.siginfo$significancedirection
    if (all(arrow.dir == "None"))
        return(x)

    arrow.pval <- x.siginfo$pcorrected
    arrow.colors <- rep("", length(arrow.dir))
    ind.pos <- which(arrow.dir == "Up")
    for (ii in ind.pos)
    {
        j <- max(which(arrow.pval[ii] < p.cutoffs))
        arrow.colors[ii] <- colors.pos[j]
    }
    ind.neg <- which(arrow.dir == "Down")
    for (ii in ind.neg)
    {
        j <- max(which(arrow.pval[ii] < p.cutoffs))
        arrow.colors[ii] <- colors.neg[j]
    }

    dn <- dim(x)
    if (is.null(dn)) # vector
    {
        tmp.x <- matrix(x, length(x), 1, dimnames = list(names(x), NULL))

    } else if (length(dn) == 1)
    {
        dn <- c(dn, 1)
        tmp.x <- matrix(x, ncol = 1, dimnames = list(rownames(x), NULL))
    } else if (is.null(attr(x, "statistic", exact = TRUE)) && length(dn) == 2)
    {
        tmp.x <- array(x, c(dn[1], 1, dn[2]))
        dimnames(tmp.x) <- list(dimnames(x)[[1]], NULL, dimnames(x)[[2]])
    } else
        tmp.x <- x

    # Append new cell-statistic and annotation for each differently colored arrow
    # Usually, this is one color each for each direction but there are theoretically 10 levels each
    mat.list <- list(tmp.x)
    annot.list <- list()
    signames <- c()
    k <- 1
    for (tmp.dir in c("Up", "Down"))
    {
        tmp.col <- unique(arrow.colors[which(arrow.dir == tmp.dir)])
        for (cc in tmp.col)
        {
            j <- length(mat.list)
            mat.list[[j+1]] <- matrix(arrow.dir == tmp.dir & arrow.colors == cc,
                nrow=nrow(tmp.x), ncol=ncol(tmp.x), byrow = TRUE)
            tmp.signame <- paste0("signif", tmp.dir, cc)
            signames <- c(signames, tmp.signame)

            # Add annotation for font colors
            if (colors.on.font)
            {
                annot.list[[k]] <- list(type = "Recolor text",
                    data = tmp.signame, threstype = "above threshold", threshold = 0,
                    color = cc)
                k <- k + 1
            }

            # Add annotation for symbol (arrow or caret)
            if (symbol != "None")
            {
                annot.list[[k]] <- list(type = paste(symbol, "-", tolower(tmp.dir)),
                    data = tmp.signame, threstype = "above threshold", threshold = 0,
                    color = cc, size = symbol.size)
                k <- k + 1
            }
        }
    }
    new.dat <- abind(mat.list, along = 3)
    if (length(dim(tmp.x)) == 3)
        dimnames(new.dat)[[3]] <- c(dimnames(tmp.x)[[3]], signames)
    else
        dimnames(new.dat)[[3]] <- c(attr(x, "statistic", exact = TRUE), signames)
    new.dat <- CopyAttributes(new.dat, x)
    attr(new.dat, "statistic") <- NULL
    attr(new.dat, "signif.annotations") <- annot.list
    return(new.dat)
}

updateQStatisticsInfo <- function(x, original.dim.names, original.is.multistat, transpose)
{
    x.siginfo <- attr(x, "QStatisticsTestingInfo")
    if (is.null(x.siginfo))
        return(x)
    original.ndim <- length(original.dim.names)
    if (original.ndim < 2 || (original.ndim == 2 && original.is.multistat))
        original.ncol <- 1
    else
        original.ncol <- length(original.dim.names[[2]])
    if ((length(dim(x)) < 2 || NCOL(x) == 1) && original.ncol == 1)
    {
        curr.names <- if (length(dim(x)) == 0) names(x) else rownames(x)
        ind <- match(curr.names, original.dim.names[[1]])
        attr(x, "QStatisticsTestingInfo") <- x.siginfo[ind,]
        return(x)
    }
    if (transpose)
        original.dim.names <- original.dim.names[2:1]
    rows.changed <- length(dimnames(x)[[1]]) != length(original.dim.names[[1]]) ||
        any(dimnames(x)[[1]] != original.dim.names[[1]])
    cols.changed <- length(dimnames(x)[[2]]) != length(original.dim.names[[2]]) ||
        any(dimnames(x)[[2]] != original.dim.names[[2]])
    if (!rows.changed && !cols.changed)
        return(x)

    x.siginfo <- attr(x, "QStatisticsTestingInfo")
    cind <- if (transpose) 1 else 2
    rind <- if (transpose) 2 else 1
    nc <- length(original.dim.names[[cind]])
    row.ord <- match(dimnames(x)[[1]], original.dim.names[[1]])
    col.ord <- match(dimnames(x)[[2]], original.dim.names[[2]])
    ind2d <- expand.grid(col.ord, row.ord)
    ind <- 1:nrow(ind2d)
    for (ii in 1:length(ind))
        ind[ii] <- (ind2d[ii, cind] - 1) * nc + ind2d[ii, rind]
    attr(x, "QStatisticsTestingInfo") <- x.siginfo[ind,]

    # Remove QStatisticsTestingInfo if any of it is invalid
    # to avoid an exception getting thrown on export
    if (any(!is.finite(attr(x, "QStatisticsTestingInfo")$significancearrowratio)))
        attr(x, "QStatisticsTestingInfo") <- NULL
    return(x)
}

#' @importFrom flipU IsQTable
unclassQTable <- function(data)
{
    if (is.null(data)) return(data)
    if (is.list(data) && !is.data.frame(data))
    {
        original.class <- class(data)
        # Use TRUE to retain attributes and block them being forgotten in the lapply call
        data[TRUE] <- lapply(data, unclassQTable)
        class(data) <- setdiff(original.class, "QTable")
        return(data)
    }
    if (IsQTable(data) && is.null(attr(data, "table.select.subscripted")))
    {
        data <- unclass(data)
        data.attributes <- attributes(data)
        is.subscripted.table <- !is.null(data.attributes[["original.questiontypes"]])
        if (!is.subscripted.table) return(data)
        data.attribute.names <- names(data.attributes)
        attr.to.remove <- qTableAttributesToRemove(data.attribute.names)
        attributes(data)[attr.to.remove] <- NULL
        return(data)
    }
    data
}

#' @importFrom verbs IsQTableAttribute
qTableAttributesToRemove <- function(attr.names)
{
    qtable.attr.names <- eval(formals(IsQTableAttribute)[["qtable.attrs"]])
    qtable.attr.names <- c(qtable.attr.names, paste0("original.", qtable.attr.names))
    attr.names %in% qtable.attr.names & !attr.names %in% c("dim", "dimnames", "names")
}

#' Check the questions and statistics attribute,
#' and the dimnames in the last dimension,
#' to work out if this table is likely to be
#' a raw data table in Q/Displayr
#' @noRd
isRawDataQTable <- function(x) {
    questions <- attr(x, "questions")
    if ("RAW DATA" %in% questions)
        return(TRUE)

    statistic <- attr(x, "statistic")
    if (statistic %in% c("Values", "Labels"))
        return(TRUE)

    dn <- dimnames(x)
    if (is.null(dn))
        return(FALSE)

    last.dn <- dn[length(dn)]
    if (last.dn %in% c("Values", "Labels"))
        return(TRUE)

    return(FALSE)
}
Displayr/flipChart documentation built on Sept. 20, 2024, 10:56 a.m.