#' Parse data from dataEntry R GUI control
#'
#' Takes a raw character matrix returned by dataEntry R GUI control
#' and attempts to parse it to something more friendly such as a
#' numeric matrix.
#' @param raw.matrix Character matrix
#' @param warn Whether to show warnings
#' @param want.data.frame logical; should a \code{data.frame} be returned instead
#' of a matrix or vector? If the input matrix returns a text matrix, this converted
#' to a dataframe
#' @param want.factors logical; should a text variable be converted to a factor?
#' Ignored if \code{want.data.frame} is \code{FALSE}
#' @param want.col.names logical; should the first row be interpretted as column names?
#' Ignored if \code{want.data.frame} is \code{FALSE}
#' @param want.row.names logical; should the first colulm be interpretted as row names?
#' Ignored if \code{want.data.frame} is \code{FALSE}
#' @param us.format logical; should the U.S. convention be used when parsing dates?
#' Ignored if \code{want.data.frame} is \code{FALSE}
#' @return if \code{want.data.frame == FALSE}, numeric vector or matrix, possibly
#' with an attribute \code{"statistic"} if row and column names are present; otherwise,
#' a \code{data.frame}
#' @details First removes empty rows and columns. If every entry of
#' the resulting matrix (or vector) is numeric, a matrix without
#' row or column names is returned. If the matrix has row and
#' column names and a value in the \code{[1,1]} entry, that entry
#' will be returned as an attribute in the output called
#' \code{"statistic"}.
#' @note If characters are present in the entire first row (including
#' the \code{[1,1]} entry), but the first column has all numeric
#' entries aside from that \code{[1,1]} entry, then the resulting matrix
#' will have column names, but no row names. And similarly for the case when
#' the first row is all numeric, but the \code{[1,1]} entry is not, the output
#' matrix will have row names, but no column names.
#' to extract both row and column names from the resulting matrix
#' @importFrom utils tail
#' @export
ParseUserEnteredTable <- function(raw.matrix,
warn = TRUE,
want.data.frame = FALSE,
want.factors = TRUE,
want.col.names = TRUE,
want.row.names = FALSE,
us.format = NULL)
{
if (all(raw.matrix == ""))
stop("no data has been entered")
keep.col.names <- FALSE
keep.row.names <- FALSE
old.col.names <- colnames(raw.matrix)
old.row.names <- rownames(raw.matrix)
if (!is.null(old.col.names) && any(is.na(suppressWarnings(as.numeric(old.col.names))))) {
keep.col.names <- TRUE
want.col.names <- FALSE
}
if (!is.null(old.row.names) && any(is.na(suppressWarnings(as.numeric(old.row.names))))) {
keep.row.names <- TRUE
want.row.names <- FALSE
}
if (keep.row.names || keep.col.names) {
# If table already has dimnames then maybe no parsing is needed
res <- asNumeric(raw.matrix, nrow(raw.matrix), ncol(raw.matrix))
if (is.numeric(res)) {
dimnames(res) <- dimnames(raw.matrix)
return(res)
}
}
m <- removeEmptyRowsAndColumns(raw.matrix, drop = FALSE)
m <- extractTableTitle(m)
m.title <- attr(m, "title")
if (!is.null(m.title))
m <- removeEmptyRowsAndColumns(m, !want.data.frame)
m <- extractRowColumnTitles(m)
row.col.title <- attr(m, "row.column.names")
res <- NULL
if (!isTRUE(want.data.frame)) # including NULL
res <- parseAsVectorOrMatrix(m, FALSE)
# Try parsing as dataframe if output was a character matrix
if (isTRUE(want.data.frame) || is.character(res) && NCOL(res) > 1)
{
if (!is.null(res))
{
want.col.names <- isTRUE(attr(res, "col.names.given"))
want.row.names <- isTRUE(attr(res, "row.names.given"))
}
res <- ParseAsDataFrame(as.matrix(m), warn, want.factors, want.col.names, want.row.names, us.format)
}
attr(res, "row.names.given") <- NULL
attr(res, "col.names.given") <- NULL
attr(res, "row.column.names") <- row.col.title
attr(res, "title") <- m.title
if (keep.col.names)
colnames(res) <- tail(old.col.names, ncol(res))
if (keep.row.names)
rownames(res) <- tail(old.row.names, nrow(res))
return(res)
}
extractTableTitle <- function(x)
{
if (NCOL(x) == 1)
return(x)
# We require that titles are in the first cell of the first row which is otherwise empty
# Note that entries in the 1-2 position are assumed to be column titles
entries.in.first.row <- which(nchar(x[1,]) > 0)
if (length(entries.in.first.row) == 1 && entries.in.first.row == 1)
{
title <- x[1,entries.in.first.row]
x <- x[-1, , drop = FALSE]
attr(x, "title") <- title
return(x)
}
return(x)
}
#' Check For Titles in a User-Entered Table
#'
#' Searches the first row and column
#' in a character matrix to see if they each have a single
#' non-empty entry, and if so returns them in a vector
#' @return matrix with row and column title (if found) moved
#' into the "row.column.names" attribute
#' @noRd
#' @keywords internal
extractRowColumnTitles <- function(m)
{
if (NROW(m) <= 2 || NCOL(m) <= 2 || m[1, 1] != "")
return(m)
row.idx <- m[, 1] != ""
col.idx <- m[1, ] != ""
if (sum(row.idx) != 1L && sum(col.idx) != 1L)
return(m)
row.title <- if (sum(row.idx) == 1L) m[row.idx, 1L] else ""
col.title <- if (sum(col.idx) == 1L) m[1L, col.idx] else ""
m <- m[(1+(sum(col.idx)==1L)):nrow(m), (1+(sum(row.idx)==1L)):ncol(m), drop = FALSE]
attr(m, "row.column.names") <- c(row.title, col.title)
return(m)
}
#' Convert a user-entered/pasted table to a Numeric Matrix
#'
#' Process a user pasted table when the user has not specified
#' that the data is raw data
#' @noRd
#' @keywords internal
#' @importFrom flipTime IsDateTime
#' @importFrom flipU TrimWhitespace
parseAsVectorOrMatrix <- function(m, warn = FALSE)
{
n.row <- NROW(m)
n.col <- NCOL(m)
if (n.row == 1 || n.col == 1)
{
dim.given <- if (n.row == 1) "row.names.given"
else "col.names.given"
vm <- drop(m)
first.entry.chars <- !isTextNumeric(vm[1], allow.missing = TRUE)
if (!first.entry.chars || n.row == n.col) ## unnamed row or column vector
return(asNumeric(m, warn = warn, NROW(m), NCOL(m)))
# Check if entries are dates - note we retain them as characters
# But we want to know if there is a column heading
out <- asNumeric(vm[-1], drop = TRUE, warn = warn)
if (!is.numeric(out))
{
if (IsDateTime(vm[1]) == IsDateTime(vm[-1]))
return(m)
else
{
out <- vm[-1]
attr(out, "name") <- vm[1]
attr(out, dim.given) <- TRUE
return(out)
}
}
attr(out, "name") <- vm[1]
attr(out, dim.given) <- TRUE
return(out)
}
data.attribute <- NULL
row.names.given <- isPossibleLabel(m[-1,1])
col.names.given <- isPossibleLabel(m[1,-1])
# If input data is text, we assume there no row labels unless statistic is supplied
if (any(!isTextNumeric(m[-1,-1], allow.missing = TRUE)))
row.names.given <- isQStatistic(m[1,1])
# Handling ambiguous row/column labels
if (is.na(row.names.given) || is.na(col.names.given))
{
# If input data are percentages then we can do more tests
.is_pct <- function(x) { tmp <- asNumericVector(x); return(isTRUE(grepl("%", attr(tmp, "statistic")))) }
if (.is_pct(m[-1,-1]))
{
if (is.na(row.names.given))
row.names.given <- !.is_pct(m[-1,1])
if (is.na(col.names.given))
col.names.given <- !.is_pct(m[1,-1])
}
# if either is not a label then the 1,1 position cannot be a statistic
else if (isTRUE(!row.names.given))
col.names.given <- isTRUE(isPossibleLabel(m[1,]))
else if (isTRUE(!col.names.given))
row.names.given <- isTRUE(isPossibleLabel(m[,1]))
else
{
# We only need to check whether the 1,1 entry is a statistic
# if BOTH row and column labels might be present
if (isQStatistic(m[1,1]))
{
row.names.given <- TRUE
col.names.given <- TRUE
}
else if (isTRUE(row.names.given))
col.names.given <- FALSE
else if (isTRUE(col.names.given))
row.names.given <- FALSE
else
{
if (nchar(m[1,1]) > 0 && !isTextNumeric(m[1,1]))
{
# at least one of them is a label
# but which one (or both) is unclear
row.names.given <- FALSE
col.names.given <- TRUE
}
else
{
row.names.given <- FALSE
col.names.given <- FALSE
}
}
}
}
if (row.names.given && col.names.given)
{
out <- asNumeric(m[2:n.row, 2:n.col, drop = FALSE], n.row-1, n.col-1)
if (nchar(m[1,1]) > 0)
data.attribute <- m[1, 1]
rownames(out) <- TrimWhitespace(m[-1, 1])
colnames(out) <- TrimWhitespace(m[1, -1])
} else if (row.names.given)
{
out <- asNumeric(m[, 2:n.col, drop = FALSE], n.row, n.col - 1)
rownames(out) <- TrimWhitespace(m[, 1])
} else if (col.names.given)
{
out <- asNumeric(m[2:n.row, , drop = FALSE], n.row - 1, n.col)
colnames(out) <- m[1, ]
}
else
out <- asNumeric(m, n.row, n.col)
if (!is.null(data.attribute))
attr(out, "statistic") <- data.attribute
if (warn && is.character(out))
warning("The entered data could not be interpreted.")
# Save state of row/column names for ParseAsDataFrame
attr(out, "row.names.given") <- row.names.given
attr(out, "col.names.given") <- col.names.given
# Vectors with attributes cannot be printed because of RS-3402
# This is now fixed in Q 5.2.7+, but we retain support for older versions
# by converting to a matrix if necessary
if (!is.null(attr(out, "statistic")) && (is.null(dim(out)) || length(dim(out)) == 1))
{
tmp <- attr(out, "statistic")
out <- as.matrix(out)
attr(out, "statistic") <- tmp
}
out
}
isQStatistic <- function(x)
{
statistic.list <- c("", "%", "% Column Responses", "% Column Share",
"% Excluding NaN", "% Responses", "% Row Responses", "% Row Share",
" %Share", "% Total Responses", "% Total Share", "5th Percentile",
"25th Percentile", "75th Percentile", "95th Percentile",
"Average", "Base n", "Base Population", "Coefficient", "Column %",
"Column Comparisons", "Column n", "Column Names",
"Column Population", "Columns Compared", "Column Standard Error",
"Column Standard Error of Mean", "Corrected p", "Correlation",
"Cumulative %", "d.f", "Effective n", "Effective Base n",
"Expected Average", "Expected %", "Expected Correlation",
"Expected n", "Index", "Interquartile Range", "Labels",
"Lower Confidence Interval", "Lower Confidence Interval %",
"Maximum", "Median", "Minimum", "Missing n", "Mode",
"Multiple Comparison Adjustment", "n", "n Observations",
"Not duplicate", "Observation", "p", "Population", "Probability %",
"Range", "Residual", "Residual %", "Row %", "Row Population",
"Row n", "Standard Deviation", "Standard Error", "Sum", "Text",
"Text With No Blanks", "Total %", "Trimmed Average", "t-Statistic",
"Unique Text", "Upper Confidence Interval",
"Upper Confident Interval %", "Values", "z-Statistic")
return(x %in% statistic.list)
}
# Returns a logical indicating whether the vector of text might be a label
# Returns NA if they are all whole integers or can be parsed as a date
isPossibleLabel <- function(t)
{
v <- asNumericVector(t)
has.numeric.and.missing <- any(isMissing(t)) && any(!is.na(v)) && all(!is.na(v) | isMissing(t))
if (has.numeric.and.missing) # all missing could potentially be a valid label
return(FALSE)
if (any(is.na(v)))
return(TRUE)
# Additional checks to see if the numeric text can also be read as a label
if (any(nchar(t) == 0))
return(FALSE)
if (IsDateTime(t))
return (NA)
if (any(v != as.integer(v)))
return (FALSE)
return(NA)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.