crosstab <- function (..., dec.places = NULL,
type = NULL,
style = "wide",
row.vars = NULL,
col.vars = NULL,
percentages = TRUE,
addmargins = TRUE,
subtotals=TRUE)
# http://rstudio-pubs-static.s3.amazonaws.com/6975_c4943349b6174f448104a5513fed59a9.html
###################################################################################
# #
# Function created by Dr Paul Williamson, Dept. of Geography and Planning, #
# School of Environmental Sciences, University of Liverpool, UK. #
# #
# Adapted from the function ctab() in the catspec packge. #
# #
# Version: 12th July 2013 #
# #
# Output best viewed using the companion function print.crosstab() #
# #
###################################################################################
#Declare function used to convert frequency counts into relevant type of proportion or percentage
{
mk.pcnt.tbl <- function(tbl, type) {
a <- length(row.vars)
b <- length(col.vars)
mrgn <- switch(type, column.pct = c(row.vars[-a], col.vars),
row.pct = c(row.vars, col.vars[-b]),
joint.pct = c(row.vars[-a], col.vars[-b]),
total.pct = NULL)
tbl <- prop.table(tbl, mrgn)
if (percentages) {
tbl <- tbl * 100
}
tbl
}
#Find no. of vars (all; row; col) for use in subsequent code
n.row.vars <- length(row.vars)
n.col.vars <- length(col.vars)
n.vars <- n.row.vars + n.col.vars
#Check to make sure all user-supplied arguments have valid values
stopifnot(as.integer(dec.places) == dec.places, dec.places > -1)
#type: see next section of code
stopifnot(is.character(style))
stopifnot(is.logical(percentages))
stopifnot(is.logical(addmargins))
stopifnot(is.logical(subtotals))
stopifnot(n.vars>=1)
#Convert supplied table type(s) into full text string (e.g. "f" becomes "frequency")
#If invalid type supplied, failed match gives user automatic error message
types <- NULL
choices <- c("frequency", "row.pct", "column.pct", "joint.pct", "total.pct")
for (tp in type) types <- c(types, match.arg(tp, choices))
type <- types
#If no type supplied, default to 'frequency + total' for univariate tables and to
#'frequency' for multi-dimenstional tables
#For univariate table....
if (n.vars == 1) {
if (is.null(type)) {
# default = freq count + total.pct
type <- c("frequency", "total.pct")
#row.vars <- 1
} else {
#and any requests for row / col / joint.pct must be changed into requests for 'total.pct'
type <- ifelse(type == "frequency", "frequency", "total.pct")
}
#For multivariate tables...
} else if (is.null(type)) {
# default = frequency count
type <- "frequency"
}
#Check for integrity of requested analysis and adjust values of function arguments as required
if ((addmargins==FALSE) & (subtotals==FALSE)) {
warning("WARNING: Request to suppress subtotals (subtotals=FALSE) ignored because no margins requested (addmargins=FALSE)")
subtotals <- TRUE
}
if ((n.vars>1) & (length(type)>1) & (addmargins==TRUE)) {
warning("WARNING: Only row totals added when more than one table type requested")
#Code lower down selecting type of margin implements this...
}
if ((length(type)>1) & (subtotals==FALSE)) {
warning("WARNING: Can only request supply one table type if requesting suppression of subtotals; suppression of subtotals not executed")
subtotals <- TRUE
}
if ((length(type)==1) & (subtotals==FALSE)) {
choices <- c("frequency", "row.pct", "column.pct", "joint.pct", "total.pct")
tp <- match.arg(type, choices)
if (tp %in% c("row.pct","column.pct","joint.pct")) {
warning("WARNING: subtotals can only be suppressed for tables of type 'frequency' or 'total.pct'")
subtotals<- TRUE
}
}
if ((n.vars > 2) & (n.col.vars>1) & (subtotals==FALSE))
warning("WARNING: suppression of subtotals assumes only 1 col var; table flattened accordingly")
if ( (subtotals==FALSE) & (n.vars>2) ) {
#If subtotals not required AND total table vars > 2
#Reassign all but last col.var as row vars
#[because, for simplicity, crosstabs assumes removal of subtotals uses tables with only ONE col var]
#N.B. Subtotals only present in tables with > 2 cross-classified vars...
if (length(col.vars)>1) {
row.vars <- c(row.vars,col.vars[-length(col.vars)])
col.vars <- col.vars[length(col.vars)]
n.row.vars <- length(row.vars)
n.col.vars <- 1
}
}
#If dec.places not set by user, set to 2 unlesss only one table of type frequency requested,
#in which case set to 0. [Leaves user with possibility of having frequency tables with > 0 dp]
if (is.null(dec.places)) {
if ((length(type)==1) & (type[1]=="frequency")) {
dec.places <- 0
} else {
dec.places <-2
}
}
#Take the original input data, whatever form originally supplied in,
#convert into table format using requested row and col vars, and save as 'tbl'
args <- list(...)
if (length(args) > 1) {
if (!all(sapply(args, is.factor)))
stop("If more than one argument is passed then all must be factors")
tbl <- table(...)
}
else {
if (is.factor(...)) {
tbl <- table(...)
}
else if (is.table(...)) {
tbl <- eval(...)
}
else if (is.data.frame(...)) {
#tbl <- table(...)
if (is.null(row.vars) && is.null(col.vars)) {
tbl <- table(...)
}
else {
var.names <- c(row.vars,col.vars)
A <- (...)
tbl <- table(A[var.names])
if(length(var.names==1)) names(dimnames(tbl)) <- var.names
#[table() only autocompletes dimnames for multivariate crosstabs of dataframes]
}
}
else if (class(...) == "ftable") {
tbl <- eval(...)
if (is.null(row.vars) && is.null(col.vars)) {
row.vars <- names(attr(tbl, "row.vars"))
col.vars <- names(attr(tbl, "col.vars"))
}
tbl <- as.table(tbl)
}
else if (class(...) == "ctab") {
tbl <- eval(...)
if (is.null(row.vars) && is.null(col.vars)) {
row.vars <- tbl$row.vars
col.vars <- tbl$col.vars
}
for (opt in c("dec.places", "type", "style", "percentages",
"addmargins", "subtotals")) if (is.null(get(opt)))
assign(opt, eval(parse(text = paste("tbl$", opt,
sep = ""))))
tbl <- tbl$table
}
else {
stop("first argument must be either factors or a table object")
}
}
#Convert supplied table style into full text string (e.g. "l" becomes "long")
style <- match.arg(style, c("long", "wide"))
#Extract row and col names to be used in creating 'tbl' from supplied input data
nms <- names(dimnames(tbl))
z <- length(nms)
if (!is.null(row.vars) && !is.numeric(row.vars)) {
row.vars <- order(match(nms, row.vars), na.last = NA)
}
if (!is.null(col.vars) && !is.numeric(col.vars)) {
col.vars <- order(match(nms, col.vars), na.last = NA)
}
if (!is.null(row.vars) && is.null(col.vars)) {
col.vars <- (1:z)[-row.vars]
}
if (!is.null(col.vars) && is.null(row.vars)) {
row.vars <- (1:z)[-col.vars]
}
if (is.null(row.vars) && is.null(col.vars)) {
col.vars <- z
row.vars <- (1:z)[-col.vars]
}
#Take the original input data, converted into table format using supplied row and col vars (tbl)
#and create a second version (crosstab) which stores results as percentages if a percentage table type is requested.
if (type[1] == "frequency")
crosstab <- tbl
else
crosstab <- mk.pcnt.tbl(tbl, type[1])
#If multiple table types requested, create and add these to
if (length(type) > 1) {
tbldat <- as.data.frame.table(crosstab)
z <- length(names(tbldat)) + 1
tbldat[z] <- 1
pcntlab <- type
pcntlab[match("frequency", type)] <- "Count"
pcntlab[match("row.pct", type)] <- "Row %"
pcntlab[match("column.pct", type)] <- "Column %"
pcntlab[match("joint.pct", type)] <- "Joint %"
pcntlab[match("total.pct", type)] <- "Total %"
for (i in 2:length(type)) {
if (type[i] == "frequency")
crosstab <- tbl
else crosstab <- mk.pcnt.tbl(tbl, type[i])
crosstab <- as.data.frame.table(crosstab)
crosstab[z] <- i
tbldat <- rbind(tbldat, crosstab)
}
tbldat[[z]] <- as.factor(tbldat[[z]])
levels(tbldat[[z]]) <- pcntlab
crosstab <- xtabs(Freq ~ ., data = tbldat)
names(dimnames(crosstab))[z - 1] <- ""
}
#Add margins if required, adding only those margins appropriate to user request
if (addmargins==TRUE) {
vars <- c(row.vars,col.vars)
if (length(type)==1) {
if (type=="row.pct")
{ crosstab <- addmargins(crosstab,margin=c(vars[n.vars]))
tbl <- addmargins(tbl,margin=c(vars[n.vars]))
}
else
{ if (type=="column.pct")
{ crosstab <- addmargins(crosstab,margin=c(vars[n.row.vars]))
tbl <- addmargins(tbl,margin=c(vars[n.row.vars]))
}
else
{ if (type=="joint.pct")
{ crosstab <- addmargins(crosstab,margin=c(vars[(n.row.vars)],vars[n.vars]))
tbl <- addmargins(tbl,margin=c(vars[(n.row.vars)],vars[n.vars]))
}
else #must be total.pct OR frequency
{ crosstab <- addmargins(crosstab)
tbl <- addmargins(tbl)
}
}
}
}
#If more than one table type requested, only adding row totals makes any sense...
if (length(type)>1) {
crosstab <- addmargins(crosstab,margin=c(vars[n.vars]))
tbl <- addmargins(tbl,margin=c(vars[n.vars]))
}
}
#If subtotals not required, and total vars > 2, create dataframe version of table, with relevent
#subtotal rows / cols dropped [Subtotals only present in tables with > 2 cross-classified vars]
t1 <- NULL
if ( (subtotals==FALSE) & (n.vars>2) ) {
#Create version of crosstab in ftable format
t1 <- crosstab
t1 <- ftable(t1,row.vars=row.vars,col.vars=col.vars)
#Convert to a dataframe
t1 <- as.data.frame(format(t1),stringsAsFactors=FALSE)
#Remove backslashes from category names AND colnames
t1 <- apply(t1[,],2, function(x) gsub("\"","",x))
#Remove preceding and trailing spaces from category names to enable accurate capture of 'sum' rows/cols
#[Use of grep might extrac category labels with 'sum' as part of a longer one or two word string...]
t1 <- apply(t1,2,function(x) gsub("[[:space:]]*$","",gsub("^[[:space:]]*","",x)))
#Reshape dataframe to that variable and category labels display as required
#(a) Move col category names down one row; and move col variable name one column to right
t1[2,(n.row.vars+1):ncol(t1)] <- t1[1,(n.row.vars+1):ncol(t1)]
t1[1,] <- ""
t1[1,(n.row.vars+2)] <- t1[2,(n.row.vars+1)]
#(b) Drop the now redundant column separating the row.var labels from the table data + col.var labels
t1 <- t1[,-(n.row.vars+1)]
#In 'lab', assign category labels for each variable to all rows (to allow identification of sub-totals)
lab <- t1[,1:n.row.vars]
for (c in 1:n.row.vars) {
for (r in 2:nrow(lab)) {
if (lab[r,c]=="") lab[r,c] <- lab[r-1,c]
}
}
lab <- (apply(lab[,1:n.row.vars],2,function(x) x=="Sum"))
lab <- apply(lab,1,sum)
#Filter out rows of dataframe containing subtotals
t1 <- t1[((lab==0) | (lab==n.row.vars)),]
#Move the 'Sum' label associated with last row to the first column; in the process
#setting the final row labels associated with other row variables to ""
t1[nrow(t1),1] <- "Sum"
t1[nrow(t1),(2:n.row.vars)] <- ""
#set row and column names to NULL
rownames(t1) <- NULL
colnames(t1) <- NULL
}
#Create output object 'result' [class: crosstab]
result <- NULL
#(a) record of argument values used to produce tabular output
result$row.vars <- row.vars
result$col.vars <- col.vars
result$dec.places <- dec.places
result$type <- type
result$style <- style
result$percentages <- percentages
result$addmargins <- addmargins
result$subtotals <- subtotals
#(b) tabular output [3 variants]
result$table <- tbl #Stores original cross-tab frequency counts without margins [class: table]
result$crosstab <- crosstab #Stores cross-tab in table format using requested style(frequency/pct) and table margins (on/off)
#[class: table]
result$crosstab.nosub <- t1 #crosstab with subtotals suppressed [class: dataframe; or NULL if no subtotals suppressed]
class(result) <- "crosstab"
#Return 'result' as output of function
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.