#' @export
#' @title Median polish of two-way tables
#'
#' @description \code{eda_pol} Polishes two-way tables using median, means, or
#' any customizable functions.
#'
#' @param x A three column data frame.
#' @param row Name of column assigned to the row effect.
#' @param col Name of column assigned to the column effect.
#' @param val Name of column assigned to the response variable.
#' @param stat Polishing statistic (default is median).
#' @param plot Boolean determining if an output plot should be generated.
#' @param eps Convergence tolerance parameter.
#' @param maxiter Maximum number of iterations.
#' @param sort Boolean determining if the effects row/columns should be sorted.
#' @param p Re-expression power parameter.
#' @param tukey Boolean determining if Tukey's power transformation should used.
#' If FALSE, the Box-Cox transformation is adopted.
#' @param offset Offset to add to values if at leat one value is 0 and the power
#' is negative.
#' @param col.quant Boolean determining if a quantile classification scheme
#' should be used.
#' @param colpal Color palette to adopt.
#' @param adj.mar Boolean determining if margin width needs to accomodate labels.
#' @param res.size Size of residual values in plot \[0-1\].
#' @param row.size Size of row effect values in plot \[0-1\].
#' @param col.size Size of column effect values in plot \[0-1\].
#' @param res.txt Boolean determining if values should be added to plot.
#' @param label.txt Boolean determining if margin and column labels should be
#' plotted.
#'
#' @returns A list of class \code{eda_polish} with the following named
#' components:
#'
#' \itemize{
#' \item \code{long} The median polish residuals with three columns: Column levels,
#' row levels and residual values.
#' \item \code{wide} The median polish residuals table in wide form.
#' \item \code{row} Row effects table
#' \item \code{col} Column effects table
#' \item \code{global} Overall value (common value)
#' \item \code{iter} Number of iterations before polish stabilizes.
#' \item \code{cv} Table of residuals, row effects, column effects and CV values in long form.
#' \item \code{power} Transformation power applied to values prior to polishing.
#' \item \code{IQ_row} Ratio between interquartile row effect values and 80th quantile of residuals.
#' \item \code{IQ_col} Ratio between interquartile column effect values and 80th quantile of residuals.}
#'
#' @details The function performs a polish on a two way table. By default, it
#' applies a median polish, but other statistical summaries such
#' as the mean can be passed to the function via the \code{stat = } argument.
#' The function returns a list of row/column effects along with global and
#' residual values. It will also generate a colored table if \code{plot =
#' TRUE}.
#'
#' @references
#'
#' \itemize{
#' \item Tukey, John W. 1977. Exploratory Data Analysis. Addison-Wesley
#' \item \href{../articles/polish.html}{Median polish article}}
#'
#' @examples
#' df <- data.frame(region = rep( c("NE", "NC", "S", "W"), each = 5),
#' edu = rep( c("ed8", "ed9to11", "ed12", "ed13to15", "ed16"), 4),
#' perc = c(25.3, 25.3, 18.2, 18.3, 16.3, 32.1, 29, 18.8,
#' 24.3, 19, 38.8, 31, 19.3, 15.7, 16.8, 25.4, 21.1, 20.3, 24, 17.5))
#'
#' M <- eda_pol(df, row = region, col = edu, val = perc, plot = FALSE)
#' plot(M)
eda_pol <- function (x, row= NULL, col = NULL, val = NULL, stat = median, plot = TRUE,
eps = 0.01, maxiter = 5, sort = FALSE, p = 1, tukey = FALSE,
offset = 0.00001, col.quant = FALSE, colpal = "RdYlBu",
adj.mar = TRUE, res.size = 1, row.size = 1, col.size = 1,
res.txt = TRUE, label.txt = TRUE){
if(!missing(x))
{
row <- deparse(substitute(row))
col <- deparse(substitute(col))
val <- deparse(substitute(val))
}
if(is.data.frame(x)){
# Check that table is properly formatted ----
if(dim(x)[2] < 3) stop("The data frame must be in long form with at least 3 columns.")
if(is.null(row)) stop("You need to define the row name column.")
if(is.null(col)) stop("You need to define the column name column.")
if(is.null(val)) stop("You need to define the response values column.")
} else {
stop("Table must be a three column dataframe.")
}
x2 <- data.frame(x[[col]], x[[row]], x[[val]])
colnames(x2) <- c(col, row, val)
if( (any(x2[[val]] < 0)) & (p < 1 )) {
stop(paste("You are applying a power of ",p," to negative values."))
}
if( (any(x2[[val]] == 0)) & (p < 0 )) {
x2[[val]] <- x2[[val]] + offset
print("Warning: zeros were found in the data. Given a negative power, an offset was added")
print("This may lead to large residuals")
}
# Check re-expression is desired
if(p !=1){
x2[[val]] <- eda_re(x2[[val]], p = p, tukey = tukey)
}
# Get values column ----
# val <- colnames(x)[-which(colnames(x) %in% c(row, col))]
# val <- substitute(val)
# row.eff <- aggregate(val ~ eval(row), data = x2, FUN = \(x) 0)
row.eff <- aggregate(x2[[val]] ~ x2[[row]], FUN = \(x) 0)
col.eff <- aggregate(x2[[val]] ~ x2[[col]], FUN = \(x) 0)
colnames(row.eff) <- c(row, val)
colnames(col.eff) <- c(col, val)
# Get initial global effect and residuals ----
# If maxiter == 0, output original data table
if(maxiter == 0) {
global <- 0
row.eff[[val]] <- 0
col.eff[[val]] <- 0
x2 <- as.data.frame(x2)
i <- 0
} else {
global <- stat(x2[[val]])
x2[[val]] <- x2[[val]] - global
res.sum1 <- 0
# Proceed with the polish routine ----
for (i in seq_len(maxiter)){
row.pol <- aggregate(x2[[val]] ~ x2[[row]], FUN = stat)
colnames(row.pol) <- c(row, val)
row.eff[[val]] <- row.eff[[val]] + row.pol[[val]]
global <- global + stat(col.eff[[val]])
col.eff[[val]] <- col.eff[[val]] - stat(col.eff[[val]])
x2 <- merge(x2, row.pol, by = row, suffixes = c("",".fun"))
x2[[val]] <- x2[[val]] - x2[[paste0(val,".fun")]]
x2[[paste0(val,".fun")]] <- NULL
col.pol <- aggregate(x2[[val]] ~ x2[[col]], FUN = stat)
colnames(col.pol) <- c(col, val)
col.eff[[val]] <- col.eff[[val]] + col.pol[[val]]
global <- global + stat(row.eff[[val]])
row.eff[[val]] <- row.eff[[val]] - stat(row.eff[[val]])
x2 <- merge(x2, col.pol, by = col, suffixes = c("",".fun"))
x2[[val]] <- x2[[val]] - x2[[paste0(val,".fun")]]
x2[[paste0(val,".fun")]] <- NULL
res.sum2 <- sum(abs(x2[[val]]))
if(res.sum2 == 0 | abs(res.sum2 - res.sum1) < eps){
break
} else{
res.sum1 <- res.sum2
}
}
}
# Create wide version of table ----
x2w <- reshape(x2, direction = "wide",
idvar = row, timevar = col, sep = "##")
colnames(x2w) <- gsub( ".*##", "", colnames(x2w))
rownames(x2w) <- as.character(x2w[[row]])
x2w <- x2w[ , -1]
x2w <- x2w[ as.character(row.eff[[row]]), ] # Sort row before binding
x2w <- cbind(eff = row.eff[[val]], x2w)
x2w <- x2w[ , c("eff", as.character(col.eff[[col]]))] # Sort column before binding
x2w <- rbind( eff = c(global, col.eff[[val]]), x2w)
# Respect column levels (if present)
if(!is.null(levels(x[[col]])) & sort == FALSE) {
x2w <- x2w[ , c(colnames(x2w[[col]]), c("eff", levels(x[[col]]))) ]
x2[[col]] <- factor(x2[[col]], levels = levels(x[[col]]) )
}else if (sort == TRUE){
col.lvl <- levels(reorder(col.eff[,1],col.eff[,2]))
x2[[col]] <- factor(x2[[col]], levels = col.lvl )
x2w <- x2w[ , c(colnames(x2w[[col]]), c("eff", col.lvl)) ]
}
# Respect row levels (if present)
if(!is.null(levels(x[[row]])) & sort == FALSE) {
x2w <- x2w[c("eff", levels(x[[row]])), ]
x2[[row]] <- factor(x2[[row]], levels = levels(x[[row]]) )
}else if (sort == TRUE){
row.lvl <- levels(reorder(row.eff[,1],row.eff[,2]))
x2[[row]] <- factor(x2[[row]], levels = row.lvl )
x2w <- x2w[c("eff", row.lvl), ]
}
# Create output list ----
names(row.eff)[2] <- "effect"
names(col.eff)[2] <- "effect"
# Compute comparison value
cv <- merge(x2, row.eff)
names(cv)[4] <- paste0(row,".eff")
cv <- merge(cv, col.eff)
names(cv)[5] <- paste0(col,".eff")
cv = cv[ ,-(1:2)]
cv$cv <- cv[,2] * cv[,3] / global
# Compute IQRoQ
IQRrow <- IQR(row.eff$effect)
IQRcol <- IQR(col.eff$effect)
resQ <- as.vector(quantile(abs(x2[[val]]), 0.8))
IQ_row <- IQRrow / resQ
IQ_col <- IQRcol / resQ
# Generate list
out <- list(long = x2, wide = x2w, row = row.eff, col = col.eff,
global = global, iter = i, cv = cv,
power = p, IQ_row = IQ_row, IQ_col=IQ_col)
class(out) <- "eda_polish"
# Generate plot ----
if(plot == TRUE){
plot.eda_polish(x=out, type = "residuals", k = 1,
col.quant = col.quant, colpal = colpal, adj.mar = adj.mar,
res.size = res.size, row.size = row.size, col.size = col.size,
res.txt = res.txt, label.txt = label.txt)
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.