NULL
#' @import ggplot2
#' @importFrom plyr ddply summarise .
#' @importFrom stats as.formula median quantile runif xtabs
#' @importFrom grDevices colorRampPalette hcl
#' @importFrom stats na.omit na.pass
#'
# Logarithm that maps non-positive inputs to 0 instead of NaN/-Inf.
# Unlike ifelse(x <= 0, 0, log(x, ...)), log() is only evaluated on the
# positive elements, so no "NaNs produced" warnings are emitted for
# negative inputs. NA inputs stay NA.
#
# x   - numeric vector
# ... - passed on to log() (e.g. base)
# Returns a numeric vector of the same length as x.
safe_log <- function(x,...) {
  out <- numeric(length(x))
  pos <- !is.na(x) & x > 0
  out[pos] <- log(x[pos], ...)
  out[is.na(x)] <- NA_real_
  out
}
# Compute the floor (or ceiling) of the exponent of a number.
# note: for log-modulus (offset=1), sign of returned value signifies sign of
# input value x; therefore, different exponents cannot be represented for |x| < 1.
# note: for log-modulus, in order to distinguish between -10^0, 0, and 10^0,
# we encode -10^x=-x, -10=-1, -1=0, 0=1, 1=2, 10^x=x+2, ..
encode.int.power <- function(x, base=10, floor=TRUE, offset=0) {
  # direction of rounding: floor(v) when floor=TRUE, else ceiling(v),
  # implemented as -floor(-v)
  dir <- if (floor) 1 else -1
  round.dir <- function(v) dir * base::floor(dir * v)
  if (offset == 0) {
    # regular log transform
    round.dir(safe_log(x, base))
  } else {
    # log-modulus transform: no fractional values!
    x <- round.dir(x)
    y <- round.dir(sign(x) * safe_log(abs(x), base))
    # shift codes so that -1, 0 and 1 map to distinct values (see note above)
    y + ifelse(x >= 1, 2, ifelse(x >= 0, 1, 0))
  }
}
# Invert encode.int.power(): map an encoded integer exponent back to a value.
# For offset != 0 (log-modulus) the code is interpreted as:
#   code <= 0 -> -base^|code|,  code == 1 -> 0,  code >= 2 -> base^(code-2)
decode.int.power <- function(x, base=10, offset=0) {
  if (offset != 0) {
    # log-modulus transform
    ifelse(x <= 0, -(base^abs(x)),
           ifelse(x == 1, 0, base^abs(x - 2)))
  } else {
    # regular log transform
    base^x
  }
}
# calculate breaks for log/log-modulus scale
#
# Returns a function(x) usable as the 'breaks' argument of a scales/ggplot2
# transform: given the data values, it places about n breaks at integer powers
# of 'base', encoded/decoded via encode.int.power()/decode.int.power().
# offset == 0 selects the plain log scale, offset != 0 the log-modulus scale.
log_mod_breaks <- function(n=5, base=10, offset=0)
{
  function(x) {
    rng <- range(x, na.rm=TRUE)
    if (any(is.infinite(rng))) {
      # Note: known issue with log scaling
      # https://github.com/hadley/ggplot2/issues/930
      # seems to be happening mostly with scatter plot, density/hex less affected
      warning('Skipping tick mark computation due to existing ggplot issue.
Try using hex or density instead of scatter plot or histogram.')
      return(1)
    }
    # smallest / largest encoded integer powers covering the data range
    min.pos <- encode.int.power(rng[1], base=base, floor=TRUE, offset=offset)
    max.pos <- encode.int.power(rng[2], base=base, floor=FALSE, offset=offset)
    # the range of powers that the values span
    total <- abs(max.pos - min.pos)
    if (total + 1 <= n/2) {
      # less than half of requested ticks:
      # introduce subdivisions by lowering base
      sub <- floor(n/(total + 1))
      base <- base^(1/sub)
      min.pos <- encode.int.power(rng[1], base=base, floor=TRUE, offset=offset)
      max.pos <- encode.int.power(rng[2], base=base, floor=FALSE, offset=offset)
      by <- 1
    } else {
      # enough or more dynamic range than breaks requested
      by <- max(1, floor(total/(n-1)))
    }
    # decode the (possibly thinned) sequence of powers back to data values
    sapply(seq(min.pos, max.pos, by),
           function(x) decode.int.power(x,base=base, offset=offset))
  }
}
# log (offset=0) or log-modulus (offset=1) scale
# John & Draper 1980; see e.g. http://blogs.sas.com/content/iml/2014/07/14/log-transformation-of-pos-neg/
#
# Builds a scales::trans_new object with matching forward/inverse functions,
# break computation (log_mod_breaks) and domain. The log-modulus transform
# sign(x) * log(|x| + offset) is defined on the whole real line.
log_mod_trans <- function(base = exp(1), offset=0)
{
  # eps: keep the lower end of the plain-log domain strictly positive
  eps<-1e-100
  if (offset == 0) {
    # regular log transform
    trans <- function(x) log(x, base)
    inv <- function(x) base^x
    domain = c(eps, Inf)
  } else {
    # log-modulus transform
    trans <- function(x) sign(x) * log(abs(x) + offset, base)
    inv <- function(x) sign(x) * (base^abs(x) - offset)
    domain = c(-Inf, Inf)
  }
  scales::trans_new(paste0("log-mod-", format(base)),
                    trans,
                    inv,
                    log_mod_breaks(base=base, offset=offset),
                    domain=domain)
}
# calculate axis scale transform based on value distribution
# either:
# - identity
# - log-10
# - log-10-modulus: sign(x) * log(1+|x|)
# John & Draper 1980; see http://blogs.sas.com/content/iml/2014/07/14/log-transformation-of-pos-neg/
#
# Returns list(transform object, axis label). The log transform is chosen if
# it expands the fraction of the plot range occupied by the inter-quartile
# 'core' of the distribution by more than a factor of expansion.thresh.
get.trans.fun <- function(data, x, expansion.thresh=2, label=x, verbose=FALSE) {
  # non-numeric or nearly-constant columns: nothing to transform
  if (!is.numeric(data[[x]]) ||
      length(unique(data[[x]])) <= 3) {
    return(list(scales::identity_trans(), label))
  }
  q <- quantile(data[[x]], c(0, 0.25, 0.5, 0.75, 1), na.rm=TRUE)
  # heuristic:
  # - compute the ratio of the plotting range occupied by the
  #   'core' part of the distribution between the lower and upper
  #   quartile
  # - if range is positive only, candidate transform is log
  # - if range includes negative values, candidate transform is log-modulus
  # - compute this ratio for the case that transform is applied
  # - decide for transform is this ratio is greater than expansion.thresh
  # note: both ranges are stored (upper, lower), so both diffs are negative
  # and their ratio is positive
  range.core <- c(q[4], q[2])
  range.complete <- c(q[5], q[1])
  ratio.before <- diff(range.core) / diff(range.complete)
  # sign of the data range: 1 = all positive, -1 = all non-positive, 0 = mixed
  sgn <- ifelse(q[5] <= 0,
                -1,
                ifelse(q[1] > 0,
                       1,
                       0))
  # offset == 1 is interpreted as using log-modulus, instead of log
  if (sgn == 1) {
    offset <- 0
  } else {
    offset <- 1
  }
  fun.trans <- log_mod_trans(base=10, offset=offset)
  if (ratio.before == 0) {
    # degenerate core (quartiles coincide); ratio.after=1 makes the
    # comparison below always choose the transform
    ratio.after <- 1
  } else {
    range.core.after <- fun.trans$trans(range.core)
    range.complete.after <- fun.trans$trans(range.complete)
    ratio.after <- diff(range.core.after) / diff(range.complete.after)
  }
  if (verbose) {
    if (ratio.after <= expansion.thresh * ratio.before) {
      cat(sprintf('Not applying logarithmic axis scaling for %s; expansion ratio is %f, trans.log.thresh = %f\n',
                  x, ratio.after/ratio.before, expansion.thresh))
    } else {
      cat(sprintf('Applying logarithmic axis scaling for %s; expansion ratio is %f, trans.log.thresh = %f\n',
                  x, ratio.after/ratio.before, expansion.thresh))
    }
  }
  if (ratio.after > expansion.thresh * ratio.before) {
    # note: the label says 'log scale' even when log-modulus was chosen
    list(fun.trans, sprintf('%s (log scale)', label))
  } else {
    list(scales::identity_trans(), label)
  }
}
# Most frequent value (mode) of x.
# Wrapped in a data frame so that ddply keeps the original column type;
# a bare value would be coerced to an integer result column.
Mode <- function(x) {
  vals <- unique(x)
  counts <- tabulate(match(x, vals))
  data.frame(vals[which.max(counts)])
}
# Most frequent value of data[[x]], where frequency is the sum of the weight
# column data[[w]] per level. Returns a one-row, one-column data frame whose
# column is renamed to '.wtd.mode.' so that a later merge() cannot clash
# with x itself.
weighted.mode <- function(data, x, w) {
  totals <- plyr::ddply(data, x, function(d) sum(d[[w]], na.rm=TRUE))
  best <- which.max(totals[[2]])
  names(totals)[1] <- '.wtd.mode.'
  totals[best, 1, drop=FALSE]
}
# calculate measure of central tendency per group.
# for numeric data[[x]], apply (weighted or unweighted) method as specified.
# for ordered factors, compute the (weighted or unweighted) mean or median over
# the sorted integer levels, then round at the end.
# for unordered factors, always compute the mode (most frequent value)
# ordered.as.num - treat ordered factors as integers, and return a numeric average
# (in the case that the centers are used for sorting only, this prevents the rounding loss)
#
# data       - data frame
# x          - name of the value column
# group.vars - names of the grouping columns
# w          - name of the weight column, or 'NULL' for unweighted
# method     - 'median' (default) or 'mean'
# col.name   - name of the result column in the returned frame
# Returns a data frame with one row per group: group.vars plus col.name.
group.central <- function(data, x, group.vars, w='NULL', method=c('median', 'mean'),
                          col.name='.center.', ordered.as.num=FALSE) {
  method <- match.arg(method)
  is.num <- is.numeric(data[[x]])
  is.ord <- (!is.num) && is.ordered(data[[x]])
  lev <- NULL
  grp.center <- NULL
  if (is.ord) {
    # treat ordinals as integer
    lev <- levels(data[[x]])
    data[[x]] <- as.integer(data[[x]])
  }
  # constant weights are equivalent to the unweighted case
  if (w == 'NULL' || length(unique(data[[w]])) == 1) {
    if (is.num || is.ord) {
      # fix: ifelse(method=='median', median, mean) fails at runtime, since
      # base ifelse() rep()-licates its branches and closures cannot be
      # replicated; a plain if/else selects the function correctly
      fun <- if (method == 'median') median else mean
      grp.center <- plyr::ddply(data, group.vars, function(d) fun(d[[x]], na.rm=TRUE))
    } else {
      # known issue: for ordering, it would be better to take also the frequency of the mode
      # into account
      grp.center <- plyr::ddply(data, group.vars, function(d) Mode(d[[x]]))
    }
  } else {
    if (is.num || is.ord) {
      if (method == 'median') {
        # workaround for problems in wtd.quantile:
        # - if all values are NA, wtd.mean throws error: 'zero non-NA points'
        # - if weights are integer, sum can lead to NA due to overflow
        f <- function(d) if (all(is.na(d[[x]]))) { NA } else { Hmisc::wtd.quantile(d[[x]],
                weights=as.numeric(d[[w]]), probs=0.5,
                type='i/(n+1)', na.rm=TRUE, normwt=TRUE) }
        grp.center <- plyr::ddply(data, group.vars, f)
      } else {
        grp.center <- plyr::ddply(data, group.vars, function(d) Hmisc::wtd.mean(d[[x]], d[[w]], na.rm=TRUE))
      }
    } else { # !(is.num || is.ord)
      grp.center <- plyr::ddply(data, group.vars, function(d) weighted.mode(d, x, w))
    }
  }
  # for ordinals, reattach levels, but keep order
  if (is.ord && (!ordered.as.num)) {
    grp.center[[length(grp.center)]] <- ordered(lev[round(grp.center[[length(grp.center)]])], levels=lev, exclude=NULL)
  }
  names(grp.center)[length(grp.center)] <- col.name
  grp.center
}
# make all values of data[[x]] identically equal to the group mean
# Make all values of data[[x]] identically equal to the (weighted) mean of x
# within the groups defined by g.
replace.by.central <- function(data, x, g, w) {
  centers <- group.central(data, x, g, w, method='mean', col.name='.replace.center.')
  merged <- merge(data, centers)
  merged[[x]] <- merged[['.replace.center.']]
  merged[['.replace.center.']] <- NULL
  merged
}
# Return the data frame with the levels of unordered factor data[[x]]
# resorted by frequency: row count when w=='NULL', otherwise by the sum of
# the weight column w. Numeric columns and ordered factors are not changed.
order.factor.by.freq <- function(data, x, w='NULL', decreasing=FALSE, verbose=FALSE) {
  col <- data[[x]]
  # do not change ordered factors (or numeric columns)
  if (is.numeric(col) || is.ordered(col)) {
    return(data)
  }
  if (w == 'NULL') {
    if (verbose) {
      cat(sprintf('Ordering %s levels by number of lines\n', x))
    }
    tab <- table(col, useNA='ifany')
  } else {
    if (verbose) {
      cat(sprintf('Ordering %s levels by sum of %s\n', x, w))
    }
    tab <- xtabs(as.formula(sprintf('%s ~ %s', w, x)), data, exclude=NULL, na.action=na.pass)
  }
  lev.order <- order(tab, decreasing=decreasing)
  data[[x]] <- factor(col, levels=levels(col)[lev.order], exclude=NULL)
  data
}
# return data frame with the levels of unordered factor x resorted by (weighted) central tendency of dependent variable y
#
# Sorting uses the per-level median of y (to agree with median lines shown in
# the plots); ties between medians are resolved by re-sorting on the mean.
# If y is missing/'NULL'/equal to x, falls back to frequency ordering.
order.factor.by.value <- function(data, x, y, w='NULL', decreasing=FALSE, verbose=FALSE) {
  x.data <- data[[x]]
  # do not change ordered factors, numeric columns, or single-level factors
  if (is.numeric(x.data) || is.ordered(x.data) || length(unique(data[[x]])) == 1) {
    return(data)
  }
  if (is.null(y) || y == 'NULL' || y == x) {
    return(order.factor.by.freq(data, x, w, decreasing))
  }
  y.data <- data[[y]]
  if (verbose) {
    cat(sprintf('Ordering %s levels by %s\n', x, y))
  }
  # we are displaying median lines for distributions, so the sorting should agree with this
  centers <- group.central(data, y, x, w, method='median', ordered.as.num=TRUE)
  # if medians are equal for two or more classes, resolve ties by using the mean
  if (length(unique(centers$.center.)) != nrow(centers)) {
    centers <- group.central(data, y, x, w, method='mean', ordered.as.num=TRUE)
  }
  # non-numeric centers (e.g. modes of an unordered y) are compared by code
  if (!is.numeric(centers$.center.)) {
    centers$.center. <- as.integer(centers$.center.)
  }
  ord <- order(centers$.center., decreasing=decreasing)
  data[[x]] <- factor(x.data,
                      levels= centers[ord, x],
                      exclude=NULL)
  data
}
# Reorder unordered factor levels of x, y, z to highlight dependencies.
# If at least one of the variables is numeric or ordered, the unordered
# factors are sorted according to it; if none is, sort according to z
# (if present), otherwise y, which itself is first ordered by frequency.
order.factors <- function(data, x, y, z='NULL', w='NULL', verbose=FALSE) {
  vars <- if (z == 'NULL') c(y, x) else c(z, y, x)
  # find the first variable with an inherent order to anchor the sorting
  anchor <- NULL
  for (v in vars) {
    if (is.numeric(data[[v]]) || is.ordered(data[[v]])) {
      anchor <- v
      break
    }
  }
  if (is.null(anchor)) {
    # none has an inherent order: order the primary variable by frequency
    anchor <- vars[1]
    data <- order.factor.by.freq(data, anchor, w)
  }
  for (v in setdiff(vars, anchor)) {
    if ((!is.numeric(data[[v]])) && (!is.ordered(data[[v]]))) {
      # v is an unordered factor: resort it by the anchor variable
      data <- order.factor.by.value(data, v, anchor, w, verbose=verbose)
    }
  }
  data
}
# limit the number of levels of a factor so as to not overcrowd the display
# - for unordered factors, keep the highest ones, group lower levels into '.other.'
#   - by.frequency=TRUE: keep the most frequent levels (according to count or
#     weight column w), ties broken by the original level order
#   - by.frequency=FALSE: keep levels according to their original order
#     (fixed: this case previously indexed into an undefined frequency table,
#     producing empty/NA level sets)
# - for ordered factors, form about equidistant subgroups, in order
# - If there a slightly more levels than desired (according to tol), still don't
#   truncate, as quantizing makes the factor less intelligible
# - reverse=FALSE keeps the last (most frequent) levels, reverse=TRUE the first
limit.factor.levels <- function(data, x, w='NULL',
                                max.levels=30,
                                tol=max.levels*1.5,
                                by.frequency=TRUE,
                                reverse=FALSE) {
  x.data <- data[[x]]
  if (is.numeric(x.data)) {
    return(data)
  }
  # note: there can be unused levels!
  u <- length(levels(x.data))
  if (u <= tol) {
    return(data)
  }
  message(sprintf('Factor variable %s has too many levels (%d), truncating to %d', x, u, max.levels))
  if (!is.ordered(x.data)) {
    if (by.frequency) {
      # keep the most frequent levels in the same order, combine the rest into '.other.',
      data.tmp <- data
      data.tmp[[x]] <- as.integer(data.tmp[[x]])
      if (w == 'NULL') {
        tab <- table(data.tmp[[x]], useNA='ifany')
      } else {
        tab <- xtabs(as.formula(sprintf('%s ~ %s', w, x)), data.tmp, exclude=NULL, na.action=na.pass)
      }
      # make sure that ties are broken according to previous order!
      tab <- tab[order(tab, as.numeric(names(tab)), decreasing=FALSE)]
      # levels sorted by increasing frequency
      levels.ord <- levels(x.data)[as.integer(names(tab))]
    } else {
      # keep levels in their original order
      levels.ord <- levels(x.data)
    }
    ll <- length(levels.ord)
    if (!reverse) {
      levels.reduced <- levels.ord[seq(max(1,ll-max.levels+1),ll)]
    } else {
      levels.reduced <- levels.ord[seq(min(max.levels,ll))]
    }
    # note: we want to maintain the original order of levels!
    idx.trunc <- !(x.data %in% levels.reduced)
    x.data <- factor(x.data, levels=c('.other.', levels(x.data)),exclude=NULL)
    x.data[idx.trunc] <- '.other.'
    data[[x]] <- factor(x.data, exclude=NULL)
  } else { # is.ordered(data[[x]])
    # Note: The weighted case is very hard to get right for very unequal weights.
    # Ignoring for now.
    breaks <- 1 + (seq(0,max.levels)/max.levels) * (length(levels(x.data))-1)
    # dig.lab: in cut(), avoids scientific formatting for integers
    dig.lab=50
    x.data <- cut(as.integer(x.data), breaks=breaks,
                  right=FALSE, include.lowest=TRUE, ordered.result=TRUE, dig.lab=dig.lab)
    # recreate level information by parsing those generated by 'cut'
    # make lists of elements, e.g.:
    # - original levels 'a', 'b', 'c', 'd'
    # - binned level '[2,4)' -> 'c,d'
    x.data <- ordered(x.data)
    # map an interval label like '[2,4)' back to the original level names
    reconstruct.levels <- function(interval, levels) {
      left.bracket <- substr(interval,1,1)
      right.bracket <- substr(interval,nchar(interval),nchar(interval))
      eps <- 0
      # open boundaries: nudge the parsed index inward before rounding
      if (left.bracket == '(') {
        eps <- 1e-10
      }
      if (right.bracket == ')') {
        eps <- -1e-10
      }
      left.idx <- ceiling(eps + as.numeric(gsub('[(\\[]([0-9.]+),.*', '\\1', interval)))
      right.idx <- floor(eps + as.numeric(gsub('.*,([0-9.]+).', '\\1', interval)))
      # catch if level doesn't fit interval pattern - this shouldn't happen!
      if (is.na(left.idx) || is.na(right.idx)) {
        return(interval)
      }
      paste(levels[left.idx:right.idx], sep='', collapse=',')
    }
    levels(x.data) <- sapply(levels(x.data), reconstruct.levels, levels=levels(data[[x]]))
    # drop bins that ended up empty
    x.data <- x.data[,drop=TRUE]
    data[[x]] <- x.data
  }
  data
}
# if one numerical axis has very few values, it sometimes makes sense to treat them as a factor
#
# Converts data[[x]] to an ordered factor if it has at most
# few.unique.as.factor distinct non-NA values and clearly more rows than
# distinct values. info.threshold() is a package-internal logging helper
# defined elsewhere in the package.
discretize.few.unique <- function(data, x, few.unique.as.factor=5, verbose=FALSE) {
  if (!is.numeric(data[[x]])) {
    return(data)
  }
  non.na <- !is.na(data[[x]])
  # works for both data frames and tibbles
  u <- nrow(unique(data[non.na,x,drop=FALSE]))
  if (u <= few.unique.as.factor &&
      nrow(data) > u * u) { # heuristic similar to histogram binning
    data[[x]] <- ordered(data[[x]], exclude=NULL)
    info.threshold(verbose, '%s has only %d levels, treating as factor', few.unique.as.factor, x, u)
  }
  data
}
# determine number of histogram bins
# note: this is a modification the 'nclass.FD' function, which sometimes returns '1'
# for very uneven distributions.
nclass.FD.modified <- function(x, max.bins=200) {
  distinct <- unique(x)
  if (length(distinct) == 1) {
    # only one value
    return(1L)
  }
  # minimum reasonable bin size: half the smallest gap between distinct values
  width <- 0.5 * min(abs(diff(distinct)), na.rm=TRUE)
  # for very few unique values, show all of them individually
  if (length(distinct) > 10) {
    spread <- 2 * stats::IQR(x, na.rm=TRUE)
    if (spread == 0) {
      spread <- 2 * stats::mad(x, constant=2, na.rm=TRUE)
    }
    if (spread > 0) {
      # Freedman-Diaconis style bin width
      width <- max(width, spread * length(x)^(-1/3))
    }
  }
  # cap the number of bins, since for some features the estimate is very
  # high, causing the hist() function to throw an error
  raw.bins <- ceiling(diff(range(distinct, na.rm=TRUE))/width)
  lower <- min(5, length(distinct), max.bins, length(x)/log2(length(x)))
  min(max.bins, max(lower, raw.bins))
}
# break a numeric variable into intervals
# - method=quantile - bins with equal number of samples
# - method=histogram - equidistant intervals
# - max.breaks - requested number of breaks
# - tol - if there are less than that many distinct values to
#   begin with, don't do additional binning
# - estimate.breaks=FALSE - use exactly max.breaks many breaks
# - w - name of a weight column, or 'NULL' for unweighted quantiles
# With method='quantile', the column becomes an ordered factor of intervals;
# with method='histogram', it stays numeric, snapped to the bin grid.
discretize <- function(data, x,
                       w='NULL',
                       max.breaks,
                       tol=max.breaks*1.5,
                       estimate.breaks=TRUE,
                       method=c('quantile', 'histogram')) {
  x.data <- data[[x]]
  # only numeric columns can be binned
  if (!is.numeric(x.data)) {
    return(data)
  }
  u <- length(unique(x.data))
  if (missing(max.breaks)) {
    # default: limited by the number of distinct values and the sample size
    max.breaks = ceiling(min(u-1, sqrt(nrow(data))))
  }
  max.breaks = floor(min(max.breaks, u-1, sqrt(nrow(data))))
  max.breaks <- min(max.breaks, nrow(data))
  # already few enough distinct values: leave unchanged
  if (u - 1 <= max(tol, max.breaks)) {
    return(data)
  }
  method <- match.arg(method)
  breaks <- NULL
  # compute number of breaks
  if (method == 'quantile') {
    probs <- seq(0, max.breaks) / max.breaks
    if (w=='NULL') {
      breaks <- quantile(x.data, probs=probs, na.rm=TRUE)
    } else {
      breaks <- Hmisc::wtd.quantile(x.data, weights=data[[w]],
                                    probs=probs, na.rm=TRUE, normwt=TRUE,
                                    type='i/(n+1)')
    }
    # unique() guards against repeated quantiles for skewed distributions
    data[[x]] <- ordered(cut(x.data, breaks=unique(breaks), include.lowest=TRUE),
                         exclude=NULL)
  } else { # method == 'histogram'
    n.breaks <- max.breaks
    if (estimate.breaks) {
      n.breaks <- nclass.FD.modified(x.data, max.breaks)
    }
    breaks <- graphics::hist(x.data, breaks=n.breaks, plot=FALSE)$breaks
    # leave type numeric; snap each value to the equidistant bin grid
    step<-breaks[2] - breaks[1]
    data[[x]] <- round((x.data-breaks[1])/step)*step+breaks[1]
  }
  data
}
# - convert character vectors into factors
# - with na.rm=TRUE, remove rows containing NA
# - from here on, don't exclude any levels from factors any more
# - check for all-NA and constant values
# - convert character vectors into factors
# - convert logicals into ordered factors
#
# Stops with an informative error if any of vars is all-NA or constant.
# Returns the modified data frame; NA factor levels are renamed to '?' so
# that ggplot still shows a color legend entry for them.
preprocess.factors <- function(data, vars, na.rm=FALSE) {
  for (x in vars) {
    x.data <- data[[x]]
    non.na <- !is.na(x.data)
    if (length(which(non.na)) == 0) {
      stop(sprintf('Variable %s has only missing values, giving up', x))
    }
    u <- length(unique(x.data[non.na]))
    if (u == 1) {
      stop(sprintf('Variable %s has only single level ("%s"), giving up', x, unique(x.data[non.na])))
    }
    if (u == 2) {
      # binary variables are trivially ordered
      data[[x]] <- ordered(x.data)
    } else if (!is.numeric(x.data)) {
      if (!is.factor(x.data)) {
        # i.e., character or logical
        x.data <- factor(x.data, exclude=NULL)
      }
      # suppress unused levels
      # for NA treatment, see http://r.789695.n4.nabble.com/droplevels-inappropriate-change-td4723942.html
      # droplevels(x.data, exclude=exclude.factor) does not seem to be working correctly
      data[[x]] <- x.data[,drop=TRUE]
    }
  }
  if (na.rm) {
    # NA kept as an explicit factor level is not removed by na.omit() below
    factor.with.na.level <- any(sapply(seq(length(data)),
                                       function(x) {is.factor(data[[x]]) && any(is.na(levels(data[[x]])))}))
    if (factor.with.na.level) {
      message('Option "na.rm=TRUE", but data contains factor(s) with an NA level. Keeping these ...')
    }
    data <- na.omit(data)
    if (nrow(data) == 0) {
      stop(sprintf('Variable(s) have only missing values, giving up'))
    }
  }
  # if na.rm==TRUE, all NA's have been removed. In either case, from now on all levels should be
  # included
  for (i in seq(length(data))) {
    if (is.factor(data[[i]])) {
      data[[i]] <- factor(data[[i]], exclude=NULL, ordered=is.ordered(data[[i]]))
      # note: rename <NA> - else ggplot doesn't show a color legend
      idx<-which(is.na(levels(data[[i]])))
      if (length(idx) > 0) {
        levels(data[[i]])[idx] <- '?'
      }
    }
  }
  data
}
# (Weighted) joint probability distribution over the given factor columns,
# as a normalized contingency table. With w=='NULL', plain row counts are
# used; otherwise cell weights are the sums of data[[w]].
marginalize <- function(data, var.list, w='NULL') {
  lhs <- if (w == 'NULL') '' else w
  rhs <- paste(var.list, collapse='+')
  counts <- xtabs(as.formula(sprintf('%s ~ %s', lhs, rhs)), data, exclude=NULL, na.action=na.pass)
  prop.table(counts)
}
# entropy: H(vars) = - \sum p(vars)log2(p(vars))
# assumption: vars are factors
entropy <- function(data, vars, w) {
  p <- marginalize(data, vars, w)
  lp <- log2(p)
  lp[p == 0] <- 0 # guard for -Inf: 0 * log(0) is treated as 0
  -sum(p * lp)
}
# conditional entropy: H(y|x) = H(x,y) - H(x)
# assumption: x, y are factors; H(x|x) is 0 by definition
cond.entropy <- function(data, x, y, w) {
  if (x == y) {
    return(0)
  }
  entropy(data, c(x, y), w) - entropy(data, x, w)
}
# Reduce data[[v]] to at most n.levels distinct values:
# numeric columns are binned via discretize(); factors are (optionally)
# reordered by the target variable, then truncated via limit.factor.levels().
quantize <- function(data, v, target='NULL', w, n.levels, estimate.breaks=FALSE, method='histogram') {
  if (is.numeric(data[[v]])) {
    return(discretize(data, v, w=w, max.breaks=n.levels, tol=n.levels,
                      estimate.breaks=estimate.breaks, method=method))
  }
  if (target != 'NULL') {
    data <- order.factor.by.value(data, v, target, w=w)
  }
  limit.factor.levels(data, v, w,
                      max.levels=n.levels,
                      tol=n.levels)
}
# return a list of conditional entropies H(target|x), for each column in the data frame.
# for comparability, all variables are quantized into the same number of bins
#
# - target given: fill only that row of the matrix (target vs all columns)
# - target missing, given present: fill only the [target, given] cells
# - both missing: full all-vs-all matrix
# Returns a square matrix indexed by column names; unfilled cells remain NA.
cond.entropy.data <- function(data, target, given, w='NULL') {
  n.row <- nrow(data)
  n.levels <- floor(max(2, min(log2(n.row), n.row/10))) # heuristic
  # H(y|x) after quantizing column x; x is renamed to '.x' to avoid clashes
  cond.entropy.quantized <- function(data, x, y, w, n.levels) {
    if (x == y) {
      0
    } else {
      x.data <- quantize(data, x, y, w, n.levels)
      # note: we need to make a copy for the special case of x=w
      names(x.data)[names(x.data)==x]<-'.x'
      if (w != 'NULL') {
        x.data[[w]] <- data[[w]]
      }
      cond.entropy(x.data, '.x', y, w)
    }
  }
  entropies <- matrix(nrow=length(data), ncol=length(data))
  colnames(entropies) <- names(data)
  rownames(entropies) <-names(data)
  if (!missing(target)) {
    # single row/column
    data <- quantize(data, target, w=w, n.levels=n.levels)
    entropies[target,] <- sapply(names(data), function(x)
      cond.entropy.quantized(data, x, target, w, n.levels))
  } else {
    for (target in names(data)) {
      # quantize the current target once, then condition on the other columns
      data <- quantize(data, target, w=w, n.levels=n.levels)
      if (!missing(given)) {
        entropies[target, given] <- cond.entropy.quantized(data, x=given, y=target, w, n.levels)
      } else {
        # all vs all
        entropies[target,] <- sapply(names(data), function(x)
          cond.entropy.quantized(data, x, target, w, n.levels))
      }
    }
  }
  entropies
}
# heuristic to decide whether to plot a heat map:
# - axes x, y must be either numeric, or ordered factors
# - axes must have at least min.size distinct values
# - at least a fraction min.coverage of the grid points must have data
is.grid.like <- function(data, x, y, z, min.size=3, min.coverage=0.5) {
  axis.ok <- function(v) is.numeric(data[[v]]) || is.ordered(data[[v]])
  # z must be numeric; x and y must carry an inherent order
  if (!is.numeric(data[[z]]) || !axis.ok(x) || !axis.ok(y)) {
    return(FALSE)
  }
  n.x <- length(unique(data[[x]]))
  n.y <- length(unique(data[[y]]))
  # both axes need enough distinct values
  if (n.x < min.size || n.y < min.size) {
    return(FALSE)
  }
  # fraction of the x/y grid that actually carries data points
  occupied <- nrow(unique(data[,c(x, y)]))
  occupied >= min.coverage * n.x * n.y
}
# some common themes
# numeric axes: dotted black grid lines help to read off values
theme_panel_num_x <-
  theme(panel.grid.major.x=element_line(color='black', linetype='dotted'),
        panel.grid.minor.x=element_line(color='black',linetype='dotted'))
theme_panel_num_y <-
  theme(panel.grid.major.y=element_line(color='black', linetype='dotted'),
        panel.grid.minor.y=element_line(color='black',linetype='dotted'))
# points on the grid line are hard to see; remove grid line for factors
theme_panel_fac_x <- theme(panel.grid.major.x=element_blank(),
                           panel.grid.minor.x=element_blank())
theme_panel_fac_y <- theme(panel.grid.major.y=element_blank(),
                           panel.grid.minor.y=element_blank())
# for factors, write horizontal axis labels at an angle to avoid overlap
theme_slanted_text_x <- theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=1))
# detect if x axis text might overlap
# labels - tick labels; breaks - their axis positions (default: equidistant)
# Label widths are measured in 'npc' units via grid::convertX, so the result
# depends on the currently open graphics device.
# Returns TRUE if any label appears wider than the space to its neighbors.
estimate.label.overlap <- function(labels, breaks=seq(length(labels))) {
  if (length(breaks) < 2) {
    return(FALSE)
  }
  lb <- length(breaks)
  # account for some empty space at the boundaries
  mrg.left <- (breaks[2]-breaks[1])/2
  mrg.right <- (breaks[lb]-breaks[lb-1])/2
  rg <- breaks[lb] - breaks[1] + mrg.left + mrg.right
  for (i in seq(lb)) {
    # note: the out-of-range index in the not-taken branch is never evaluated,
    # since ifelse() with a scalar test only forces the selected branch
    space.left <- ifelse(i==1, 2*mrg.left, (breaks[i]-breaks[i-1]))/rg
    space.right <- ifelse(i==lb, 2*mrg.right, (breaks[i+1]-breaks[i]))/rg
    label.width <- as.numeric(grid::convertX(unit(1, 'strwidth', labels[i]),'npc'))
    if (label.width > min(space.left, space.right)) {
      return(TRUE)
    }
  }
  return(FALSE)
}
# add color and/or fill scale layers to plot
# - use qualitative (sequential) brewer scale for (un)ordered factors, if
# palette size supports it.
# Otherwise, extend it using colorRampPalette.
# - if too few colors, use qualitative (first few colors of brewer palette might not look good by themselves!)
# Pick a brewer palette for the values in x: sequential for numeric/ordered
# data, qualitative otherwise. With adjust.size=TRUE, the palette is trimmed
# (dropping the lightest colors) or interpolated to match the number of
# distinct values, and named by the factor levels.
get.palette <- function(x, palette.brewer.seq='YlGn', palette.brewer.qual='Set1',
                        adjust.size=FALSE) {
  numeric.x <- is.numeric(x)
  ordered.x <- is.ordered(x)
  # number of distinct values; note: unused factor levels count, too
  n.vals <- if (numeric.x) length(unique(x)) else length(levels(x))
  if (n.vals <= 2) {
    # binary/constant variables are treated as qualitative
    numeric.x <- FALSE
    ordered.x <- FALSE
  }
  sequential <- numeric.x || ordered.x
  name <- if (sequential) palette.brewer.seq else palette.brewer.qual
  sz <- RColorBrewer::brewer.pal.info[name, 'maxcolors']
  if (is.na(sz)) {
    stop('invalid palette name: ', name)
  }
  pal <- RColorBrewer::brewer.pal(sz, name)
  if (adjust.size) {
    if (sequential && n.vals < sz) {
      # the first few colors of the brewer palettes are very light and hard
      # to see; when fewer values are needed than the palette provides,
      # rather drop the lightest ones instead of the darkest ones
      pal <- pal[seq(sz-n.vals+1, sz)]
    }
    if (!numeric.x && n.vals > sz) {
      # not enough colors, interpolate
      # note: maybe do something different for qualitative scale?
      pal <- colorRampPalette(pal)(n.vals)
    }
    names(pal) <- levels(x)
  }
  pal
}
# Add color and/or fill aesthetics and matching scales for column x to plot p.
# Numeric columns get a two-point gradient between the lightest and darkest
# palette colors; factors get one manual palette entry per level.
# aesth selects which of 'color'/'fill' to map (default: both).
add.color.fill <- function(p, data, x, aesth=c('color', 'fill'),
                           palette.brewer.seq='YlGn',
                           palette.brewer.qual='Set1') {
  # color used for NA entries in the manual (discrete) scales
  na.value <- 'darkgray'
  if ('color' %in% aesth) {
    p <- p + aes_string(color=x)
  }
  if ('fill' %in% aesth) {
    p <- p + aes_string(fill=x)
  }
  # adjust.size=TRUE: palette trimmed/interpolated to the number of levels
  pal <- get.palette(data[[x]], palette.brewer.seq, palette.brewer.qual, TRUE)
  lp <- length(pal)
  if (is.numeric(data[[x]])) {
    # continuous: gradient between first (lightest) and last (darkest) color
    if ('color' %in% aesth) {
      p <- p + scale_color_gradientn(colors=pal[c(1,lp)])
    }
    if ('fill' %in% aesth) {
      p <- p + scale_fill_gradientn(colors=pal[c(1,lp)])
    }
  } else { # !is.num
    # discrete: one named palette entry per level
    if ('color' %in% aesth) {
      p <- p + scale_color_manual(values=pal, na.value=na.value)
    }
    if ('fill' %in% aesth) {
      p <- p + scale_fill_manual(values=pal, na.value=na.value)
    }
  }
  p
}
# place legend either at the bottom or on the right, wherever it occupies less space
# pack as tightly as possible
# Sizes are estimated in 'npc' units via grid::convertX/convertY, so the
# result depends on the currently open graphics device.
add.color.legend <- function(p, data, x, aesth=c('color','fill')) {
  aesth=match.arg(aesth)
  p <- p + theme(legend.key.width=unit(0.5,'lines'))
  if (is.numeric(data[[x]])) {
    # color bars always on the right margin, to avoid number label overwriting
    if (aesth == 'color') {
      p <- p + guides(color=guide_colorbar(direction='vertical'), fill=FALSE)
    } else {
      p <- p + guides(fill=guide_colorbar(direction='vertical'), color=FALSE)
    }
    p <- p + theme(legend.position='right', legend.background=element_blank())
    return(p)
  }
  # rough estimate of key symbol plus spacing; in the default theme.gray(), net key size is 1.2 lines
  size.key <- as.numeric(grid::convertX(unit(1.6, 'lines'), 'npc'))
  title.length <- as.numeric(grid::convertX(unit(1, 'strwidth', x), 'npc'))
  legend.margin <- as.numeric(unit(0.2, 'cm'))
  # vertical placement
  ll <- length(levels(data[[x]]))
  # widths of all level labels, widest first
  npc.length <- sapply(levels(data[[x]]), function(x) grid::convertX(unit(1, 'strwidth', x), 'npc'))
  npc.length <- sort(npc.length, decreasing=TRUE)
  # NOTE(review): nrow.vert is computed but never used below - presumably a
  # leftover from an earlier layout formula; confirm
  ncol.vert <- ceiling(ll*size.key)
  nrow.vert <- ceiling(ll/ncol.vert)
  npc.width.vert <- legend.margin + max(title.length, ncol.vert * (size.key + npc.length[1]))
  # horizontal placement
  # try out how many legend columns we can fit underneath the graph
  ncol.horz <- 0
  npc.horz <- title.length
  while (ncol.horz < ll && (npc.horz + npc.length[ncol.horz+1] < 1)) {
    ncol.horz <- ncol.horz + 1
    npc.horz <- npc.horz + npc.length[ncol.horz] + size.key
  }
  # NOTE(review): if not even the first label fits, ncol.horz stays 0 and
  # nrow.horz/npc.height.horz become Inf, so the vertical branch is chosen
  # below - verify this is the intended fallback
  nrow.horz <- ceiling(ll/ncol.horz)
  npc.height.horz <- legend.margin + nrow.horz * size.key
  aspect.ratio <- as.numeric(grid::convertX(unit(1, 'npc'),'cm')) /
    as.numeric(grid::convertY(unit(1, 'npc'),'cm'))
  if (npc.width.vert < aspect.ratio * npc.height.horz) {
    # vertical - by column, starting with first level at the bottom
    if (aesth == 'color') {
      p <- p + guides(color=guide_legend(direction='vertical', byrow=FALSE,
                                         reverse=TRUE, ncol=ncol.vert), fill=FALSE)
    } else {
      p <- p + guides(fill=guide_legend(direction='vertical', byrow=FALSE,
                                        reverse=TRUE, ncol=ncol.vert), color=FALSE)
    }
    p + theme(legend.position='right', legend.background=element_blank())
  } else {
    # horizontal - by row
    if (aesth == 'color') {
      p <- p + guides(color=guide_legend(direction='horizontal', byrow=TRUE,
                                         reverse=FALSE, nrow=nrow.horz), fill=FALSE)
    } else {
      p <- p + guides(fill=guide_legend(direction='horizontal', byrow=TRUE,
                                        reverse=FALSE, nrow=nrow.horz), color=FALSE)
    }
    p + theme(legend.position='bottom', legend.background=element_blank())
  }
}
# Add a smoothing line to plot p, optionally weighted by column w and drawn
# in a fixed color/fill (fill.smooth); 'NULL' means use the defaults.
# note: for more than 1000 points, ggplot2 uses the mgcv package,
# which leads to a weird dependency issue
add.smooth.line <- function(p, w='NULL', fill.smooth='NULL') {
  layer.args <- list(alpha=0.2, na.rm=TRUE)
  if (w != 'NULL') {
    layer.args$mapping <- aes_string(weight=w)
  }
  if (fill.smooth != 'NULL') {
    layer.args$color <- fill.smooth
    layer.args$fill <- fill.smooth
  }
  p + do.call(geom_smooth, layer.args)
}
# Apply the distribution-based axis transform from get.trans.fun() to the
# x or y axis of plot p; non-numeric columns are left untouched.
add.axis.transform <- function(p, data, x, ax=c('x','y'), trans.log.thresh=2, verbose=FALSE) {
  if (!is.numeric(data[[x]])) {
    return(p)
  }
  ax <- match.arg(ax)
  trans <- get.trans.fun(data, x, trans.log.thresh, verbose=verbose)
  # note: "scale_x_continuous(trans=trans.x[[1]], name=trans.x[[2]])" doesn't allow
  # to overwrite the label later
  if (ax == 'x') {
    p + scale_x_continuous(trans=trans[[1]]) + xlab(trans[[2]])
  } else {
    p + scale_y_continuous(trans=trans[[1]]) + ylab(trans[[2]])
  }
}
# note: these layers work, but in case a log transform is applied,
# the transformed data is received here - we need the original one.
#
# Stat that collapses each group to its (weighted) median x, exposed as
# xintercept so that vline-style geoms can draw it. The weight parameter
# defaults to 1 (unweighted).
StatCenterX <- ggproto("StatCenterX", Stat,
  required_aes = c("x"),
  default_aes = aes(xintercept = ..x..),
  setup_params = function(data, params) {
    if (is.null(params$weight)) {
      params$weight <- 1
    }
    params
  },
  compute_group = function(data, scales, weight) {
    # group by all remaining aesthetic columns
    grp <- setdiff(names(data), c('x','weight'))
    data <- group.central(data, 'x', grp, w='weight', method='median', col.name='x')
    data
  }
)
# Stat that collapses each group to its (weighted) median y; the group id is
# reused as the x position (see geom_point_center). weight defaults to 1.
StatCenterY <- ggproto("StatCenterY", Stat,
  required_aes = c("y", "group"),
  setup_params = function(data, params) {
    if (is.null(params$weight)) {
      params$weight <- 1
    }
    params
  },
  compute_group = function(data, scales, weight) {
    # group by all remaining aesthetic columns
    grp <- setdiff(names(data), c('y','weight'))
    data <- group.central(data, 'y', grp, w='weight', method='median', col.name='y')
    # place the center point at the group's x position
    data$x <- data$group
    data
  }
)
# Overlay a point at the (weighted) median y of each group (StatCenterY).
# Dodged by default so the markers line up with dodged boxes/violins.
geom_point_center <- function(mapping = NULL, data = NULL,
                              position = position_dodge(width=0.9),
                              shape = 1, color = "black", size = 2, fill = NA, alpha = NA,
                              show.legend = NA, inherit.aes = TRUE, na.rm = TRUE, ...)
{
  layer(data = data, mapping = mapping, stat = StatCenterY, geom = GeomPoint,
        position = position, show.legend = show.legend, inherit.aes = inherit.aes,
        params = list(shape=shape, color=color, size=size, fill=fill, alpha=alpha,
                      na.rm = na.rm, ...))
}
# different from base geom_vline(), adapt color according to group, because of inherit.aes=TRUE
# Draws a vertical line at the (weighted) median x of each group (StatCenterX).
# NOTE(review): the 'color' argument is accepted but not forwarded in params -
# presumably so that color comes from the inherited group mapping; confirm
geom_vline_center <- function(mapping = NULL, data = NULL,
                              position = 'identity',
                              color = NULL, size = 0.7, alpha = NA,
                              linetype='dashed',
                              show.legend = NA, inherit.aes = TRUE, na.rm = TRUE, ...)
{
  layer(data = data, mapping = mapping, stat = StatCenterX, geom = GeomVline,
        position = position, show.legend = show.legend, inherit.aes = inherit.aes,
        params = list(size=size, alpha=alpha, linetype=linetype,
                      na.rm = na.rm, ...))
}
# Like ggplot2::geom_vline(), but with inherit.aes=TRUE so that aesthetics
# from the plot's mapping (e.g. per-group color) carry over to the line.
geom_vline_inheritable <- function (mapping = NULL, data = NULL, ..., xintercept, na.rm = FALSE,
                                    show.legend = NA)
{
  if (!missing(xintercept)) {
    # fixed intercepts given directly: build a one-column data frame, as
    # ggplot2's own geom_vline() does, and suppress the legend
    data <- data.frame(xintercept = xintercept)
    mapping <- aes(xintercept = xintercept)
    show.legend <- FALSE
  }
  layer(data = data, mapping = mapping, stat = StatIdentity,
        geom = GeomVline, position = PositionIdentity, show.legend = show.legend,
        inherit.aes = TRUE, params = list(na.rm = na.rm, ...))
}
# 2D scatter plot of numeric or factor variables
# - overlay smoothing line if both variables are numeric
# - dedupe.scatter='area' - unique repeated points, count frequency
# - dedupe.scatter='jitter' - plot each repeated point separately, add
# jitter if there are more than min.points.jitter with identical coordinates
# - counts/weights are drawn with a shaded circle of proportional area
# - jitter.x, jitter.y - the amount of jittering as a multiple of resolution
# - trans.log.thresh - threshold to decide on log-transform
# - fill.smooth - fill color for smoothing line
gplt.scatter <- function(data, x, y, w='NULL',
                         dedupe.scatter=c('area','jitter'),
                         min.points.jitter=3,
                         jitter.x=0.4,
                         jitter.y=0.4,
                         trans.log.thresh=2,
                         max.factor.levels=30,
                         fill.smooth='NULL',
                         verbose=FALSE,
                         ...) {
  dedupe.scatter <- match.arg(dedupe.scatter)
  # if exactly one of x/y is a factor, move the factor to the x axis and
  # flip the finished plot instead
  flip <- FALSE
  if (is.numeric(data[[x]]) && (!is.numeric(data[[y]]))) {
    # HACK: ggplot2 does not implement vertical dodging, therefore we
    # use coord_flip() as a workaround
    flip <- TRUE
    tmp <- x
    x <- y
    y <- tmp
  }
  num.x <- is.numeric(data[[x]])
  num.y <- is.numeric(data[[y]])
  # NOTE(review): ord.x/ord.y are computed but not referenced below
  ord.x <- is.ordered(data[[x]])
  ord.y <- is.ordered(data[[y]])
  data <- order.factors(data, x, y, w=w, verbose=verbose)
  # prune infrequent factor levels on both axes
  for (v in c(x,y)) {
    data <- limit.factor.levels(data, v, w=w,
                                max.levels=max.factor.levels)
  }
  if (dedupe.scatter == 'area') {
    # record frequency/total weight for each unique row of the data
    if (w == 'NULL') {
      # equivalent to 'uniq -c'
      data <- plyr::ddply(data, names(data), function(D) nrow(D))
      names(data)[length(data)] <- '.freq.'
      w <- '.freq.'
    } else {
      # aggregate the existing weight column over duplicate rows
      data <- plyr::ddply(data, setdiff(names(data), w), function(D) sum(D[[w]], na.rm=TRUE))
      names(data)[length(data)] <- w
    }
  }
  # dodging (only meaningful when x is a factor)
  if (!num.x) {
    # example involving dodging:
    # i2<-iris
    # i2$c<-factor(cut(i2$Petal.Width,3))
    # names(i2)[6]<-'pw.quant'
    # plotluck(Petal.width~Species|pw.quant,i2, opts=plotluck.options(geom='scatter'))
    pos <- position_dodge(0.5)
  } else {
    pos <- position_identity()
  }
  # if there are a lot of points, better make them transparent
  # (alpha shrinks linearly with the row count, floored at 0.3)
  alpha <- max(0.3, 0.8 - 0.8/2000 * nrow(data))
  p <- ggplot(data, aes_string(x=x, y=y, weight=w))
  if (w != 'NULL' && length(unique(data[[w]])) > 1) {
    # use point size to represent count/weight
    p <- p + geom_point(aes_string(size=w), alpha=alpha,
                        position=pos, na.rm=TRUE, ...) +
      scale_size(guide=FALSE)
  } else {
    # use jittering
    if (max(table(data[, c(x, y)], useNA='ifany')) >= min.points.jitter) {
      # test for repeated points, optionally jitter
      # - if both x and y are numeric, jitter in both directions
      # - if one is a factor, only jitter in this direction
      #   (enough empty space between levels)
      if (num.x && !num.y) {
        jitter.x <- 0
        # resolution(data[[y]], zero=FALSE) == 1
      } else if (!num.x && num.y) {
        # resolution(data[[x]], zero=FALSE) == 1
        jitter.y <- 0
      } else {
        # scale the jitter fractions by the data resolution on each axis
        jitter.x <- jitter.x * resolution(data[[x]], zero=FALSE)
        jitter.y <- jitter.y * resolution(data[[y]], zero=FALSE)
      }
    } else {
      # too few coordinate duplicates: no jitter needed
      jitter.x <- 0
      jitter.y <- 0
    }
    p <- p + geom_point(alpha=alpha, position=position_jitter(width=jitter.x, height=jitter.y),
                        na.rm=TRUE, ...)
  }
  # draw smoothing line (only sensible for numeric/numeric)
  if (num.x && num.y) {
    p <- add.smooth.line(p, w, fill.smooth)
  }
  # axis transformation (possibly log scaling, decided by helper)
  p <- add.axis.transform(p, data, x, 'x', trans.log.thresh, verbose=verbose)
  p <- add.axis.transform(p, data, y, 'y', trans.log.thresh, verbose=verbose)
  if (!flip) {
    if (num.x) {
      p <- p + theme_panel_num_x + theme_panel_num_y
    } else {
      p <- p + theme_panel_num_y
      if (length(levels(data[[x]])) < 10) {
        # grid lines for factors only necessary when very many
        p <- p + theme_panel_fac_x
      } else {
        p <- p + theme(panel.grid.major.x=element_line(color='black', linetype='dotted'),
                       panel.grid.minor.x=element_blank())
      }
      # slant x labels if they are likely to overlap
      if (estimate.label.overlap(levels(data[[x]]))) {
        p <- p + theme_slanted_text_x
      }
    }
  } else {
    # axes were swapped at the top; flip back for display
    p <- p + coord_flip()
    if (num.x) {
      p <- p + theme_panel_num_y + theme_panel_num_x
    } else {
      p <- p + theme_panel_num_x
      if (length(levels(data[[x]])) < 10) {
        p <- p + theme_panel_fac_y
      } else {
        p <- p + theme(panel.grid.major.y=element_line(color='black', linetype='dotted'),
                       panel.grid.minor.y=element_blank())
      }
    }
  }
  list(plot=p, data=data)
}
# Hexagonally-binned 2D plot for numeric/numeric data, overlaid with a
# smoothing line; intended for large point counts where a scatter would
# overplot.
gplt.hex <- function(data, x, y, w='NULL', trans.log.thresh=2,
                     fill.smooth='NULL', verbose=FALSE, ...) {
  plt <- ggplot(data, aes_string(x=x, y=y)) +
    geom_hex(na.rm=TRUE, ...)
  plt <- add.smooth.line(plt, w, fill.smooth)
  # possibly switch either axis to log scale, depending on skew
  plt <- add.axis.transform(plt, data, x, 'x', trans.log.thresh, verbose=verbose)
  plt <- add.axis.transform(plt, data, y, 'y', trans.log.thresh, verbose=verbose)
  # log scaling of the fill gradient often reveals more detail in the counts
  plt <- plt +
    scale_fill_gradientn(colors=c(hcl(66,60,95), hcl(128,100,45)), trans='log', guide=FALSE) +
    theme(legend.position='right') +
    theme_panel_num_x + theme_panel_num_y
  list(plot=plt, data=data)
}
# heat map
# point grid of discretized values, with optional discretized color (z)
# the color is a representation of an appropriate central tendency measure
# for the point bin (mode for factors, median for ordinal and numeric vectors)
# Heat map: a grid of tiles over discretized x/y, with tile area
# proportional to the cell's count/total weight. An optional third
# variable z is shown as tile color, representing the cell's central
# tendency (computed by replace.by.central()).
gplt.heat <- function(data, x, y, z='NULL', w='NULL', trans.log.thresh=2,
                      max.factor.levels=30,
                      resolution.heat=30,
                      palette.brewer.seq='YlGn',
                      palette.brewer.qual='Set1',
                      verbose=FALSE,
                      ...) {
  num.x <- is.numeric(data[[x]])
  num.z <- FALSE
  ord.z <- FALSE
  # 'NULL' string is the sentinel for "no z variable"
  ex.z <- (!is.null(z)) && (z != 'NULL')
  if (ex.z) {
    num.z <- is.numeric(data[[z]])
    ord.z <- is.ordered(data[[z]])
    if (!num.z && !ord.z) {
      data <- order.factor.by.freq(data, z, w)
    }
  }
  data <- order.factors(data, x, y, z, w, verbose=verbose)
  # quantize variables to at most resolution.heat cells per axis
  for (v in c(x,y)) {
    if (is.numeric(data[[v]])) {
      data <- discretize(data, v, w=w, max.breaks=resolution.heat, estimate.breaks=FALSE,
                         method='histogram')
    } else {
      data <- limit.factor.levels(data, v, w=w,
                                  max.levels=min(resolution.heat,max.factor.levels),
                                  tol=max.factor.levels)
    }
  }
  # grouping variables: everything except z and the weight column
  g.vars <- names(data)
  if (ex.z) {
    g.vars <- setdiff(g.vars, z)
  }
  if (w != 'NULL') {
    g.vars <- setdiff(g.vars, w)
  }
  # HACK: remove precomputed columns
  g.vars <- setdiff(g.vars, '.center.')
  # replace z values with group center
  if (ex.z) {
    data <- replace.by.central(data, z, g.vars, w)
    # number of distinct colors available; one less than palette length
    breaks <- length(get.palette(data[[z]], palette.brewer.seq,
                                 palette.brewer.qual)) - 1
    if (num.z) {
      # if z distribution is very skewed, better to quantize than to histogram
      trans.z <- get.trans.fun(data, z, trans.log.thresh)
      method <- 'histogram'
      if (trans.z[[1]]$name != 'identity') {
        # BUG FIX: was 'method <= quantile' (a comparison, silently
        # discarded), so the quantile method was never actually selected
        method <- 'quantile'
      }
      # limited by color palette
      data <- discretize(data, z, w=w, max.breaks=breaks, tol=breaks,
                         estimate.breaks=FALSE, method=method)
    } else {
      data <- limit.factor.levels(data, z, w,
                                  max.levels=breaks,
                                  tol=breaks)
    }
    g.vars <- c(g.vars, z)
  }
  # sum up counts or weights per grid cell
  if (w == 'NULL') {
    data <- plyr::ddply(data, g.vars, function(D) nrow(D))
    names(data)[length(data)] <- '.freq.'
    w <- '.freq.'
  } else {
    data <- plyr::ddply(data, g.vars, function(D) sum(D[[w]], na.rm=TRUE))
    names(data)[length(data)] <- w
  }
  # scale the area of the rectangle such that:
  # - the largest one is equal to the grid length
  # - the smallest one is still visible
  # - the area is proportional to the weight
  res.x <- resolution(as.numeric(data[[x]]))
  res.y <- resolution(as.numeric(data[[y]]))
  w.max <- sqrt(max(data[[w]]))
  w.min <- sqrt(min(data[[w]]))
  w.rg <- w.max - w.min
  if (w.rg == 0) {
    # all cells carry the same weight: uniform full-size tiles
    sz <- rep(1, nrow(data))
  } else {
    sz.max <- 1
    sz.min <- max(0.1, w.min/w.max)
    sz <- sz.min + (sqrt(data[[w]]) - w.min) * (sz.max - sz.min) / w.rg
  }
  data$width <- res.x * sz
  data$height <- res.y * sz
  p <- ggplot(data, aes_string(x=x, y=y, height='height', width='width', color=z, fill=z, weight=w)) +
    geom_tile(na.rm=TRUE, ...)
  # axis transformation
  # known issue: logarithmic scaling can make the rectangles overlap!
  # plotluck(name ~ sleep_total + brainwt, data = msleep)
  # disabling for now ...
  #p <- add.axis.transform(p, data, x, 'x', trans.log.thresh, verbose=verbose)
  #p <- add.axis.transform(p, data, y, 'y', trans.log.thresh, verbose=verbose)
  if (ex.z) {
    p <- add.color.fill(p, data, z,
                        palette.brewer.seq=palette.brewer.seq,
                        palette.brewer.qual=palette.brewer.qual)
    p <- add.color.legend(p, data, z, 'fill')
  }
  # show gridlines
  p <- p + theme_panel_num_x + theme_panel_num_y
  if (!num.x && estimate.label.overlap(levels(data[[x]]))) {
    p <- p + theme_slanted_text_x
  }
  list(plot=p, data=data)
}
# cleveland dot plot or bar plot with stat_bin
# no log transforms for bars; see http://www.perceptualedge.com/articles/b-eye/dot_plots.pdf
# Single-factor distribution plot: Cleveland dot chart (stat_count) or bar
# chart, optionally weighted, with frequency-ordered and pruned levels.
gplt.dot <- function(data, x, w='NULL', vertical=TRUE,
                     max.factor.levels=30, geom=c('dot', 'bar'), verbose=FALSE, ...) {
  geom <- match.arg(geom)
  # order unordered factors by frequency, then prune rare levels
  if (!is.ordered(data[[x]])) {
    data <- order.factor.by.freq(data, x, w)
  }
  data <- limit.factor.levels(data, x, w=w,
                              max.levels=max.factor.levels)
  # axis label reflects whether counts or weights are shown
  y.label <- if (w == 'NULL') 'count' else w
  p <- ggplot(data, aes_string(x=x, weight=w))
  if (geom == 'dot') {
    # arrow-like point shape signals the plot direction to the viewer
    # (helpful when there are only a few points)
    point.shape <- if (vertical) '>' else '^'
    p <- p + geom_point(stat='count', shape=point.shape, size=6, na.rm=TRUE, ...)
  } else {
    p <- p + geom_bar(position=position_dodge(), alpha=0.8, width=0.2, na.rm=TRUE, ...)
  }
  if (!vertical) {
    p <- p + theme_panel_num_y + ylab(y.label)
    if (length(levels(data[[x]])) < 10) {
      p <- p + theme_panel_fac_x
    } else {
      # many levels: dotted grid lines guide the eye
      p <- p + theme(panel.grid.major.x=element_line(color='black', linetype='dotted'),
                     panel.grid.minor.x=element_blank())
    }
    if (estimate.label.overlap(levels(data[[x]]))) {
      p <- p + theme_slanted_text_x
    }
  } else { # horizontal
    p <- p + coord_flip() + theme_panel_num_x + ylab(y.label)
    if (length(levels(data[[x]])) < 10) {
      p <- p + theme_panel_fac_y
    } else {
      p <- p + theme(panel.grid.major.y=element_line(color='black', linetype='dotted'),
                     panel.grid.minor.y=element_blank())
    }
  }
  list(plot=p, data=data)
}
# violin plot with overlayed median point
# assumption: x is factor, y is numeric
gplt.violin <- function(data, x, y, w='NULL',
                        trans.log.thresh=2,
                        max.factor.levels=30,
                        verbose=FALSE,
                        ...) {
  # put the factor on x; if it came in on y, flip the finished plot
  flip <- FALSE
  if (is.numeric(data[[x]])) {
    if (is.numeric(data[[y]])) {
      stop('Violin plot requires one factor variable')
    }
    flip <- TRUE
    tmp <- x
    x <- y
    y <- tmp
  }
  # note: geom_violin throws an error if all values are equal
  # workaround: add very small jitter
  # NOTE(review): the jitter magnitude scales with min(y), so it degenerates
  # to zero when min(y) == 0 -- confirm whether that case matters here
  data[[y]] <- data[[y]] +
    1E-8 * min(data[[y]], na.rm=TRUE) * (runif(nrow(data)) - 0.5)
  # order unordered factor levels by the value of y (helper decides measure)
  if (!is.ordered(data[[x]])) {
    data <- order.factor.by.value(data, x, y, w=w, verbose=verbose)
  }
  data <- limit.factor.levels(data, x, w=w,
                              max.levels=max.factor.levels)
  # NOTE(review): ymax=max(y,na.rm=TRUE) applies max() to the *name string*
  # y, which returns y itself; ymax is thus mapped to the same column.
  # Verify this is intended.
  p <- ggplot(data, aes_string(x=x, y=y, weight=w, ymax=max(y,na.rm=TRUE))) +
    geom_violin(scale='width', alpha=0.8, na.rm=TRUE, ...)
  if ('.center.' %in% names(data)) {
    # overlay the precomputed per-group center as a hollow point
    # dodge does not work correctly when width is not specified
    # see https://github.com/hadley/ggplot2/issues/525
    p <- p + geom_point(mapping=aes_(y=~.center.),
                        position=position_dodge(width=0.9),
                        size=2, shape=1, na.rm=TRUE)
  }
  # '+ geom_point_center()' is unfortunately not working correctly for the case that a
  # group (y-value) AND a color is specified, e.g.,
  # plotluck(Petal.Length~Species|Petal.Width,iris,opts=plotluck.options(geom='violin',max.factor.levels.color=10))
  # In this case, compute_group() in StatCenterX is only called once per y-value.
  # axis transformation (possibly log scaling of y)
  p <- add.axis.transform(p, data, y, 'y', trans.log.thresh, verbose=verbose)
  if (!flip) {
    p <- p + theme_panel_fac_x + theme_panel_num_y
    # slant x labels if they are likely to overlap
    if (estimate.label.overlap(levels(data[[x]]))) {
      p <- p + theme_slanted_text_x
    }
  } else {
    p <- p + coord_flip() + theme_panel_fac_y + theme_panel_num_x
  }
  list(plot=p, data=data)
}
# box plot
# assumption: x is factor, y is numeric
gplt.box <- function(data, x, y, w='NULL',
                     trans.log.thresh=2,
                     max.factor.levels=30,
                     verbose=FALSE,
                     ...) {
  # put the factor on x; if it came in on y, flip the finished plot
  flip <- FALSE
  if (is.numeric(data[[x]])) {
    if (is.numeric(data[[y]])) {
      stop('Box plot requires one factor variable')
    }
    flip <- TRUE
    tmp <- x
    x <- y
    y <- tmp
  }
  # order unordered factor levels by the value of y (helper decides measure)
  if (!is.ordered(data[[x]])) {
    data <- order.factor.by.value(data, x, y, w, verbose=verbose)
  }
  data <- limit.factor.levels(data, x, w=w,
                              max.levels=max.factor.levels)
  # NOTE(review): ymax=max(y,na.rm=TRUE) applies max() to the *name string*
  # y, which returns y itself; ymax is thus mapped to the same column.
  # Verify this is intended (same pattern as gplt.violin).
  p <- ggplot(data, aes_string(x=x, y=y, weight=w, ymax=max(y,na.rm=TRUE))) +
    geom_boxplot(alpha=0.8, na.rm=TRUE, ...)
  # axis transformation (possibly log scaling of y)
  p <- add.axis.transform(p, data, y, 'y', trans.log.thresh, verbose=verbose)
  if (!flip) {
    p <- p + theme_panel_fac_x + theme_panel_num_y
    # slant x labels if they are likely to overlap
    if (estimate.label.overlap(levels(data[[x]]))) {
      p <- p + theme_slanted_text_x
    }
  } else {
    p <- p + coord_flip() + theme_panel_fac_y + theme_panel_num_x
  }
  list(plot=p, data=data)
}
# density plot with overlayed vertical median lines
gplt.density <- function(data, x, w='NULL',
                         trans.log.thresh=2,
                         verbose=FALSE,
                         ...) {
  # label reflects whether the density is weighted
  ylab <- 'density'
  if (w != 'NULL') {
    ylab <- sprintf('%s density', w)
  }
  # scaled density (peak == 1) plus a rug of raw observations
  p <- ggplot(data, aes_string(x=x, weight=w)) +
    geom_density(aes_(y=~..scaled..), alpha=0.6, adjust=0.5, trim=TRUE, na.rm=TRUE, ...) +
    geom_rug(na.rm=TRUE)
  # note: overlaying geom_vline_center() does not work together with log
  # axis transformation (the layer would receive already-transformed data);
  # instead, precomputed centers are drawn via geom_vline_inheritable()
  if ('.center.' %in% names(data)) {
    p <- p + geom_vline_inheritable(aes_(xintercept=~.center.),
                                    linetype='dashed', size=0.7, na.rm=TRUE)
  }
  # axis transformation (possibly log scaling of x)
  p <- add.axis.transform(p, data, x, 'x', trans.log.thresh, verbose=verbose) +
    ylab(ylab) +
    # density numbers themselves not very meaningful
    theme(axis.ticks.y=element_blank(),
          axis.text.y=element_blank()) +
    theme_panel_num_x
  list(plot=p, data=data)
}
# histogram plot with overlayed vertical median lines
# Histogram with rug and overlaid vertical center line.
# - n.breaks: number of bins; if not numeric (default NA), estimated via
#   the project's modified Freedman-Diaconis rule
gplt.histogram <- function(data, x, w='NULL',
                           n.breaks=NA,
                           trans.log.thresh=2,
                           verbose=FALSE,
                           ...) {
  # use the caller's break count when supplied, otherwise estimate it
  # (the original code had a no-op 'n.breaks <- n.breaks' branch here)
  if (!is.numeric(n.breaks)) {
    n.breaks <- nclass.FD.modified(data[[x]])
  }
  bin.width <- diff(range(data[[x]], na.rm=TRUE))/n.breaks
  p <- ggplot(data, aes_string(x=x, weight=w)) +
    geom_histogram(binwidth=bin.width, position='identity', alpha=0.6, na.rm=TRUE, ...) +
    geom_rug(na.rm=TRUE) + geom_vline_center()
  # axis transformation (possibly log scaling of x)
  p <- add.axis.transform(p, data, x, 'x', trans.log.thresh, verbose=verbose)
  # label reflects whether counts or weights are shown
  ylab <- if (w == 'NULL') 'count' else w
  p <- p + ylab(ylab) +
    theme_panel_num_x + theme_panel_num_y
  list(plot=p, data=data)
}
# Normalize a numeric vector so it sums to 1, treating NA entries as a
# tiny positive mass (so empty cells still get a well-defined coordinate).
norm.sum <- function(x) {
  vals <- replace(x, is.na(x), 1e-20)
  vals / sum(vals)
}
# spine plot of two variables
# note: We could use mosaicplot, but we want to consistently stick with ggplot
# another option would be the prodplots package, but currently seems
# to be in development stage
#
# - plot.margin.x - horizontal gap between rectangles for x-values
# - no gaps between y-rectangles
#
# note: additional columns other than x,y,w are dropped; this precludes later
# faceting on them. They could be preserved, however this function is already
# complicated as it is.
gplt.spine <- function(data, x, y, w='NULL',
                       plot.margin.x=0.05,
                       max.factor.levels=10,
                       palette.brewer.seq='YlGn',
                       palette.brewer.qual='Set1',
                       verbose=FALSE,
                       ...) {
  # ordering and factor truncation
  data <- order.factors(data, x, y, w=w, verbose=verbose)
  for (v in c(x, y)) {
    data <- quantize(data, v, w=w, n.levels=max.factor.levels)
    if (is.numeric(data[[v]])) {
      # exclude=NULL keeps NA as an explicit level, so it gets a stripe
      data[[v]] <- ordered(data[[v]], exclude=NULL)
    }
  }
  x.lev <- length(levels(data[[x]]))
  y.lev <- length(levels(data[[y]]))
  # joint and marginal weight tables; y.cond is the y share within each x
  xy.tab <- marginalize(data, c(x, y), w)
  x.tab <- marginalize(data, x, w)
  y.cond <- sweep(xy.tab, 1, x.tab, FUN='/')
  # horizontal layout: cumulative x marginals give the stripe edges,
  # x.cnt counts preceding inter-stripe gaps, x.center marks tick positions
  x.tab.df <- as.data.frame(x.tab, responseName='x.mrg')
  x.tab.df$x.mrg.cum <- c(0, cumsum(x.tab.df$x.mrg)[1:(x.lev-1)])
  x.tab.df$x.cnt <- seq(0, x.lev-1)
  x.tab.df$x.center <- with(x.tab.df, x.mrg.cum + x.mrg/2 + x.cnt * plot.margin.x)
  y.cond.df <- as.data.frame(aperm(y.cond), responseName='y.cond')
  #y.cond.df$y.cond.cum <- as.vector(apply(y.cond, 1, function(x) {c(0, cumsum(x)[1:(y.lev-1)])}))
  # note: if there is no data for an x/y combination, make y coordinates non-NA for displaying
  # 'zero-area' rects
  y.cond.df <- plyr::ddply(y.cond.df, x, plyr::here(transform), y.cond=norm.sum(y.cond))
  # cumulative conditional shares, shifted down by one slot to obtain the
  # bottom edge of each rectangle
  y.cond.df <- plyr::ddply(y.cond.df, x, transform, y.cond.cum=cumsum(y.cond))
  y.cond.df$y.cond.cum <- y.cond.df$y.cond.cum - y.cond.df$y.cond
  plot.data <- y.cond.df
  plot.data <- merge(plot.data, x.tab.df)
  # rectangle coordinates: x marginal widths plus accumulated gaps
  plot.data$left <- plot.data$x.mrg.cum + plot.data$x.cnt * plot.margin.x
  plot.data$right <- plot.data$left + plot.data$x.mrg
  plot.data$bottom <- plot.data$y.cond.cum
  plot.data$top <- plot.data$bottom + plot.data$y.cond
  # remove empty rects
  #idx <- plot.data$bottom < plot.data$top & plot.data$left < plot.data$right
  #plot.data <- plot.data[idx,]
  p <- ggplot(plot.data, aes_(xmin=~left, xmax=~right, ymin=~bottom, ymax=~top)) +
    # make outline color same as fill color - black outline will hide colors
    # for very narrow stripes
    geom_rect(aes_string(color=y, fill=y), size=1, alpha=0.6, na.rm=TRUE, ...) +
    scale_x_continuous(breaks=x.tab.df$x.center, labels=levels(data[[x]])) +
    theme(axis.ticks.x=element_blank()) +
    # y reflected in color legend; however, we do want the label when part of a multiplot!
    xlab(x) + ylab(y) +
    theme_panel_fac_y
  p <- add.color.fill(p, data, y,
                      palette.brewer.seq=palette.brewer.seq,
                      palette.brewer.qual=palette.brewer.qual)
  p <- add.color.legend(p, data, y, 'fill')
  # slant x labels if they would overlap
  if (estimate.label.overlap(labels=levels(data[[x]]), breaks=x.tab.df$x.center)) {
    p <- p + theme_slanted_text_x
  }
  list(plot=p, data=data)
}
# spine plot of three variables
# - plot.margin.x - horizontal gap between rectangles for x-values
# - plot.margin.y - vertical gap between rectangles for y-values
# - no gaps between z-rectangles
gplt.spine3 <- function(data, x, y, z, w='NULL',
                        plot.margin.x=0.05,
                        plot.margin.y=0.02,
                        max.factor.levels=10,
                        palette.brewer.seq='YlGn',
                        palette.brewer.qual='Set1',
                        verbose=FALSE,
                        ...) {
  # ordering and factor truncation
  data <- order.factors(data, x, y, z, w=w, verbose=verbose)
  for (v in c(x, y, z)) {
    data <- quantize(data, v, w=w, n.levels=max.factor.levels)
    if (is.numeric(data[[v]])) {
      # exclude=NULL keeps NA as an explicit level, so it gets a stripe
      data[[v]] <- ordered(data[[v]], exclude=NULL)
    }
  }
  x.lev <- length(levels(data[[x]]))
  y.lev <- length(levels(data[[y]]))
  z.lev <- length(levels(data[[z]]))
  # joint and marginal weight tables; y.cond is the y share within each x,
  # z.cond is the z share within each (x,y) cell
  xyz.tab <- marginalize(data, c(x, y, z), w)
  xy.tab <- marginalize(data, c(x, y), w)
  x.tab <- marginalize(data, x, w)
  y.tab <- marginalize(data, y, w)
  y.cond <- sweep(xy.tab, 1, x.tab, FUN='/')
  z.cond <- sweep(xyz.tab, 1:2, xy.tab, FUN='/')
  # horizontal layout from the x marginals (same scheme as gplt.spine)
  x.tab.df <- as.data.frame(x.tab, responseName='x.mrg')
  x.tab.df$x.mrg.cum <- c(0, cumsum(x.tab.df$x.mrg)[1:(x.lev-1)])
  x.tab.df$x.cnt <- seq(0, x.lev-1)
  x.tab.df$x.center <- with(x.tab.df, x.mrg.cum + x.mrg/2 + x.cnt * plot.margin.x)
  # vertical layout from the y marginals
  y.tab.df <- as.data.frame(y.tab, responseName='y.mrg')
  y.tab.df$y.mrg.cum <- c(0, cumsum(y.tab.df$y.mrg)[1:(y.lev -1)])
  y.tab.df$y.cnt <- seq(0, y.lev-1)
  y.tab.df$y.center <- with(y.tab.df, y.mrg.cum + y.mrg/2 + y.cnt * plot.margin.y)
  y.cond.df <- as.data.frame(aperm(y.cond), responseName='y.cond')
  y.cond.df$y.cond.cum <- as.vector(apply(y.cond, 1, function(x) {c(0, cumsum(x)[1:(y.lev-1)])}))
  z.cond.df <- as.data.frame(aperm(z.cond), responseName='z.cond')
  # note: if there is no data for an x/y combination, make z coordinates
  # non.NA and evenly spaced for displaying 'zero-area' rects
  z.cond.df <- plyr::ddply(z.cond.df, c(y,x), plyr::here(transform), z.cond=norm.sum(z.cond))
  # cumulative shares, shifted down by one slot -> left edge offset per rect
  z.cond.df <- plyr::ddply(z.cond.df, c(y,x), transform, z.cond.cum=cumsum(z.cond))
  z.cond.df$z.cond.cum <- z.cond.df$z.cond.cum - z.cond.df$z.cond
  #z.cond.df$z.cond.cum <- as.vector(apply(z.cond, 2:1,
  #                         function(x) {c(0, cumsum(x)[1:(z.lev-1)])}))
  plot.data <- z.cond.df
  plot.data <- merge(plot.data, y.cond.df)
  plot.data <- merge(plot.data, x.tab.df)
  plot.data <- merge(plot.data, y.tab.df)
  # rectangle coordinates: z subdivides each x stripe horizontally,
  # y stacks rows vertically; gaps accumulate via x.cnt/y.cnt
  plot.data$left <- plot.data$x.mrg.cum + plot.data$z.cond.cum * plot.data$x.mrg + plot.data$x.cnt * plot.margin.x
  plot.data$right <- plot.data$left + plot.data$z.cond * plot.data$x.mrg
  plot.data$bottom <- plot.data$y.cond.cum + plot.data$y.cnt * plot.margin.y
  plot.data$top <- plot.data$bottom + plot.data$y.cond
  p <- ggplot(plot.data, aes_(xmin=~left, xmax=~right, ymin=~bottom, ymax=~top)) +
    # make outline color same as fill color - black outline will hide colors
    # for very narrow stripes
    # size=1: make sure empty rects are still visible
    geom_rect(aes_string(color=z, fill=z), size=1, alpha=0.6, na.rm=TRUE, ...) +
    scale_y_continuous(breaks=y.tab.df$y.center, labels=levels(data[[y]])) +
    scale_x_continuous(breaks=x.tab.df$x.center, labels=levels(data[[x]]))
  p <- add.color.fill(p, data, z,
                      palette.brewer.seq=palette.brewer.seq,
                      palette.brewer.qual=palette.brewer.qual)
  p <- add.color.legend(p, data, z, 'fill')
  p <- p + xlab(x) + ylab(y) +
    theme_panel_fac_y +
    theme(axis.ticks.x=element_blank(),
          axis.ticks.y=element_blank())
  # write labels slanted to mitigate overlap
  if (estimate.label.overlap(labels=levels(data[[x]]), breaks=x.tab.df$x.center)) {
    p <- p + theme_slanted_text_x
  }
  list(plot=p, data=data)
}
# blank plot with optional message text
# Blank unit-square canvas with all axis decoration removed; optionally
# shows a centered message (used, e.g., as a placeholder in multi-plots).
gplt.blank <- function(text=NULL, ...) {
  canvas <- data.frame(x=c(0,1), y=c(0,1))
  p <- ggplot(canvas, aes_(x=~x, y=~y)) +
    geom_blank(...) +
    labs(x=NULL, y=NULL) +
    xlim(0,1) +
    ylim(0,1) +
    theme(axis.ticks.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.y=element_blank(),
          axis.text.y=element_blank(),
          panel.background=element_blank())
  # optional centered annotation
  if (!is.null(text)) {
    p <- p + annotate('text', x=0.5, y=0.5, label=text, hjust=0.5, vjust=0.5)
  }
  list(plot=p, data=NULL)
}
#' Create options structure for \code{plotluck}
#'
#' @param opts An (optional) named list to start with. Anything not specified in ... will be inherited from opts.
#' @param ... Parameters to override default settings
#' @return A named list of options, usable as argument to function \code{\link{plotluck}}.
#'
#' \code{plotluck} accepts a list of options to modify its behavior. Calling
#' \code{plotluck.options} without arguments produces a list with the default
#' values. Specifying any number of attribute/value pairs overrides them
#' selectively.
#'
#'\tabular{lll}{
#' \strong{Option}\tab\strong{Default}\tab\strong{Comment}\cr
#' \code{na.rm} \tab \code{FALSE} \tab Do not show missing factor values as separate level.\cr
#' \code{geom} \tab \code{"auto"} \tab Override type of plot; the available types for a given formula and variables can be inspected with \code{verbose=TRUE}.\cr
#' \code{sample.max.rows} \tab \code{100000} \tab If the data set has more rows than that, sample it down.\cr
#' \code{trans.log.thresh} \tab \code{2} \tab Threshold for logarithmic axis scaling. Visible magnification factor of the central region of the distribution.\cr
#' \code{n.breaks.histogram} \tab \code{NA} \tab Override the number of histogram breaks.\cr
#' \code{min.points.hex} \tab \code{5000} \tab Minimum data points required to display a hexbin plot.\cr
#' \code{min.points.density} \tab \code{20} \tab Minimum data points required to display a density or histogram plot.\cr
#' \code{min.points.violin} \tab \code{20} \tab Minimum data points required to display a violin or box plot.\cr
#' \code{resolution.heat} \tab \code{30} \tab Grid spacing for heat maps.\cr
#' \code{dedupe.scatter} \tab \code{'area'} \tab To represent multiple instances of the same coordinates in scatter plot: scale the point size, or use jitter?\cr
#' \code{min.points.jitter} \tab \code{3} \tab Minimum number of coordinate duplicates to start jittering points.\cr
#' \code{jitter.x} \tab \code{0.4} \tab Amount of jitter to apply in horizontal direction, as a fraction of resolution.\cr
#' \code{jitter.y} \tab \code{0.4} \tab Amount of jitter to apply in vertical direction, as a fraction of resolution.\cr
#' \code{few.unique.as.factor} \tab \code{5} \tab If a numeric variable has less than that many unique values, make it an ordered factor.\cr
#' \code{max.factor.levels} \tab \code{30} \tab For factors with more than that many levels, least frequent ones will be pruned into "other".\cr
#' \code{max.factor.levels.color} \tab \code{3} \tab Maximum number of factor levels that can be represented as colors in the same plot.\cr
#' \code{max.factor.levels.violin} \tab \code{20} \tab Maximum number of levels to plot violins; rather switch to box plot. \cr
#' \code{max.factor.levels.spine.x} \tab \code{20} \tab Maximum number of levels to plot in x-direction in a spine plot. \cr
#' \code{max.factor.levels.spine.y} \tab \code{10} \tab Maximum number of levels to plot in y-direction in a spine plot. \cr
#' \code{max.factor.levels.spine.z} \tab \code{5} \tab Maximum number of levels to represent as colors in a spine plot. \cr
#' \code{spine.plot.margin.x} \tab \code{0.05} \tab Horizontal gap between rectangles in a spine plot. \cr
#' \code{spine.plot.margin.y} \tab \code{0.02} \tab Vertical gap between rectangles in a spine plot. \cr
#' \code{facet.max.cols} \tab \code{10} \tab Maximum number of facet columns for conditional variables. \cr
#' \code{facet.max.rows} \tab \code{10} \tab Maximum number of facet rows for conditional variables. \cr
#' \code{facet.num.wrap} \tab \code{6} \tab Preferred number of facets for single conditional variable. \cr
#' \code{facet.num.grid} \tab \code{3} \tab Preferred number of facets for each of two conditional variables. \cr
#' \code{prefer.factors.vert} \tab \code{TRUE} \tab In mixed numeric/factor plots, use vertical axis for the factor. \cr
#' \code{fill.default} \tab \code{"deepskyblue"} \tab Default fill color for density and histogram plots. \cr
#' \code{palette.brewer.seq} \tab \code{"YlGn"} \tab Sequential brewer palette name. \cr
#' \code{palette.brewer.qual} \tab \code{"Set1"} \tab Qualitative brewer palette name. \cr
#' \code{multi.entropy.order} \tab \code{TRUE} \tab Use estimated conditional entropy to order multi-plots. \cr
#' \code{multi.max.rows} \tab \code{6} \tab Maximum number of rows for multi-plots. \cr
#' \code{multi.max.cols} \tab \code{6} \tab Maximum number of columns for multi-plots. \cr
#' \code{multi.in.grid} \tab \code{TRUE} \tab In multi-plots, make a page with multiple plots, or generate each one separately. \cr
#' \code{verbose} \tab \code{FALSE}\tab Print information about plot types, ordering, scaling, etc. \cr}
#'
#' @note \code{plotluck}'s aim is to provide a function that is usable
#' "out-of-the-box", with no or very little manual tweaking. If you find
#' yourself needing to change option values repeatedly or find the presets to
#' be suboptimal, please contact the author.
#'
#' @seealso \code{\link{plotluck}}
#' @export
#'
#' @examples
#' # list all default options
#' plotluck.options()
#'
#' data(iris)
#' # default with violin plot
#' plotluck(iris, Petal.Length~Species)
#'
#' # use box-and-whiskers plot instead
#' plotluck(iris, Petal.Length~Species, opts=plotluck.options(geom='box'))
#'
#' @export
plotluck.options <- function(opts,...) {
  # start from the full set of defaults unless a base list was supplied
  if (missing(opts)) {
    opts <- list(
      na.rm=FALSE,
      geom='auto',
      sample.max.rows=100000,
      trans.log.thresh=2,
      n.breaks.histogram=NA,
      min.points.hex=5000,
      min.points.density=20,
      min.points.violin=20,
      min.points.jitter=3,
      jitter.x=0.4,
      jitter.y=0.4,
      resolution.heat=30,
      dedupe.scatter='area',
      few.unique.as.factor=5,
      max.factor.levels=30,
      max.factor.levels.color=3,
      max.factor.levels.violin=20,
      max.factor.levels.spine.x=20,
      max.factor.levels.spine.y=10,
      max.factor.levels.spine.z=5,
      spine.plot.margin.x=0.05,
      spine.plot.margin.y=0.02,
      facet.max.cols=10,
      facet.max.rows=10,
      facet.num.wrap=6,
      facet.num.grid=3,
      prefer.factors.vert=TRUE,
      fill.default='deepskyblue',
      palette.brewer.seq='YlGn',
      palette.brewer.qual='Set1',
      multi.entropy.order=TRUE,
      multi.max.rows=6,
      multi.max.cols=6,
      multi.in.grid=TRUE,
      verbose=FALSE)
  }
  # apply overrides, rejecting any option name that is not recognized
  overrides <- list(...)
  unknown <- setdiff(names(overrides), names(opts))
  if (length(unknown) > 0) {
    # note: 'sep' is meaningless together with 'collapse'; use collapse only
    stop(sprintf('Unknown options: %s', paste(unknown, collapse=', ')))
  }
  opts[names(overrides)] <- overrides
  opts
}
# Down-sample a data frame to at most max.rows rows.
# If a weight column name w is given (other than the sentinel 'NULL'),
# rows are sampled with probability proportional to their weight.
# Returns the data unchanged if it already fits.
sample.data <- function(data, w='NULL', max.rows) {
  n.row <- nrow(data)
  if (n.row > max.rows) {
    if (w == 'NULL') {
      # seq_len() instead of 1:n (safe idiom); drop=FALSE keeps a data frame
      data <- data[sample(seq_len(n.row), max.rows), , drop=FALSE]
    } else {
      # note: weighted sampling itself can be quite slow
      data <- data[sample(seq_len(n.row), max.rows, prob=data[[w]]), , drop=FALSE]
    }
  }
  data
}
# expected input formula: 'x', 'x*y', 'x+y', 'x+1'
# output: list of occurring variables, ignoring constants
# Parse one side of a formula term: 'x', 'x*y', 'x+y', 'x+1'.
# Returns the character vector of variable names, ignoring numeric
# constants (e.g. the '1' in 'y~1'). Stops on any other shape.
parse.formula.term <- function(form) {
  # a bare numeric constant contributes no variables
  if (is.numeric(form)) {
    return(character())
  }
  # a single symbol, e.g. 'x'
  if (is.name(form)) {
    return(as.character(form))
  }
  # a combination of two operands via '+' or '*'
  # (is.call() guard: anything else falls through to the error below,
  # instead of failing on the form[[1]] subscript)
  if (is.call(form) && as.character(form[[1]]) %in% c('+', '*')) {
    res <- character()
    if (is.name(form[[2]])) {
      res <- as.character(form[[2]])
    } else if (!is.numeric(form[[2]])) {
      # e.g. a nested call like 'a+b' inside 'a+b+c'
      stop('Invalid formula: at most two dependent or conditional variables allowed')
    }
    if (is.name(form[[3]])) {
      res <- c(res, as.character(form[[3]]))
    } else if (!is.numeric(form[[3]])) {
      stop('Invalid formula: at most two dependent or conditional variables allowed')
    }
    return(res)
  }
  stop('Invalid formula')
}
# expected input: conditional formula with up to three variables
# output: list of lists (response, independent variable, conditionals)
# Parse a conditional formula with up to three variables, e.g. y ~ x | z.
# Returns list(response, independent variables, conditional variables).
parse.formula <- function(form) {
  # a two-sided formula is a call to '~' with exactly lhs and rhs;
  # the length check gives a clean error for one-sided formulas instead
  # of a 'subscript out of bounds' failure below
  if (length(form) != 3 || as.character(form[[1]]) != '~') {
    stop('Invalid formula')
  }
  node <- form[[2]]
  if (!is.name(node)) {
    stop('Invalid formula: only one dependent variable allowed')
  }
  resp <- as.character(node)
  indep <- character()
  cond <- character()
  node <- form[[3]]
  if (is.call(node) && as.character(node[[1]]) == '|') {
    # split 'x | z' into independent and conditional parts
    indep <- parse.formula.term(node[[2]])
    cond <- parse.formula.term(node[[3]])
  } else {
    indep <- parse.formula.term(node)
  }
  # '.' (all variables) is exempt from the duplicate-use checks
  if (resp != '.' && (resp %in% indep || resp %in% cond)) {
    stop('Invalid formula: variable cannot be both dependent and independent')
  }
  if (length(intersect(indep, cond)) > 0) {
    stop('Variables can only be used once')
  }
  if (length(indep) + length(cond) > 2) {
    stop('Invalid formula: at most 3 variables allowed')
  }
  return(list(resp, indep, cond))
}
# try to match user input to column names, if not exact
# Map (possibly abbreviated, case-insensitive) user-supplied variable
# names to actual column names of the data frame; stops on a name with
# no (or an ambiguous) match. The '.' placeholder passes through.
correct.varnames <- function(x, data) {
  # nothing to resolve for an empty selection or the dot placeholder
  if (length(x) == 0 || length(x) == 1 && x == '.') {
    return(x)
  }
  for (i in seq_along(x)) {
    # pmatch: unique partial matching; case is normalized on both sides
    idx <- pmatch(tolower(x[i]), tolower(names(data)))
    if (!is.na(idx)) {
      x[i] <- names(data)[idx]
    } else {
      stop(sprintf('No such variable: %s', x[i]))
    }
  }
  x
}
# process weights
# Validate and normalize the observation-weight column.
# - 'NULL' sentinel: no weights, data returned unchanged
# - weights must be numeric and non-negative
# - rows with NA weight are dropped (with a message)
# - weights are coerced to double, since integer weights can overflow in
#   Hmisc::wtd.quantile()
process.weights <- function(data, weights) {
  if (weights == 'NULL') {
    return(data)
  }
  if (!is.numeric(data[[weights]])) {
    stop('Weight must be numeric')
  }
  # BUG FIX: without na.rm=TRUE, any(x < 0) is NA when weights contain NA,
  # which made the if() condition itself fail before NA rows were dropped
  if (any(data[[weights]] < 0, na.rm=TRUE)) {
    stop('Weight must be non-negative')
  }
  weight.na <- is.na(data[[weights]])
  if (any(weight.na)) {
    # BUG FIX: message() does not interpolate sprintf-style formats;
    # format explicitly so the count appears in the text
    message(sprintf('Weight is NA for %d instances, deleting', sum(weight.na)))
    data <- data[!weight.na, , drop=FALSE]
  }
  data[[weights]] <- as.double(data[[weights]])
  data
}
# In verbose mode, report the selected geom together with the alternatives
# that were considered ('auto' is not a real geom and is filtered out).
info.options <- function(chosen, choices, verbose=FALSE) {
  if (verbose) {
    alternatives <- setdiff(choices, 'auto')
    msg <- sprintf("Choosing geom='%s' out of possible options: '%s'\n",
                   chosen, paste(alternatives, collapse=', '))
    cat(msg)
  }
}
# Print a diagnostic line when a threshold-controlled condition holds,
# appending the triggering option's name and value for context.
# - cond:      logical, whether to print at all
# - msg:       sprintf-style format string
# - threshold: the option value; its *expression* is recovered via
#              match.call(), so the printed text shows the option's name
#              as written by the caller (e.g. 'opts$min.points.hex')
# - ...:       values interpolated into msg
info.threshold <- function(cond, msg, threshold, ...) {
  if (cond) {
    # deparse(match.call()$threshold) yields the caller's argument expression
    cat(sprintf(msg, ...), sprintf('(%s = %s)\n', deparse(match.call()$threshold), threshold))
  }
}
#' "I'm feeling lucky" for ggplot
#'
#' The purpose of \code{plotluck} is to let the user focus on \emph{what} to plot,
#' and automate the \emph{how}. Given a dependency formula with up to three
#' variables, it tries to choose the most suitable type of plot. It also automates
#' sampling large datasets, correct handling of observation weights, logarithmic
#' axis scaling, ordering and pruning of factor levels, and overlaying smoothing
#' curves or median lines.
#'
#' @param data a data frame.
#' @param formula an object of class \code{\link[stats]{formula}}: a symbolic description
#' of the relationship of up to three variables.
#' \tabular{lll}{
#' \strong{Formula}\tab\strong{Meaning}\tab\strong{Plot types}\cr
#' \code{y~1}\tab Distribution of single variable\tab Density, histogram, scatter, dot, bar\cr
#' \code{y~x}\tab One explanatory variable\tab Scatter, hex, violin, box, spine, heat\cr
#' \code{y~x+z}\tab Two explanatory variables\tab Heat, spine\cr
#' \code{y~1|z} or \code{y~x|z}\tab One conditional variable\tab Represented through coloring or faceting\cr
#' \code{y~1|x+z}\tab Two conditional variables\tab Represented through faceting\cr}
#' In addition to these base plot types, the dot symbol \code{"."} can also be used,
#' and denotes all variables in the data frame. This gives rise to a lattice or
#' series of plots (use with caution, can be slow).
#' \tabular{lll}{
#' \strong{Formula}\tab\strong{Meaning}\cr
#' \code{.~1}\tab Distribution of each variable in the data frame, separately\cr
#' \code{y~.}\tab Plot \code{y} against each variable in the data frame\cr
#' \code{.~x}\tab Plot each variable in the data frame against \code{x}\cr
#' \code{.~.}\tab Plot each variable in the data frame against each other.\cr}
#' See also section "Generating multiple plots at once" below.
#' @param weights observation weights or frequencies (optional).
#' @param opts a named list of options (optional); See also \code{\link{plotluck.options}}.
#' @param ... additional parameters to be passed to the respective ggplot2 geom objects.
#' @return a ggplot object, or a plotluck.multi object if the dot symbol was used.
#' @export
#' @keywords hplot
#' @keywords aplot
#' @keywords dplot
#' @concept automation
#' @concept visualization
#' @concept plotting
#' @concept exploratory data analysis
#' @concept ggplot
#' @concept ggplot2
#' @concept heat map
#' @concept density plot
#' @concept violin plot
#' @concept hexbin
#' @concept histogram
#' @concept bar plot
#' @concept box plot
#' @concept spine plot
#' @concept mosaic plot
#' @concept scatter plot
#' @concept heat map
#'
#' @seealso \code{\link{plotluck.options}}, \code{\link{sample.plotluck}}, \code{\link[ggplot2]{ggplot}}
#'
#' @section Determining the type of plot: Besides the shape of the formula, the
#' algorithm takes into account the type of variables as either numeric, ordered,
#' or unordered factors. Often, it makes sense to treat ordered factors similarly
#' as numeric types.
#'
#' One-variable numeric (resp. factor) distributions are usually represented by
#' density (resp. Cleveland dot) charts, but can be overridden to histograms or
#' bar plots using the \code{geom} option. Density plots come with an overlaid
#' vertical median line.
#'
#' For two numerical variables, by default a scatter plot is produced, but for
#' high numbers of points a hexbin is preferred (option \code{min.points.hex}).
#' These plots come with a smoothing line and standard deviation.
#'
#' The relation between two factor variables can be depicted best by spine
#' (a.k.a., mosaic) plots, unless they have too many levels (options
#' \code{max.factor.levels.spine.x}, \code{max.factor.levels.spine.y},
#' \code{max.factor.levels.spine.z}). Otherwise, a heat map is produced.
#'
#' For a mixed-type (factor/numeric) pair of variables, violin (overridable
#' to box) plots are generated. However, if the resulting graph would contain
#' too many (more than \code{max.factor.levels.violin}) violin plots in a row,
#' the algorithm switches automatically. The number of bins of a histogram can
#' be customized with \code{n.breaks.histogram}. The default setting, \code{NA},
#' applies a heuristic estimate.
#'
#' The case of a response depending on two variables (\code{y~x+z}) is covered by
#' either a spine plot (if all are factors) or a heat map.
#'
#' In many cases with few points for one of the aggregate plots, a scatter
#' looks better (options \code{min.points.density}, \code{min.points.violin},
#' \code{min.points.hex}).
#'
#' If each factor combination occurs only once in the data set, we resort to
#' bar plots.
#
#' @section Conditional variables: Conditional variables are represented by either
#' trying to fit into the same graph using coloring (\code{max.factor.levels.color}),
#' or by facetting (preferred dimensions \code{facet.num.wrap} (resp.
#' \code{facet.num.grid}) for one resp. two variables). Numeric vectors are
#' discretized accordingly. Facets are laid out horizontally or vertically
#' according to the plot type, up to maximum dimensions of \code{facet.max.rows}
#' and \code{facet.max.cols}.
#'
#' @section Reordering of factor levels: To better illustrate the relation
#' between an independent factor variable and a dependent numerical variable
#' (or an ordered factor), levels are reordered according to the value of the
#' dependent variable. If no other numeric or ordered variable exists, we
#' sort by frequency.
#'
#' @section Instance weights: Argument \code{weights} allows to specify weights
#' or frequency counts for each row of data. All plots and summary statistics
#' take weights into account when supplied. In scatter and heat maps, weights
#' are indicated either by a shaded disk with proportional area (default) or by
#' jittering (option \code{dedupe.scatter}), if the number of duplicated points
#' exceeds \code{min.points.jitter}. The amount of jittering can be controlled
#' with \code{jitter.x} and \code{jitter.y}.
#'
#' @section Axis scaling: \code{plotluck} supports logarithmic and log-modulus
#' axis scaling. log-modulus is considered if values are both positive and
#' negative; in this case, the transform function is \code{f(x) = sign(x) *
#' log(1+abs(x))}.
#'
#' The heuristic to apply scaling is based on the proportion of total display
#' range that is occupied by the 'core' region of the distribution between the
#' lower and upper quartiles; namely, whether the transform could
#' magnify this region by a factor of at least \code{trans.log.thresh}.
#'
#' @section Missing values: By default, missing (\code{NA} or \code{NaN}) values
#' in factors are shown as a special factor level \code{"?"}. They can be
#' removed by setting \code{na.rm=TRUE}. Conventionally, missing numeric values
#' are not shown.
#'
#' @section Sampling: For very large data sets, plots can take a very long time
#' (or even crash R). \code{plotluck} has a built-in stop-gap: If the data
#' comprises more than \code{sample.max.rows}, it will be sampled down to that
#' size (taking into account \code{weights}, if supplied).
#'
#' @section Factor preprocessing: Character (resp. logical) vectors are converted to
#' unordered (resp. ordered) factors.
#'
#' Frequently, when numeric variables have very few values despite sufficient
#' data size, it helps to treat these values as the levels of a factor; this is
#' governed by option \code{few.unique.as.factor}.
#'
#' If an unordered factor has too many levels, plots can get messy. In this
#' case, only the \code{max.factor.levels} most frequent ones are retained,
#' while the rest are merged into a default level \code{".other."}.
#'
#' @section Coloring: If \code{color} or \code{fill} aesthetics are used to
#' distinguish different levels or ranges of a variable, the color scheme adjusts
#' to the type. Preferably, a sequential (resp. qualitative) palette is chosen
#' for a numeric/ordered (unordered) factor (\code{palette.brewer.seq},
#' \code{palette.brewer.qual}); see also \link[RColorBrewer]{RColorBrewer}.
#'
#' @section Generating multiple plots at once: If \code{formula} contains a dot
#' (\code{"."}) symbol, the function creates a number of 1D or 2D plots by calling
#' \code{plotluck} repeatedly. As described above, this allows either single
#' distribution, one-vs-all and all-vs-all variable plots. To save space,
#' rendering is minimal without axis labels.
#'
#' In the all-vs-all case, the diagonal contains 1D distribution plots, analogous
#' to the behavior of the default plot method for data frames, see
#' \code{\link[graphics]{plot.data.frame}}.
#'
#' With setting \code{in.grid=FALSE}, plots are produced in a sequence, otherwise
#' together on one or multiple pages, if necessary (default). Page size is
#' controlled by \code{multi.max.rows} and \code{multi.max.cols}.
#'
#' With \code{entropy.order=TRUE}, plots are sorted by an estimate of
#' empirical conditional entropy, with the goal of prioritizing the more
#' predictive variables. Set \code{verbose=TRUE} if you want to see the actual
#' values. For large data sets the calculation can be time consuming; entropy
#' calculation can be suppressed by setting \code{multi.entropy.order=FALSE}.
#'
#' @note The return value is an object of class \code{plotluck_multi}. This
#' class does not have any functionality; its sole purpose is to make this
#' function work in the same way as \code{ggplot} and \code{plotluck}, namely,
#' do the actual drawing if and only if the return value is not assigned.
#'
#' @section Debugging: With the option \code{verbose=TRUE} turned on, the function
#' will print out information about the chosen and applicable plot types, ordering,
#' log scaling, etc.
#'
#' @section Column name matching: Variable names can be abbreviated if they match
#' a column name uniquely by prefix.
#'
#' @section Remarks on supported plot types: By default, \code{plotluck}
#' uses violin and density plots in place of the more traditional box-and-whisker
#' plots and histograms; these modern graph types convey the shape of a
#' distribution better. In the former case, summary statistics like mean and
#' quantiles are less useful if the distribution is not unimodal; a wrong
#' choice of the number of bins of a histogram can create misleading artifacts.
#'
#' Following Cleveland's advice, factors are plotted on the y-axis to make labels
#' most readable and compact at the same time. This direction can be controlled
#' using option \code{prefer.factors.vert}.
#'
#' Due to their well-documented problematic aspects, pie charts and stacked bar
#' graphs are not supported.
#'
#' With real-world data (as opposed to smooth mathematical functions),
#' three-dimensional scatter, surface, or contour plots can often be hard to
#' read if the shape of the distribution is not suitable, data coverage is
#' uneven, or if the perspective is not carefully chosen depending on the data.
#' Since they usually require manual tweaking, we have refrained from
#' incorporating them.
#'
#' @section Remarks on the use of options: For completeness, we have included the
#' description of option parameters in the current help page. However, the
#' tenet of this function is to be usable "out-of-the-box", with no or very
#' little manual tweaking required. If you find yourself needing to change
#' option values repeatedly or find the presets to be suboptimal, please
#' contact the author.
#
#' @section Limitations: \code{plotluck} is designed for generic out-of-the-box
#' plotting, and not suitable to produce more specialized types of plots that
#' arise in specific application domains (e.g., association, stem-and-leaf,
#' star plots, geographic maps, etc). It is restricted to at most three variables.
#' Parallel plots with variables on different scales (such as time
#' series of multiple related signals) are not supported.
#'
#' @examples
#' # Single-variable density
#' data(diamonds, package='ggplot2')
#' plotluck(diamonds, price~1)
#' invisible(readline(prompt="Press [enter] to continue"))
#'
#' # Violin plot
#' data(iris)
#' plotluck(iris, Species~Petal.Length)
#' invisible(readline(prompt="Press [enter] to continue"))
#'
#' # Scatter plot
#' data(mpg, package='ggplot2')
#' plotluck(mpg, cty~model)
#' invisible(readline(prompt="Press [enter] to continue"))
#'
#' # Spine plot
#' data(Titanic)
#' plotluck(as.data.frame(Titanic), Survived~Class+Sex, weights=Freq)
#' invisible(readline(prompt="Press [enter] to continue"))
#'
#' # Facetting
#' data(msleep, package='ggplot2')
#' plotluck(msleep, sleep_total~bodywt|vore)
#' invisible(readline(prompt="Press [enter] to continue"))
#'
#' # Heat map
#' plotluck(diamonds, price~cut+color)
#'
#'\donttest{
#' # Multi plots
#
#' # All 1D distributions
#' plotluck(iris, .~1)
#'
#' # 2D dependencies with one fixed variable on vertical axis
#' plotluck(iris, Species~.)
#'}
#' # See also tests/testthat/test_plotluck.R for more examples!
#'
plotluck <- function(data, formula, weights,
opts=plotluck.options(),
...) {
# Main entry point: parse the formula, preprocess data (sampling, weights,
# factor handling), then dispatch to a plot builder depending on how many
# explanatory variables were given. Returns a ggplot object (or a
# plotluck.multi object when the formula contains ".").
# Parse the formula into response / explanatory / conditional variable names;
# abbreviated names are expanded against the column names of `data`.
parsed <- parse.formula(formula)
response <- correct.varnames(parsed[[1]], data)
indep <- correct.varnames(parsed[[2]], data)
cond <- correct.varnames(parsed[[3]], data)
vars <- c(response, indep, cond)
if (!missing(weights)) {
# `weights` is captured unevaluated (non-standard evaluation), so the user
# can pass a bare column name.
weights <- correct.varnames(deparse(substitute(weights)), data)
vars <- c(vars, weights)
} else {
# note: we set missing aesthetics to 'NULL' instead of NULL, because
# then ggplot interprets it correctly as a constant [e.g., aes(weights='NULL')]
weights <- 'NULL'
}
vars <- unique(vars)
# validation and preprocessing
if (!('.' %in% vars)) {
# keep only the columns actually referenced by the formula
data <- data[,vars, drop=FALSE]
}
# stop-gap for very large data: sample down to opts$sample.max.rows
data <- sample.data(data, weights, opts$sample.max.rows)
data <- process.weights(data, weights)
# convert character/logical columns, handle NA levels, etc.
data <- preprocess.factors(data, setdiff(vars, c(weights, '.')), opts$na.rm)
# NOTE(review): `data` was already sampled down above, so nrow(data) can no
# longer exceed opts$sample.max.rows here and this message appears
# unreachable — confirm whether the check should use the original row count.
info.threshold(opts$verbose && nrow(data) > opts$sample.max.rows,
'Data set has %s rows, sampling down', opts$sample.max.rows,
nrow(data))
if ('.' %in% vars) {
# trellis of plots
return(plotluck.multi(response, indep, data, w=weights,
in.grid=opts$multi.in.grid,
max.rows=opts$multi.max.rows,
max.cols=opts$multi.max.cols,
opts=opts,
...))
}
# numeric variables with very few distinct values are better shown as factors
for (v in c(response, indep)) {
if (is.numeric(data[[v]])) {
data <- discretize.few.unique(data, v, opts$few.unique.as.factor, opts$verbose)
}
}
# conditionals: discretize if numeric, and order
# note: to have a single data frame containing all plot and facet variables,
# we have to preprocess the conditionals beforehand.
if (length(cond) > 0) {
# if we have to discretize, we want that many facets
intervals <- NULL
if (length(cond) == 1) {
intervals <- opts$facet.num.wrap
info.threshold(opts$verbose && length(unique(data[[cond]])) > opts$facet.num.wrap, 'Discretizing %s into intervals', opts$facet.num.wrap, cond)
} else {
intervals <- opts$facet.num.grid
# NOTE(review): the condition tests opts$facet.num.grid but the reported
# threshold is opts$facet.num.wrap — likely should pass facet.num.grid.
info.threshold(opts$verbose && length(unique(data[[cond[1]]])) > opts$facet.num.grid, 'Discretizing %s into intervals', opts$facet.num.wrap, cond[1])
info.threshold(opts$verbose && length(unique(data[[cond[2]]])) > opts$facet.num.grid, 'Discretizing %s into intervals', opts$facet.num.wrap, cond[2])
}
# choose the variable whose values will drive factor-level reordering:
# prefer the response, else a numeric/ordered explanatory variable
order.by <- NULL
if (is.numeric(data[[response]]) || is.ordered(data[[response]])) {
order.by <- response
} else if (length(indep) == 1 &&
(is.numeric(data[[indep]]) || is.ordered(data[[indep]]))) {
order.by <- indep
}
for (i in seq(length(cond))) {
if (is.numeric(data[[cond[i]]])) {
# numeric conditional: bin into at most `intervals` facets
data <- discretize(data, cond[i], w=weights, max.breaks=intervals,
estimate.breaks=FALSE, method='histogram')
data[[cond[i]]] <- ordered(data[[cond[i]]])
} else {
# factor conditional: cap the number of levels, then order unordered
# factors by the chosen value variable
data <- limit.factor.levels(data, cond[i], w=weights,
max.levels=min(intervals, opts$max.factor.levels))
if (!is.ordered(data[[cond[i]]])) {
data <- order.factor.by.value(data, cond[i], order.by, weights, verbose=opts$verbose)
}
}
}
}
# split plotted variables into numeric and non-numeric groups; conditionals
# are factors at this point (discretized above)
vars.non.numeric <- cond
vars.numeric <- NULL
for (v in c(response, indep)) {
if (!is.numeric(data[[v]])) {
vars.non.numeric <- c(vars.non.numeric, v)
} else {
vars.numeric <- c(vars.numeric, v)
}
}
# compute joint number of factor combinations, and
# the maximum/median number of instances in them
if (length(vars.non.numeric) > 0) {
t <- table(data[,vars.non.numeric], useNA='ifany')
num.groups <- length(t)
med.points.per.group <- median(t)
max.points.per.group <- max(t)
} else {
num.groups <- 1
med.points.per.group <- nrow(data)
max.points.per.group <- nrow(data)
}
# precompute medians
if (length(vars.numeric) == 1) {
grp.med <- group.central(data, vars.numeric, vars.non.numeric, w=weights, method='median')
if (length(vars.non.numeric) == 0) {
# single value
data$.center. <- grp.med$.center.
} else {
# per-group medians, joined back onto the data
data <- merge(data, grp.med)
}
}
# note: factor levels cannot be limited before ordering is known!
# determine type of plot
if (length(indep) == 0) {
# distribution plot
res <- determine.plot.type.0(data, response, weights,
med.points.per.group, num.groups, opts, ...)
} else if (length(indep) == 1) {
res <- determine.plot.type.1(data, response, indep, cond, weights,
med.points.per.group, max.points.per.group,
num.groups, opts, ...)
} else { # length(indep) == 2
res <- determine.plot.type.2(data, response, indep[[1]], indep[[2]],
weights, opts, ...)
}
p <- res$plot
data <- res$data
type.plot <- res$type.plot
if (length(indep) < 2) {
# data is possibly modified, we need to pass it in
p <- add.conditional.layer(p, data, response, indep, cond, type.plot, opts)
}
# common minimal theme applied to every plot
p <- p + theme(panel.background=element_blank(), # get rid of gray background
strip.background=element_blank(), # no background for facet labels
axis.line.x=element_line(),
axis.line.y=element_line(),
panel.grid=element_line(color='black'))
p
}
# Choose and build a distribution plot for a single variable `response`.
# Numeric variables get a density, histogram, or (for few points) a scatter
# against a constant dummy axis; factors get a Cleveland dot or bar plot.
# `w` is the weight column name ('NULL' if absent). Returns a list with
# elements `plot`, `data` and `type.plot`.
determine.plot.type.0 <- function(data, response, w, med.points.per.group,
                                  num.groups, opts,
                                  ...) {
  if (is.numeric(data[[response]])) {
    choices.geom <- c('density', 'histogram', 'scatter', 'auto')
    opts.geom <- match.arg(opts$geom, choices.geom)
    if (opts.geom == 'auto') {
      # a density estimate only makes sense with enough points per group
      opts.geom <- if (med.points.per.group > opts$min.points.density) 'density' else 'scatter'
    }
    if (opts.geom == 'density') {
      type.plot <- 'density'
      res <- gplt.density(data, response, w=w,
                          trans.log.thresh=opts$trans.log.thresh,
                          verbose=opts$verbose,
                          ...)
    } else if (opts.geom == 'histogram') {
      type.plot <- 'histogram'
      res <- gplt.histogram(data, response, w=w,
                            n.breaks=opts$n.breaks.histogram,
                            trans.log.thresh=opts$trans.log.thresh,
                            verbose=opts$verbose,
                            ...)
    } else { # opts.geom == 'scatter'
      # to plot a distribution as a line of points, make up a constant
      # coordinate for the other axis
      dummy <- '.y_const'
      data[[dummy]] <- 1
      data[[dummy]] <- as.factor(data[[dummy]])
      type.plot <- 'scatter.num.1'
      if (!opts$prefer.factors.vert) {
        # dummy on the x-axis; hide its meaningless tick marks and label
        res <- gplt.scatter(data, dummy, response, w=w,
                            dedupe.scatter=opts$dedupe.scatter,
                            min.points.jitter=opts$min.points.jitter,
                            trans.log.thresh=opts$trans.log.thresh,
                            max.factor.levels=opts$max.factor.levels,
                            verbose=opts$verbose,
                            ...)
        res$plot <- res$plot +
          theme(axis.ticks.x=element_blank(),
                axis.text.x=element_blank()) +
          xlab('')
        # known issue: the conditioning layer can add theme_slanted_text_x,
        # in which case spurious '1's appear on the x-axis.
      } else {
        # dummy on the y-axis; hide its meaningless tick marks and label
        res <- gplt.scatter(data, response, dummy, w=w,
                            dedupe.scatter=opts$dedupe.scatter,
                            min.points.jitter=opts$min.points.jitter,
                            trans.log.thresh=opts$trans.log.thresh,
                            max.factor.levels=opts$max.factor.levels,
                            verbose=opts$verbose,
                            ...)
        res$plot <- res$plot +
          theme(axis.ticks.y=element_blank(),
                axis.text.y=element_blank()) +
          xlab('')
      }
    }
  } else { # factor response
    choices.geom <- c('dot', 'bar', 'auto')
    opts.geom <- match.arg(opts$geom, choices.geom)
    if (opts.geom == 'auto') {
      # bar plots read well only with a handful of categories
      opts.geom <- if (num.groups < 6) 'bar' else 'dot'
    }
    type.plot <- opts.geom
    res <- gplt.dot(data, response, w=w, vertical=opts$prefer.factors.vert,
                    max.factor.levels=opts$max.factor.levels, geom=opts.geom,
                    verbose=opts$verbose, ...)
  }
  info.options(opts.geom, choices.geom, opts$verbose)
  res$type.plot <- type.plot
  res
}
# Choose and build a plot relating one explanatory variable `indep` to the
# response, based on the two variables' types:
#   numeric ~ numeric -> hex (many points) or scatter
#   factor  ~ factor  -> spine (few enough levels, no facets) or heat map
#   mixed             -> violin / box / scatter, or dot/bar if every factor
#                        combination occurs at most once
# `cond` only affects smoothing-line fill and whether a spine plot is allowed;
# `w` is the weight column name ('NULL' if absent). Returns a list with
# elements `plot`, `data` and `type.plot`.
determine.plot.type.1 <- function(data, response, indep, cond,
                                  w, med.points.per.group,
                                  max.points.per.group,
                                  num.groups, opts,
                                  ...) {
  res <- NULL
  type.plot <- NULL
  opts.geom <- NULL
  choices.geom <- NULL
  if (is.numeric(data[[indep]]) && is.numeric(data[[response]])) {
    choices.geom <- c('hex', 'scatter', 'auto')
    opts.geom <- match.arg(opts$geom, choices.geom)
    if (opts.geom == 'auto') {
      # if a lot of points, choose hex
      opts.geom <- ifelse(nrow(data) >= opts$min.points.hex, 'hex', 'scatter')
    }
    # HACK: the smoothing line can only be colored with a default color if it
    # wouldn't be used to distinguish multiple groups.
    fill.smooth <- ifelse(length(cond) == 0, opts$fill.default, 'NULL')
    if (opts.geom == 'hex') {
      type.plot <- 'hex'
      res <- gplt.hex(data, indep, response, w=w, trans.log.thresh=opts$trans.log.thresh,
                      # hex plot already has color, make the smoothing line neutral
                      fill.smooth='grey', ...)
    } else {
      type.plot <- 'scatter.num'
      res <- gplt.scatter(data, indep, response, w,
                          dedupe.scatter=opts$dedupe.scatter,
                          min.points.jitter=opts$min.points.jitter,
                          jitter.x=opts$jitter.x,
                          jitter.y=opts$jitter.y,
                          trans.log.thresh=opts$trans.log.thresh,
                          max.factor.levels=opts$max.factor.levels,
                          fill.smooth=fill.smooth,
                          verbose=opts$verbose,
                          ...)
    }
  } else if (!is.numeric(data[[indep]]) && !is.numeric(data[[response]])) {
    # both factors: spine plot if it fits, otherwise heat map
    choices.geom <- c('spine', 'heat', 'auto')
    opts.geom <- match.arg(opts$geom, choices.geom)
    if (opts.geom == 'auto') {
      opts.geom <- ifelse(length(cond) == 0 && # HACK: spine plot cannot be faceted, due to difficulty of implementation
                            length(levels(data[[response]])) <= opts$max.factor.levels.spine.y &&
                            length(levels(data[[indep]])) <= opts$max.factor.levels.spine.x, 'spine', 'heat')
    }
    if (opts.geom == 'spine') {
      type.plot <- 'spine'
      res <- gplt.spine(data, indep, response, w=w,
                        plot.margin.x=opts$spine.plot.margin.x,
                        max.factor.levels=opts$max.factor.levels.spine.x,
                        palette.brewer.seq=opts$palette.brewer.seq,
                        palette.brewer.qual=opts$palette.brewer.qual,
                        verbose=opts$verbose,
                        ...)
    } else {
      type.plot <- 'heat'
      res <- gplt.heat(data, indep, response, z='NULL', w=w,
                       trans.log.thresh=opts$trans.log.thresh,
                       max.factor.levels=opts$max.factor.levels,
                       resolution.heat=opts$resolution.heat,
                       palette.brewer.seq=opts$palette.brewer.seq,
                       palette.brewer.qual=opts$palette.brewer.qual,
                       verbose=opts$verbose,
                       ...)
    }
  } else {
    # mixed types: one factor, one numeric
    if (max.points.per.group <= 1) {
      # special case: single point each - use bar plot
      choices.geom <- c('dot', 'bar', 'auto')
      opts.geom <- match.arg(opts$geom, choices.geom)
      if (opts.geom == 'auto') {
        opts.geom <- ifelse(num.groups < 6, 'bar', 'dot')
      }
      type.plot <- opts.geom
      var.fac <- response
      var.num <- indep
      if (is.numeric(data[[response]])) {
        var.fac <- indep
        var.num <- response
      }
      # the single numeric value per group acts as the weight/length of the bar
      res <- gplt.dot(data, var.fac, w=var.num, vertical=opts$prefer.factors.vert,
                      max.factor.levels=opts$max.factor.levels, geom=opts.geom,
                      verbose=opts$verbose, ...)
    } else {
      choices.geom <- c('violin', 'box', 'scatter', 'auto')
      opts.geom <- match.arg(opts$geom, choices.geom)
      var.fac <- response
      var.num <- indep
      # assignment of variables to axes depends only on opts$prefer.factors.vert
      switch <- is.numeric(data[[response]]) != opts$prefer.factors.vert
      if (switch) {
        var.num <- response
        var.fac <- indep
      }
      if (opts.geom == 'auto') {
        # if there are too many violin plots in a horizontal row, they
        # just look like black lines
        # BUG FIX: var.fac is a column *name*; levels() must be taken of the
        # column itself (levels of a string is NULL, length 0), otherwise the
        # box-plot fallback could never trigger.
        opts.geom <- ifelse(med.points.per.group > opts$min.points.violin,
                            ifelse(length(levels(data[[var.fac]])) <= opts$max.factor.levels.violin, 'violin', 'box'),
                            'scatter')
      }
      if (opts.geom == 'violin') {
        type.plot <- 'violin'
        res <- gplt.violin(data, var.fac, var.num, w=w,
                           trans.log.thresh=opts$trans.log.thresh,
                           max.factor.levels=opts$max.factor.levels,
                           verbose=opts$verbose,
                           ...)
      } else if (opts.geom == 'box') {
        type.plot <- 'box'
        res <- gplt.box(data, var.fac, var.num, w=w,
                        trans.log.thresh=opts$trans.log.thresh,
                        max.factor.levels=opts$max.factor.levels,
                        verbose=opts$verbose,
                        ...)
      } else { # opts.geom == 'scatter'
        # not enough points for violin or box plot
        type.plot <- 'scatter.mixed'
        res <- gplt.scatter(data, var.fac, var.num, w=w,
                            dedupe.scatter=opts$dedupe.scatter,
                            min.points.jitter=opts$min.points.jitter,
                            trans.log.thresh=opts$trans.log.thresh,
                            max.factor.levels=opts$max.factor.levels,
                            verbose=opts$verbose,
                            ...)
      }
    }
  }
  info.options(opts.geom, choices.geom, opts$verbose)
  res$type.plot <- type.plot
  res
}
# Choose and build a plot for a response with two explanatory variables:
# a 3-way spine plot when all three variables are factors with few enough
# levels, otherwise a heat map. `w` is the weight column name ('NULL' if
# absent). Returns a list with elements `plot`, `data` and `type.plot`.
determine.plot.type.2 <- function(data, response, indep1, indep2, w, opts,
                                  ...) {
  choices.geom <- c('spine', 'heat', 'auto')
  opts.geom <- match.arg(opts$geom, choices.geom)
  if (opts.geom == 'auto') {
    # spine requires all-factor variables whose level counts fit the
    # configured per-axis limits (&& short-circuits before levels() is
    # evaluated on a numeric column)
    fits.spine <- !is.numeric(data[[response]]) &&
      !is.numeric(data[[indep1]]) &&
      !is.numeric(data[[indep2]]) &&
      length(levels(data[[response]])) <= opts$max.factor.levels.spine.z &&
      length(levels(data[[indep2]])) <= opts$max.factor.levels.spine.y &&
      length(levels(data[[indep1]])) <= opts$max.factor.levels.spine.x
    opts.geom <- if (fits.spine) 'spine' else 'heat'
  }
  if (opts.geom == 'spine') {
    type.plot <- 'spine3'
    res <- gplt.spine3(data, indep1, indep2, response, w=w,
                       plot.margin.x=opts$spine.plot.margin.x,
                       plot.margin.y=opts$spine.plot.margin.y,
                       max.factor.levels=opts$max.factor.levels.spine.x,
                       palette.brewer.seq=opts$palette.brewer.seq,
                       palette.brewer.qual=opts$palette.brewer.qual,
                       verbose=opts$verbose,
                       ...)
  } else {
    type.plot <- 'heat3'
    res <- gplt.heat(data, indep1, indep2, response, w=w,
                     trans.log.thresh=opts$trans.log.thresh,
                     max.factor.levels=opts$max.factor.levels,
                     resolution.heat=opts$resolution.heat,
                     palette.brewer.seq=opts$palette.brewer.seq,
                     palette.brewer.qual=opts$palette.brewer.qual,
                     verbose=opts$verbose,
                     ...)
  }
  info.options(opts.geom, choices.geom, opts$verbose)
  res$type.plot <- type.plot
  res
}
# Build a named character vector of facet labels for column `x` of `data`,
# with either the first or last label prefixed by "<var> = ". Annotating only
# one label with the variable name saves space on the grid display.
# For numeric columns, the sorted unique values serve as levels.
# Returns a vector suitable for ggplot2::as_labeller (names = original levels).
format.facets <- function(data, x, show.var='first') {
  values <- data[[x]]
  if (is.numeric(values)) {
    lev <- as.character(sort(unique(values)))
  } else {
    lev <- levels(values)
  }
  names(lev) <- lev
  # scalar condition: plain if/else instead of ifelse()
  idx <- if (show.var == 'first') 1 else length(lev)
  lev[idx] <- sprintf('%s = %s', x, lev[idx])
  lev
}
# Add a facet_wrap layer over the single conditional variable `cond`.
# `preferred.order` ('row', 'col', or 'none') decides whether to cap the
# number of facet columns or rows; when filling columns first, the variable
# name is attached to the *last* facet label so it sits near the axis.
# Returns the augmented plot.
# FIX: the function previously ended in an assignment (`p <- p + ...`),
# returning its result invisibly; return the expression value explicitly.
add.facet.wrap <- function(p, data, cond, preferred.order, opts) {
  nrow <- NULL
  ncol <- NULL
  show.var <- 'first'
  if (preferred.order == 'row') {
    ncol <- opts$facet.max.cols
  } else if (preferred.order == 'col') {
    nrow <- opts$facet.max.rows
    show.var <- 'last'
  }
  facet.labels <- format.facets(data, cond, show.var)
  # known issue 1: always slanting text is not ideal, but no quick fix
  # known issue 2: it would be good to order vertical facets like the axis
  # (i.e., the lowest value at the bottom), but doesn't seem to be easy to configure.
  p + theme_slanted_text_x + # axis text might overlap in small diagrams
    facet_wrap(cond,
               labeller=as_labeller(facet.labels),
               nrow=nrow, ncol=ncol) +
    theme(panel.border=element_rect(fill=NA))
}
# Facet the plot on a 2D grid spanned by the two conditional variables in
# `cond`; each dimension's first facet label is prefixed with the variable
# name. Returns the augmented plot.
add.facet.grid <- function(p, data, cond) {
  row.labels <- format.facets(data, cond[1], 'first')
  col.labels <- format.facets(data, cond[2], 'first')
  grid.formula <- sprintf('%s~%s', cond[1], cond[2])
  # slant axis text, which can easily overlap in small diagrams
  p + theme_slanted_text_x +
    facet_grid(grid.formula,
               labeller=labeller(.rows=as_labeller(row.labels),
                                 .cols=as_labeller(col.labels))) +
    theme(panel.border=element_rect(fill=NA))
}
# When we get here, color will not be used for further distinction, so there
# is no harm in coloring (redundantly) by a factor already in the plot.
# Prefers the response as coloring variable, then the explanatory variable;
# 1D density/histogram plots with no factor get the default fill instead.
# Plot types that already carry their own coloring are returned unchanged.
redundant.factor.color <- function(p, data, response, indep, type.plot, opts) {
  # these types are already colored
  if (type.plot %in% c('spine', 'spine3', 'hex')) {
    return(p)
  }
  fac <- NULL
  if (!is.numeric(data[[response]])) {
    fac <- response
  } else if (length(indep) > 0 && !is.numeric(data[[indep]])) {
    fac <- indep
  }
  if (!is.null(fac)) {
    # color by the factor, but hide the (redundant) legend
    return(add.color.fill(p, data, fac,
                          palette.brewer.seq=opts$palette.brewer.seq,
                          palette.brewer.qual=opts$palette.brewer.qual) +
             guides(fill=FALSE, color=FALSE))
  }
  if (type.plot %in% c('density', 'histogram')) {
    # no factor available: tint 1D density plots with the default color
    return(p + aes(fill='something') +
             aes(color='something') +
             scale_fill_manual(values=opts$fill.default) +
             scale_color_manual(values=opts$fill.default) +
             guides(fill=FALSE, color=FALSE))
  }
  p
}
# represent conditional variables either by color or facets
# With no conditionals, fall back to redundant coloring. A single conditional
# with few levels is shown by color; otherwise it becomes a facet_wrap. Two
# conditionals always become a facet_grid. Returns the augmented plot (the
# last expression of the taken branch; note some branches end in an
# assignment, whose value is then returned invisibly).
add.conditional.layer <- function(p, data, response, indep, cond, type.plot, opts) {
if (length(cond) == 0) {
return(redundant.factor.color(p, data, response, indep, type.plot, opts))
}
# heuristic for facet layout
preferred.order <- 'none'
if (type.plot %in% c('density', 'histogram')) {
preferred.order <- 'col'
} else if (!(type.plot %in% c('hex', 'scatter.num', 'heat'))) { # pure numeric -> no preference
# mixed num/factor -> opposite to graph layout
preferred.order <- ifelse(opts$prefer.factors.vert, 'row', 'col')
}
if (length(cond) == 1) {
u <- length(unique(data[[cond]]))
# few enough levels and a plot type that can take a color/fill aesthetic:
# encode the conditional by color instead of facetting
if (u <= opts$max.factor.levels.color && !(type.plot %in% c('spine', 'heat', 'hex'))) {
p <- add.color.fill(p, data, cond,
palette.brewer.seq=opts$palette.brewer.seq,
palette.brewer.qual=opts$palette.brewer.qual)
# pick the aesthetic whose legend makes sense for this geom
aesth.legend <- ifelse(type.plot %in% c('density', 'histogram', 'bar', 'violin'), 'fill', 'color')
p <- add.color.legend(p, data, cond, aesth.legend)
} else {
# too many levels for color: facet instead, coloring redundantly
p <- redundant.factor.color(p, data, response, indep, type.plot, opts)
add.facet.wrap(p, data, cond, preferred.order, opts)
}
} else { #length(cond) == 2
#u1 <- length(unique(data[[cond[1]]]))
#u2 <- length(unique(data[[cond[2]]]))
# if (min(u1,u2) <= opts$max.factor.levels.color) {
# if (u1 < u2) {
# p + aes_string(fill=cond[1]) + aes_string(color=cond[1]) +
# add.facet.wrap(p, data, cond[2], preferred.order, opts)
# } else {
# p + aes_string(fill=cond[2]) + aes_string(color=cond[2]) +
# add.facet.wrap(p, data, cond[1], preferred.order, opts)
# }
# } else {
# example: plotluck(diamonds, price~1|cut+clarity)
p <- redundant.factor.color(p, data, response, indep, type.plot, opts)
add.facet.grid(p, data, cond)
#}
}
}
# plot multiple graphs in a grid layout, possibly over multiple pages
# suppress.*lab == 'all': no axis labels for this dimension
# suppress.*lab == 'margin' means:
# no x-axis labels except at the bottom;
# no y-axis labels except in the leftmost plots
#
# plots      - list of ggplot objects, drawn in row-major order
# rows, cols - per-page grid layout (default: roughly square)
# NOTE(review): the FALSE defaults work only via R's coercion in
# comparisons like suppress.xlab == 'all' (FALSE becomes "FALSE", so the
# comparison is FALSE); i.e. FALSE means "never suppress" -- confirm intended.
mplot <- function(plots, rows=ceiling(sqrt(length(plots))),
cols=ceiling(sqrt(length(plots))),
suppress.xlab=FALSE, suppress.ylab=FALSE) {
num.plots <- length(plots)
# fewer plots than columns: collapse to a single row
if (cols > num.plots) {
cols <- num.plots
rows <- 1
}
# number of subplot slots per page; slots are numbered row-major
size.page <- rows * cols
layout <- matrix(seq( size.page),
ncol=cols, nrow=rows, byrow=TRUE)
# one iteration per output page
for (p in 1:ceiling(num.plots/size.page)) {
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout))))
# make each plot, in the correct location
for (i in 1:min(size.page, num.plots-(p-1)*size.page)) {
# get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout==i, arr.ind=TRUE))
plot.idx <- (p-1)*size.page+i
plot.current <- plots[[plot.idx]]
# suppress the x label unless this subplot is in the bottom-most row
# whose cells are all occupied
if (suppress.xlab == 'all' ||
(suppress.xlab == 'margin' && matchidx$row != rows
&& plot.idx + cols <= num.plots)) { # last complete row
# note: the following doesn't work if a coord_flip() was applied:
# plot.current <- plot.current + xlab('')
# axis labels move with the coordinates, themes don't!
plot.current <- plot.current + theme(axis.title.x=element_blank())
}
# suppress the y label except in the leftmost column
if (suppress.ylab == 'all' ||
(suppress.ylab == 'margin' && matchidx$col != 1)) {
plot.current <- plot.current + theme(axis.title.y=element_blank())
}
# do not fail if a single subplot isn't well-defined
tryCatch(
print(plot.current, vp = grid::viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col)),
error = function(e) print(gplt.blank(e), vp = grid::viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col)))
}
}
}
# Plot a set of variable combinations of a data frame, one plotluck()
# subplot per combination.
#
# response - response variable name, or '.' for all columns
# indep    - independent variable name(s); '.' for all columns,
#            length 0 for univariate distribution plots
# data     - data frame
# w        - name of a weight column, or the literal string 'NULL' for none
#            (kept as a string because the subplot calls are assembled
#            textually below)
# in.grid  - if TRUE, arrange all subplots as a grid on common page(s)
# max.rows, max.cols - page layout limits for the grid
# opts     - plotluck options list
# ...      - passed through to the individual plotluck() calls
#
# Returns an object of class 'plotluck_multi' (see print.plotluck_multi).
plotluck.multi <- function(response, indep, data, w='NULL',
                           in.grid=TRUE,
                           max.rows=10, max.cols=10,
                           opts=plotluck.options(),
                           ...) {
  main <- deparse(substitute(data))
  vars.x <- NULL
  vars.y <- NULL  # bug fix: was misspelled 'vrs.y', leaving a stray variable
  if (response == '.') {
    vars.y <- names(data)
    if (length(indep) == 0) {
      vars.x <- '1'
      if (opts$verbose) {
        cat('Plotting distributions for all variables\n')
      }
    } else if (indep == '.') {
      vars.x <- names(data)
      if (opts$verbose) {
        cat('Plotting each variable against each other\n')
      }
    } else {
      vars.x <- indep
      if (opts$verbose) {
        cat(sprintf('Plotting each variable against %s\n', vars.x))
      }
    }
  } else { # response != '.'
    vars.y <- response
    if (indep != '.') {
      stop('Error: invalid formula')
    }
    if (opts$verbose) {
      cat(sprintf('Plotting %s against each variable\n', vars.y))
    }
    vars.x <- names(data)
  }
  # order variables by conditional entropy (most predictable first)
  cond.ent <- NULL
  if (opts$multi.entropy.order) {
    if (length(vars.y) > 1 && length(vars.x) == 1 && vars.x != '1') {
      cond.ent <- suppressWarnings(cond.entropy.data(data, given=vars.x, w=w))
      cond.ent <- cond.ent[, vars.x]
      vars.y <- vars.y[order(cond.ent)]
    } else if (length(vars.y) == 1 && length(vars.x) > 1) {
      cond.ent <- suppressWarnings(cond.entropy.data(data, target=vars.y, w=w))
      cond.ent <- cond.ent[vars.y, ]
      vars.x <- vars.x[order(cond.ent)]
    } else if (length(vars.y) > 1 && length(vars.x) > 1) {
      cond.ent <- suppressWarnings(cond.entropy.data(data, w=w))
      # average of row-wise and column-wise mean entropies per variable.
      # bug fix: the original added apply(cond.ent, 1, mean) to itself;
      # margin 2 assumed intended -- TODO confirm against the orientation
      # of cond.entropy.data's result matrix
      cond.ent <- (apply(cond.ent, 1, mean) + apply(cond.ent, 2, mean))/2
      vars.x <- vars.x[order(cond.ent)]
      vars.y <- vars.x
    }
    if (opts$verbose && !is.null(cond.ent)) {
      cat('Ordering variables according to conditional entropy:\n')
      cond.ent <- sort(cond.ent)
      out <- data.frame(var=names(cond.ent), cond.ent=cond.ent)
      print(out, row.names=FALSE)
    }
  }
  # all (x, y) combinations to be plotted, with their grid coordinates
  combi <- expand.grid(seq(length(vars.x)),
                       seq(length(vars.y)), stringsAsFactors=FALSE)
  names(combi) <- c('grid.x', 'grid.y')
  combi$x <- vars.x[combi$grid.x]
  combi$y <- vars.y[combi$grid.y]
  combi$fac.x <- sapply(combi$x, function(v) !is.numeric(data[[v]]))
  combi$fac.y <- sapply(combi$y, function(v) !is.numeric(data[[v]]))
  # try to make a square layout
  cols <- ceiling(sqrt(nrow(combi)))
  rows <- ceiling(nrow(combi)/cols)
  # figure out which plots are not at the margins, and
  # don't show axis labels for them if redundant
  suppress.xlab <- FALSE
  suppress.ylab <- FALSE
  theme.multi <- ''
  is.square <- TRUE
  if (!in.grid && opts$verbose) {
    cat('Not plotting all graphs on one page because multi.in.grid=FALSE\n')
  }
  if (in.grid) {
    # grid of small 'sparklines': remove all annotation text.
    # (whitespace in this string is stripped below before it is appended
    # to the textual plotluck() calls)
    theme.multi <- " + theme_minimal() +
      theme(legend.position='none',
      axis.ticks.x=element_blank(),
      axis.text.x=element_blank(),
      axis.ticks.y=element_blank(),
      axis.text.y=element_blank(),
      strip.background=element_blank(),
      strip.text=element_blank(),
      panel.grid.major=element_blank(),
      panel.grid.minor=element_blank(),
      axis.title.x=element_text(size=rel(0.7)),
      axis.title.y=element_text(size=rel(0.7)))"
    theme.multi <- gsub('\\s', '', theme.multi)
    # area scale slow and hardly visible
    opts$dedupe.scatter <- 'jitter'
    # use the limited space to make subgraphs relatively rectangular
    opts$facet.max.rows <- NULL
    opts$facet.max.cols <- NULL
    # do all plots fit on a single page?
    # NOTE(review): the second condition re-tests cols; 'rows > max.rows'
    # may have been intended -- confirm
    if (cols > max.cols || cols > max.rows) {
      is.square <- FALSE
      cols <- max.cols
      rows <- min(max.rows, ceiling(nrow(combi)/cols))
      if (opts$verbose) {
        # bug fix: the format string had three %d conversions but only two
        # arguments, which made sprintf() error; supply the plot count
        cat(sprintf('Cannot fit all %d plots on one page; multi.max.rows = %d, multi.max.cols= %d\n',
                    nrow(combi), max.rows, max.cols))
      }
    }
    if (length(vars.x) > 1 && length(vars.y) > 1 && is.square) {
      # if the full cross product fits on one page,
      # write the axis labels only on the margins
      suppress.xlab <- 'margin'
      suppress.ylab <- 'margin'
    }
    if (length(vars.x) == 1) {
      # do not repeat the axis label for the constant dimension
      if (vars.x != '1') {
        main <- vars.x
        suppress.xlab <- 'margin'
      } else {
        # distribution/density plot
        suppress.ylab <- 'all'
      }
    }
    if (length(vars.y) == 1) {
      if (vars.y != '1') {
        main <- vars.y
        suppress.ylab <- 'margin'
      } else {
        suppress.ylab <- 'all'
      }
    }
    # (dead code removed here: a block computing combi$opts/combi$labs was
    # unconditionally overwritten by the assignments below)
  }
  combi$opts <- sprintf('prefer.factors.vert=%s', opts$prefer.factors.vert)
  combi$labs <- ''
  if (length(vars.x) > 1 && length(vars.y) > 1 &&
      ((!in.grid) || (!is.square))) {
    # plots with the axis transposed are redundant;
    # if not part of grid, no need to show them
    combi <- combi[combi$grid.x >= combi$grid.y, ]
  } else {
    # For mixed factor/num combinations, plotluck() assigns the axes only
    # depending on opts$prefer.factors.vert. In the output lattice for
    # plotluck.multi, we need to control the direction (and show opposites
    # at mirror position)
    combi$opts <- 'prefer.factors.vert=TRUE'
    if (length(vars.x) > 1 && length(vars.y) == 1) {
      if (is.numeric(data[[vars.y]])) {
        combi$opts <- 'prefer.factors.vert=FALSE'
      }
    } else if (length(vars.x) == 1 && length(vars.y) > 1) {
      if (!is.numeric(data[[vars.x]])) {
        combi$opts <- 'prefer.factors.vert=FALSE'
      }
    } else { # length(vars.x) > 1 && length(vars.y) > 1
      below.diag <- combi$grid.x > combi$grid.y
      combi$opts[below.diag & combi$fac.x] <-
        'prefer.factors.vert=FALSE'
      combi$opts[(!below.diag) & (!combi$fac.y)] <-
        'prefer.factors.vert=FALSE'
      # label the corner elements with the variable name, instead
      # of 'density' or 'count' (diagonal contains distribution plots)
      idx.id <- combi$x == combi$y
      combi$labs[idx.id] <-
        sprintf('+xlab("%s")+ylab("%s")', combi$x[idx.id],
                combi$y[idx.id])
    }
  }
  # plot single-variable distributions for x~x
  combi$x[combi$x == combi$y] <- '1'
  # assemble the individual plotluck() calls textually and evaluate them.
  # NOTE: eval(parse()) is deliberate here -- formulas, options and extra
  # layers are constructed as strings above.
  if (w != 'NULL')
  {
    call.strs <- sprintf('plotluck(data, %s~%s, w=%s, opts=plotluck.options(opts,%s), ...)%s%s',
                         combi$y, combi$x, w, combi$opts, combi$labs, theme.multi)
  } else {
    call.strs <- sprintf('plotluck(data, %s~%s, opts=plotluck.options(opts,%s), ...)%s%s',
                         combi$y, combi$x, combi$opts, combi$labs, theme.multi)
  }
  call.str <- sprintf('list(%s)', paste(call.strs, collapse=','))
  # don't show verbose messages for all individual plots
  opts$verbose <- FALSE
  structure(list(plots=eval(parse(text=call.str)),
                 in.grid=in.grid,
                 cols=cols,
                 rows=rows,
                 suppress.xlab=suppress.xlab,
                 suppress.ylab=suppress.ylab,
                 main=main),
            class='plotluck_multi')
}
# auxiliary class to achieve consistent behavior of plotluck.multi with ggplot
# and plotluck: draw the plot if and only if the return value is not assigned.
#' @export
print.plotluck_multi <- function(x, ...) {
  if (x$in.grid) {
    # draw all subplots arranged in a (possibly multi-page) grid
    mplot(x$plots, rows=x$rows, cols=x$cols,
          suppress.xlab=x$suppress.xlab, suppress.ylab=x$suppress.ylab)
  } else {
    # one page per plot
    lapply(x$plots, print)
  }
  # bug fix: S3 print methods return their argument invisibly by convention;
  # previously the non-grid branch visibly returned the lapply() list,
  # which re-printed at top level
  invisible(x)
}
#' Run \code{plotluck} for a randomly generated formula.
#'
#' \code{sample.plotluck} samples a formula as follows:
#' \itemize{
#' \item Uniformly draw the number of variables (1-3).
#' \item For each variable, uniformly choose one of the existing variable types from the data set (numeric, ordered or unordered factor).
#' \item Uniformly select one of the data frame columns of that type.
#'}
#'
#' @param data a data frame
#' @param ... additional parameters to be passed to \code{plotluck}, such as
#' \code{weights} and \code{opts}.
#' @return a ggplot2 object.
#'
#' @seealso \code{\link{plotluck}}
#' @export
#' @examples
#' set.seed(42)
#' data(iris)
#' sample.plotluck(iris)
sample.plotluck <- function(data, ...) {
# Partition column indices of 'data' by variable type:
# ordered factors, unordered factors, everything else (treated as numeric).
idx.ord <- which(sapply(data, is.ordered))
idx.fac <- which(sapply(data, function(x) {is.factor(x) && (!is.ordered(x))}))
idx.num <- setdiff(setdiff(seq(length(data)),idx.fac), idx.ord)
# If a 'weights' argument was passed, exclude that column from sampling.
w <- match.call()$weights
if (!is.null(w)) {
idx.w <- which(names(data) == w)
idx.num <- setdiff(idx.num, idx.w)
}
# Collect the non-empty type groups; a type is drawn uniformly first,
# then a column uniformly within that group.
types <- list()
if (length(idx.fac) > 0) {
types[[length(types)+1]] <- idx.fac
}
if (length(idx.ord) > 0) {
types[[length(types)+1]] <- idx.ord
}
if (length(idx.num) > 0) {
types[[length(types)+1]] <- idx.num
}
# Draw the number of formula variables (1-3) and a cut position: variables
# after cond.pos become conditioning terms (after '|').
form.length <- sample(c(1,2,3),1)
cond.pos <- sample(seq(form.length), 1) + 0.5
form <- NULL
op <- NULL
for (pos in seq(form.length)) {
form <- c(form, op)
if (pos > cond.pos) {
# first conditioning variable; 1000 ensures '|' is emitted only once
cond.pos <- 1000
op <- '|'
if (pos == 1) {
# conditioning at the first position implies a '1' response
form <- c(form, '~', '1')
}
} else if (pos == 1) {
op <- '~'
} else {
op <- '+'
}
# Rejection-sample a column not yet in the formula.
# NOTE(review): this loops forever if fewer eligible columns exist than
# form.length requires -- confirm data sets are always large enough.
while (TRUE) {
var.type <- sample(seq(length(types)), 1)
var <- sample(seq(length(types[[var.type]])),1)
var <- names(data)[(types[[var.type]][var])]
if (!(var %in% form)) {
# avoid duplication of variables
break
}
}
form <- c(form, var)
}
if (length(form) == 1) {
# single variable: plot its distribution
form <- c(form, '~', '1')
}
form <-do.call(paste, as.list(form))
# collect extra arguments
# (deparse the original call so the equivalent plotluck() invocation can be
# echoed to the console and used as the plot title)
arg <- sapply(match.call()[seq(2,length(match.call()))], deparse)
if (length(arg) == 1) {
s <- sprintf('plotluck(%s, %s)\n', as.character(match.call()[2]), form)
} else {
s <- list()
for (i in seq(2, length(arg))) {
s[[length(s)+1]] <- sprintf('%s = %s', names(arg)[i], arg[i])
}
s <- do.call(paste, c(s, sep=', '))
s <- sprintf('plotluck(%s, %s, %s)\n', as.character(match.call()[2]), form, s)
}
cat(s)
plotluck(data, as.formula(form), ...) + labs(title=s) + theme(plot.title=element_text(size=10))
}
# same as sample.plotluck, but can be called with a list of options
# only used for testing/debugging!
# e.g.
# opts.list<-list()
# opts.list[[1]]<-plotluck.options(verbose=T)
# opts.list[[2]]<-plotluck.options(verbose=T,prefer.factors.vert=F)
# opts.list[[3]]<-plotluck.options(verbose=T,prefer.factors.vert=F,max.factor.levels.color=100)
# opts.list[[4]]<-plotluck.options(verbose=T,prefer.factors.vert=T,max.factor.levels.color=100,dedupe.scatter='jitter',min.points.hex=10000,min.points.density=1E20,min.points.violin=1E20)
# opts.list[[5]]<-plotluck.options(verbose=T,prefer.factors.vert=F,max.factor.levels=3)
sample.plotluck.testopts <- function(data, opts.list, ...) {
# Partition column indices by type, as in sample.plotluck.
idx.ord <- which(sapply(data, is.ordered))
idx.fac <- which(sapply(data, function(x) {is.factor(x) && (!is.ordered(x))}))
idx.num <- setdiff(setdiff(seq(length(data)),idx.fac), idx.ord)
# Exclude an explicitly passed weight column from sampling.
w <- match.call()$weights
if (!is.null(w)) {
idx.w <- which(names(data) == w)
idx.num <- setdiff(idx.num, idx.w)
}
# Non-empty type groups: type drawn first, then column within type.
types <- list()
if (length(idx.fac) > 0) {
types[[length(types)+1]] <- idx.fac
}
if (length(idx.ord) > 0) {
types[[length(types)+1]] <- idx.ord
}
if (length(idx.num) > 0) {
types[[length(types)+1]] <- idx.num
}
# Draw formula length (1-3) and the cut after which variables become
# conditioning terms.
form.length <- sample(c(1,2,3),1)
cond.pos <- sample(seq(form.length), 1) + 0.5
form <- NULL
op <- NULL
for (pos in seq(form.length)) {
form <- c(form, op)
if (pos > cond.pos) {
# first conditioning variable; 1000 ensures '|' is emitted only once
cond.pos <- 1000
op <- '|'
if (pos == 1) {
form <- c(form, '~', '1')
}
} else if (pos == 1) {
op <- '~'
} else {
op <- '+'
}
# Rejection-sample a column not yet used in the formula.
while (TRUE) {
var.type <- sample(seq(length(types)), 1)
var <- sample(seq(length(types[[var.type]])),1)
var <- names(data)[(types[[var.type]][var])]
if (!(var %in% form)) {
# avoid duplication of variables
break
}
}
form <- c(form, var)
}
if (length(form) == 1) {
form <- c(form, '~', '1')
}
form <-do.call(paste, as.list(form))
# collect extra arguments
# (thresholds differ from sample.plotluck because 'opts.list' occupies an
# extra call slot: args at index 3+ are the pass-through '...' arguments)
arg <- sapply(match.call()[seq(2,length(match.call()))], deparse)
if (length(arg) <= 2) {
s <- sprintf('plotluck(%s, %s)', as.character(match.call()[2]), form)
} else {
s <- list()
for (i in seq(3, length(arg))) {
s[[length(s)+1]] <- sprintf('%s = %s', names(arg)[i], arg[i])
}
s <- do.call(paste, c(s, sep=', '))
s <- sprintf('plotluck(%s, %s, %s)', as.character(match.call()[2]), form, s)
}
# Re-plot the same sampled formula once per option set, tagging each plot
# title with the option index.
for (i in seq(length(opts.list))) {
s.opt <- paste(s, sprintf(' [OPTION %d]', i), sep='')
cat(s.opt)
print(plotluck(data, as.formula(form), opts=opts.list[[i]],...) + labs(title=s.opt) + theme(plot.title=element_text(size=10)))
}
}
# NOTE(review): the following lines are website (rdrr.io-style) page
# boilerplate accidentally included in the source, not R code; commented
# out so the file parses.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.