R/bixplot.R

Defines functions pamc1d bixplot

Documented in bixplot pamc1d

bixplot = function(...,
                   names = NA,
                   add = FALSE,
                   at = NULL,
                   horizontal = FALSE,
                   col = "gray60", bodyCol = NULL,
                   bodyOpaque = 0.5,
                   bodyW = 0.80,
                   bodysize = "area_from_count",
                   modeCol = c("cadetblue3", "hotpink2",
                               "lawngreen", "orange","cyan3",
                               "coral2","gray60", "darkgoldenrod2",
                               "slateblue2", "brown2", "khaki3"),
                   curveCol = "black",
                   curveLwd = 1,
                   border = "black", boxCol = NULL,
                   boxOpaque = 0.5,
                   boxW = 0.32,
                   boxLwd = 2,
                   makewhiskers = TRUE,
                   innerwhiskers = TRUE,
                   rugCol = "black",
                   rugoutCol = par("fg"), # "black",
                   rugNumeric = NULL,
                   rugNumericColors = c("blue", "green"),
                   colorbarW = 0.12,
                   rugFactor = NULL,
                   rugFactorColors = c("red", "blue", "forestgreen"),
                   rugLwd = par("lwd"),
                   rugoutLwd = 2*rugLwd,
                   rugOpaque = 0.4,
                   jittering = TRUE,
                   jitteramount = NULL,
                   rugW = 0.12,
                   stickCol = "black",
                   stickLwd = 2,
                   boxwex = 1,
                   width = NULL, # leftover from boxplot
                   side = "no",
                   tick = TRUE,
                   diplevel = 0.01,
                   minN = 15,
                   kmax = NULL,
                   bigN = 500,
                   clusMinN = 3,
                   bw = "SJ-dpi",
                   kernel = "gaussian",
                   cut = 3,
                   cutmin = -Inf,
                   cutmax = Inf,
                   xlim = NULL,
                   ylim = NULL,
                   cex.axis = 1,
                   main = "bixplot",
                   cex.main = 1, line.main = 1,
                   cex.colorbar = 1,
                   xlab = "",
                   ylab = "",
                   cex.lab = 1, line.lab = 2.2,
                   xaxs = "r", yaxs = "r", las = 1,
                   ann = !add,
                   plot = TRUE,
                   log = NULL)

{ # bixplot for data input and formula input
  #
  # Arguments
  #
  # ... : a vector of numeric values to plot,
  #       or a dataframe with variables to plot, or a
  #       matrix with columns to plot, or a single list of
  #       vectors that may be of different lengths. The
  #       data may contain NAs, which will be left out.
  #       One can also provide a formula, such as y ~ grp,
  #       where y is a numeric vector of data values to be
  #       split into groups according to the grouping
  #       variable grp (usually a factor). Note that
  #       ~ g1 + g2 is equivalent to ~ g1:g2. If a formula
  #       is given, it may be necessary to also specify
  #       an argument data = with the data.frame containing
  #       the variables in the formula.
  # names	: group labels that will be printed next to
  #       each variable (= group). [Argument from
  #       graphics::boxplot.] If NA, the default, these are
  #       extracted from the provided input data, and if
  #       the data does not contain names the labels
  #       1, 2, 3,... are used.
  # add : logical, if true then the bixplot is added to
  #       the current plot. Defaults to FALSE. [Argument
  #       from boxplot.]
  # at : numeric vector giving the locations where the
  #       bixplots should be drawn, particularly when
  #       add = TRUE. [Argument from boxplot.]
  #       Defaults to 1:p where p is the number of variables.
  # horizontal : logical indicating if the bixplots should
  #       be horizontal. [Argument from boxplot.]
  #       The default FALSE means vertical boxes.
  # bodyW : the width of the body, which is the region
  #       inside the estimated density.
  # col : contains the color(s) of the bodies for the p
  #       variables. col can be a single color or a
  #       vector, and will be recycled as needed. [Argument
  #       from boxplot]. It is only applied to those
  #       variables the method deems unimodal. When col is
  #       NA, the body is not drawn.
  # bodyCol : if not NULL, it overrides col.
  # bodyOpaque: opacity of the body.
  # bodyW : the width of the body. When zero, the body
  #       is not drawn. When two or more modes are shown
  #       for the same variable, it is the width of the
  #       widest body.
  # bodysize: either "area_is_constant", "area_from_count",
  #       or "width_is_constant".
  #       Determines the relative sizes of bodies of modes
  #       in the same variable.
  #       When "area_is_constant", the area of each density
  #       is the same.
  #       When "area_from_count", the default, the area of
  #       each density is proportional to the number of
  #       members in its cluster (mode).
  #       When "width_is_constant", all bodies have the same
  #       width.
  #       In all three settings, the width of the widest
  #       body is bodyW[j], possibly modified by argument
  #       width if it is not null.
  #       The width of the box changes proportionally
  #       with that of the body.
  # modeCol : contains the color(s) of the bodies of the
  #       modes (clusters) of the variables deemed not
  #       unimodal. Can be a single color or a vector,
  #       and will be recycled as needed. If NULL, all
  #       modes of a variable will receive the same color
  #       as provided in col for that variable.
  # curveCol : contains the color(s) of the density
  #       curves of each variable, that is, the boundary
  #       of the body. Can be a single color or a vector,
  #       and will be recycled as needed. If NA, no
  #       curve boundary is drawn. Defaults to "black".
  # curveLwd : a single number gving the line width ("lwd")
  #       of the density curves.
  # border : contains the color(s) of the boxes and
  #       whiskers [argument from boxplot].
  #       It can be a single color or a vector, and will
  #       be recycled as needed. A variable deemed
  #       unimodal will have a single box, otherwise
  #       each mode has its own box. When border is NA,
  #       the box is not drawn.
  # boxCol : if not NULL, it overrides border.
  # boxOpaque : opacity of the box.
  # boxW : the width of the box. When zero, the box is
  #       not drawn.
  # boxLwd : a single number giving the line width ("lwd")
  #       of the box and whiskers.
  # makewhiskers : logical specifying whether whiskers
  #       should be drawn.
  # innerwhiskers : logical. If TRUE and there is more than
  #       one mode, also the whiskers _between_ the modes
  #       are drawn. If FALSE, the default, no whiskers
  #       between modes are not drawn, because they do not
  #       have their usual meaning due to possible
  #       overlapping of densities.
  # rugCol : contains the color(s) of the rug plot of
  #       the variables. The rug consists of small lines
  #       perpendicular to the variable axis, one for
  #       each data value. It can be a single color or a
  #       vector, and will be recycled as needed. If NA,
  #       the rug is not plotted.
  # rugoutCol : if not NULL or NA, and the body of a
  #       variable or its modes are drawn, this specifies
  #       the color(s) of the part of the ruglines outside
  #       of the body. If NULL or NA, the same color rugCol
  #       is used as inside the body.
  # rugNumeric : external numeric variable for coloring the rug.
  # rugNumericColors : two or three colors to which
  #       colorRampPalette can be applied, to obtain a smooth
  #       palette of colors for rugNumeric.
  # colorbarW : the width of the color bar with values of
  #       rugNumeric, relative to the width of the main plot.
  #       Defaults to 0.1 . Only has an effect if rugNumeric
  #       is specified.
  # rugFactor : external factor variable for coloring the rug.
  # rugFactorColors : colors of the factor levels of
  #       rugFactor.
  # rugLwd : a single number specifying the width of the
  #       lines in the rug plot. Defaults to par("lwd").
  # rugoutLwd : a single number specifying the width of
  #       the part of the ruglines outside of the body.
  #       Defaults to 2*rugLwd to make isolated points
  #       stand out visually.
  # rugOpaque: opacity of the rug.
  # jittering : logical indicating whether the values at
  #       which the rug lines are drawn are to be jittered
  #       by base::jitter, to make tied values more visible.
  #       Defaults to TRUE.
  # jitteramount : the amount of jittering (see ?jitter).
  # rugW : the width of the rug. When zero, the rug is
  #       not drawn.
  # stickCol : when side = "both" this contains the color(s)
  #       of the `stick', which is a typically thin line
  #       separating the bodies of two variables plotted
  #       side by side. It goes from the smallest to the
  #       largest value of these variables. When the
  #       density is computed, it contains the range of
  #       the density as well. stickCol can be
  #       a single color or a vector, and will be recycled
  #       as needed. If NA, the default, the stick is not
  #       plotted.
  # stickLwd : a single number giving the line width ("lwd")
  #       of the stick. When zero, the stick is not drawn.
  # boxwex : a scale factor to be applied to the width
  #       of the body, box, and rug of all bixplots
  #       [Argument from boxplot].
  # width : a vector giving the relative widths of the p
  #       bixplots, one for each variable. [Argument from
  #       boxplot]. The entries should be strictly
  #       positive numbers, that will be divided by the
  #       largest of them. The resulting ratios multiply
  #       the widths of the body, box, and rug.
  # side : the side of the variable axis where the body,
  #       box and rug will be plot. [Argument from
  #       beanplot::beanplot]. The default is "no",
  #       meaning that each bixplot is symmetric about
  #       its axis. Options "first" and "second" plot
  #       all "half" bixplots on that given side. Option
  #       "both" plots variables on alternate sides of
  #       each axis, so fewer variable axes are needed.
  # tick : boolean indicating whether to draw tick marks
  #       on the plot axis with the variable (group)
  #       names. Only has an effect when add = FALSE.
  # diplevel : the level of Hartigan's dip test for
  #       unimodality of a variable. Only when this test
  #       rejects unimodality, that is, its p-value is
  #       at most diplevel, do we search for clusters.
  #       Defaults to 0.01. Setting it higher may lead to
  #       more clusters, and setting it lower to fewer
  #       clusters.
  # minN : the minimum number of observations required per
  #       potential cluster. Defaults to 15. The maximum
  #       number of clusters searched is limited to n/minN,
  #       where n is the number of non-missing values.
  #       If n < 2*minN, clustering is not attempted to
  #       avoid spurious clusters from small samples.
  # kmax : the maximal number of clusters. It will be
  #       truncated to 5 internally. If NULL, it is set
  #       to min(floor(n/minN), 5). When setting kmax=1
  #       all variables are considered as single clusters,
  #       making the display resemble a violin plot.
  # bigN : when a variable has over bigN non-NA values,
  #       we sample bigN values from it without
  #       replacement, to save computation time.
  #       Defaults to 500. If a number below 300 is
  #       given, it is set to 300 internally.
  # clusMinN : the minimum number of unique values in a
  #       cluster. The clustering is constrained so that
  #       any cluster must contain at least clusMinN
  #       unique values. Defaults to 3.
  # bw : the bandwidth for use by stats::density which
  #       constructs the body of a variable (group). It
  #       can be a number or a formula, and defaults to
  #       "SJ-dpi" (see ?density).
  # kernel : the kernel for use by stats::density.
  #       Defaults to "gaussian".
  # cut : denoting a variable by y, the density is only
  #       constructed from min(y) - cut*bw to
  #       max(y) + cut*bw where bw is the numeric
  #       bandwidth. Defaults to 3. [Argument from
  #       beanplot.]
  # cutmin : if specified, the densities of all variables
  #       and modes cannot go below the value cutmin.
  #       Defaults to -Inf which has no effect. [Argument
  #       from beanplot.]
  # cutmax : if specified, the densities of all variables
  #       and modes cannot go above the value cutmax.
  #       Defaults to Inf which has no effect. [Argument
  #       from beanplot.]
  #
  # Graphical arguments, only when add = FALSE:
  # xlim : vector with limits of the axis with the groups
  #       (variables), whether horizontal is TRUE or FALSE.
  #       This is a convention from graphics::boxplot.
  # ylim : vector with limits of the axis with the numeric
  #       values taken, whether horizontal is TRUE or FALSE.
  #       This is a convention from graphics::boxplot.
  # cex.axis, main, cex.main, line.main, xlab, ylab,
  # cex.lab, line.lab, xaxs, yaxs, las : work as usual.
  # ann : logical indicating if the plot should be
  #       annotated by xlab, ylab, and main title.
  #       [Argument from boxplot]. Defaults to !add.
  # plot : if TRUE (the default) then the numerical
  #       summary on which the plot is based is
  #       returned invisibly. If FALSE it is also
  #       output visibly on the terminal. [Argument
  #       from boxplot.]
  # log : this argument from boxplot has to be NULL,
  #       because a log transform can change the
  #       clusters (modes). If not NULL, a warning is
  #       given but the argument has no further effect.
  #       If you wish to use a logarithm it needs to be
  #       put explicitly in the formula, or you can
  #       transform the data first.
  #
  # There is no need to declare unused arguments from
  # boxplot, vioplot, beanplot for backward compatibility
  # (and then to ignore them): they are absorbed by the ...
  #
  # Value
  #
  # A list with the following components:
  # call : the call to bixplot
  # p    : the number of variables
  # <variable1>, <variable2>,... : for each variable, this
  #        is a list.
  #        When no clusters were found in that variable,
  #        it contains the non-NA values of that variable
  #        with their names if any, and their five-number
  #        summary from stats::fivenum that was used to
  #        draw the box.
  #        When clusters were found in that variable, it
  #        contains the clustering (a vector of integers),
  #        and for each cluster its values and five-number
  #        summary.

  wnq = function (string, qwrite = TRUE) {
    if (qwrite)
      write(noquote(string), file = "", ncolumns = 100)
  }

  checkFactor = function(y){
    # Auxiliary function for use of rugFactor.
    # Argument:
    # y : the factor variable
    #
    # Results:
    # ints   : the integer version of the factor variable
    # levels : the levels of the factor variable
    # indsv  : the indices of the non-NA entries of the factor variable
    #
    if (!(is.factor(y))) {
      stop(paste0("\n The rugFactor variable is not a factor.",
                  "\n Please use as.factor() or factor() first."))
    }
    if (is.null(attr(y, "levels"))) {
      stop(paste0("\n The rugFactor variable has no levels.",
                  "\n Please use as.factor() or factor() first."))
    }
    levelsattr <- attr(y, "levels")
    if (sum(is.na(levelsattr)) > 0) {
      stop(paste0("\n The levels attribute of the rugFactor variable",
                  "\n has at least one NA."))
    }
    dupls <- duplicated(levelsattr)
    if (sum(dupls) > 0)
      stop(paste0("\n The label ", levelsattr[dupls == TRUE],
                  " occurs more than once in",
                  "\n the levels attribute of the rugFactor variable."))
    yv <- as.character(y)
    indsv <- which(!is.na(yv))
    if (length(indsv) == 0) {
      stop(paste0("The rugFactor variable only contains NA's."))
    }
    yv <- yv[indsv]
    uniqy <- sort(unique(yv))
    for (g in seq_len(length(uniqy))) {
      if (!(uniqy[g] %in% levelsattr)) {
        stop(paste0("\n The label ", uniqy[g], " does not appear among",
                    "\n the levels attribute of the rugFactor variable."))
      }
    }
    if (length(uniqy) < 2) {
      stop(paste0("\n The rugFactor variable has only a single level ( ",
                  uniqy[1], " )", "\n so we cannot color by level."))
    }
    for (g in seq_len(length(levelsattr))) {
      if (!(levelsattr[g] %in% uniqy)) {
        wnq(paste0("\nThe level ", levelsattr[g], " does not occur",
                   " in the rugFactor variable.\n"))
      }
    }
    levels <- levelsattr[which(levelsattr %in% uniqy)]
    xlevelz <- levels
    lab2int <- function(labs, xlevelz) {
      ints <- rep(NA, length(labs))
      indv <- which(!is.na(labs))
      labv <- labs[indv]
      for (g in seq_len(length(xlevelz))) {
        clinds <- indv[which(labv == xlevelz[g])]
        ints[clinds] <- g
      }
      ints
    }
    ints = lab2int(y, xlevelz)
    return(list(ints = ints, levels = levels, indsv = indsv))
  }

  drawpoints = function(y, coord=1, horizontal = FALSE,
                        bokw = 0.36, side = "no",
                        cex = 1, pointcol = par()$fg){
    if(!(side %in% c("no","first","second"))) stop("invalid side")
    if(side == "first")  coord = coord - 0.04*bokw
    if(side == "second") coord = coord + 0.04*bokw
    coords = rep(coord,length(y))
    if(horizontal==FALSE){
      points(coords, y, col=pointcol, pch=16, cex=cex)
    } else {
      points(y, coords, col=pointcol, pch=16, cex=cex)
    }
  }

  drawbox = function(y, coord = 1, horizontal = FALSE,
                     bokw = 0.36, lwd = 1,
                     bcol = "blue", side = "no",
                     whiskers = "none"){
    # Auxiliary function to draw box at (Q1, median, Q3)
    # y       : a data vector
    # coord   : coordinate of the box
    # bokw    : box width if side = "no"
    # side    : side on which to draw the box
    # whisker : whether to draw whiskers
    #
    if(!(side %in% c("no","first","second"))) stop("invalid side")
    if(!(whiskers %in% c("both","up","down","none")))
      stop("invalid whiskers")
    bstat = grDevices::boxplot.stats(y, coef = 1.53)$stats
    nuniq = length(unique(y))
    miny = min(y, na.rm = TRUE); maxy = max(y, na.rm = TRUE)
    if(nuniq == 3) bstat = c(miny, miny, bstat[3], maxy, maxy)
    spine = coord
    Left  = coord - 0.50*bokw
    Right = coord + 0.50*bokw
    if(side == "second") {
      Left  = coord + 0.06*bokw
      spine = coord + 0.28*bokw
    }
    if(side == "first") {
      Right = coord - 0.06*bokw
      spine = coord - 0.28*bokw
    }
    Low = bstat[2]; Mid = bstat[3]; Hi = bstat[4]
    if(horizontal == FALSE){
      if(nuniq > 1) lines(c(Left,Left,Right,Right,Left),
                          c(Low,Hi,Hi,Low,Low),
                          lwd = lwd, col = bcol)
      if(nuniq > 2) lines(c(Left,Right), c(Mid,Mid),
                          lwd = lwd, col = bcol)
      if(length(y) > 3){
        if(whiskers %in% c("both","up")){
          lines(c(spine,spine),c(Hi,bstat[5]),
                lwd = lwd, col = bcol) }
        if(whiskers %in% c("both","down")){
          lines(c(spine,spine),c(Low,bstat[1]),
                lwd = lwd, col = bcol) }
      }
    } else {
      if(nuniq > 1) lines(c(Low,Hi,Hi,Low,Low),
                          c(Left,Left,Right,Right,Left),
                          lwd = lwd, col = bcol)
      if(nuniq > 2) lines(c(Mid,Mid), c(Left,Right),
                          lwd = lwd, col = bcol)
      if(length(y) > 3){
        if(whiskers %in% c("both","up")){
          lines(c(Hi,bstat[5]), c(spine,spine),
                lwd = lwd, col = bcol) }
        if(whiskers %in% c("both","down")){
          lines(c(Low,bstat[1]), c(spine,spine),
                lwd = lwd, col = bcol) }
      }
    }
  }

  compdens <- function(x, bwnum, kernel, cut, cutmin, cutmax) {
    # Auxiliary function for drawdensity
    if (length(x) > 0) {
      from <- max(cutmin, (min(x, na.rm = TRUE) - cut * bwnum))
      to <- min(cutmax, max(x, na.rm = TRUE) + cut * bwnum)
      density(x, bw = bwnum, kernel = kernel, from = from,
              to = to)[c("x", "y")]
    }
    else list(x = numeric(), y = numeric())
    # if no non-NA data, this empty density comes out.
    # But we only run this function for n > 0.
  }

  drawdensity = function(side, dens, at, bodcol,
                         curcol, curlwd = 1, horizontal){
    # if curcol is NA we don't draw the curve.
    if (side == "first") {
      x1 <- NULL
      y1 <- NULL
    } else {
      x1 <- dens$y + at
      y1 <- dens$x
    }
    if (side == "second") {
      x2 <- NULL
      y2 <- NULL
    } else {
      x2 <- rev(dens$y) * -1 + at
      y2 <- rev(dens$x)
    }
    if (length(x1) > 0) {
      x1 <- c(at, x1, at)
      y1 <- c(y1[1], y1, y1[length(y1)])
    }
    if (length(x2) > 0) {
      x2 <- c(at, x2, at)
      y2 <- c(y2[1], y2, y2[length(y2)])
    }
    if (horizontal) {
      polygon(c(y1, y2), c(x1, x2), col = bodcol, border = NA)
      if (!is.na(curcol))
        lines(c(y1, y2), c(x1, x2), col = curcol, lwd = curlwd)
    } else {
      polygon(c(x1, x2), c(y1, y2), col = bodcol, border = NA)
      if (!is.na(curcol))
        lines(c(x1, x2), c(y1, y2), col = curcol, lwd = curlwd)
    }
  }

  drawrug2sided = function(y, side, jittering, jitteramount, dens,
                           at, rugw, colin, colout, rugLwd,
                           rugoutLwd, horizontal)
  { # only for side == "no"
    if(side != "no") stop('side must be \"no\" here')
    if (jittering == TRUE) {
      rugy <- list(jitter(y, amount = jitteramount),
                   rep(1, length(y)))
    } else {
      rugy <- list(y, rep(1, length(y)))
      # this is poor when there are many ties
    }
    rugy[[2]] <- rugy[[2]] * rugw/2 # 1-sided width of ruglines
    if (!isTRUE(all.equal(colin,colout))) {
      crossings <- approx(dens$x, dens$y, rugy[[1]])$y
      crossings <- pmin(crossings, rugy[[2]], na.rm = TRUE)
      extra <- unlist(rugy[2]) - crossings
    }
    else {
      crossings <- rugy[[2]]
      extra <- 0
    }
    ## First we plot the part of the rug that sticks out of the body:
    if (max(extra) > 0) { # if at least one segment exceeds density
      w <- (extra > 0) # which segments exceed density
      if (horizontal) {
        if (side %in% c("no", "first"))
          segments(rugy[[1]][w], -extra[w] - crossings[w] + at,
                   rugy[[1]][w], -crossings[w] + at,
                   col = colout, lwd = rugoutLwd)
        if (side %in% c("no", "second"))
          segments(rugy[[1]][w], crossings[w] + at,
                   rugy[[1]][w], extra[w] + crossings[w] + at,
                   col = colout, lwd = rugoutLwd)
      }
      else {
        if (side %in% c("no", "first"))
          segments(-extra[w] - crossings[w] + at, rugy[[1]][w],
                   -crossings[w] + at, rugy[[1]][w],
                   col = colout, lwd = rugoutLwd)
        if (side %in% c("no", "second"))
          segments(crossings[w] + at, rugy[[1]][w],
                   extra[w] + crossings[w] + at, rugy[[1]][w],
                   col = colout, lwd = rugoutLwd)
      }
    }
    ## Now plot the central part, inside the body:
    x1 = -crossings + at
    x2 =  crossings + at
    if (horizontal)
      segments(rugy[[1]], x1, rugy[[1]], x2, col = colin)
    else segments(x1, rugy[[1]], x2, rugy[[1]], col = colin)
  }

  drawrug1sided = function(y, side, jittering, jitteramount, at,
                           rugw, rugcenter, colin, rugLwd, horizontal)
  { # only use this function when side != "no", so here it
    # has to be either "first" or "second".
    if(!(side %in% c("first", "second"))) stop(
      'side must be \"first\" or \"second\" here')
    if (jittering == TRUE) {
      rugy <- jitter(y, amount = jitteramount)
    } else {
      rugy <- y # this is poor when there are many ties
    }
    spine = rugcenter
    if(side == "first") spine = -rugcenter
    x1 <- at + spine - rugw/2
    x2 <- at + spine + rugw/2
    if (horizontal)
      segments(rugy, x1, rugy, x2, col = colin)
    else segments(x1, rugy, x2, rugy, col = colin)
  }

  # Here main function starts
  if(!(side %in% c("no", "first", "second", "both")))
    stop("invalid side")
  if(!(bodysize %in% c("area_is_constant",
                       "area_from_count",
                       "width_is_constant"))) stop("invalid bodysize")
  if(!is.null(rugNumeric) & !is.null(rugFactor)) stop(
    "You cannot specify rugNumeric and rugFactor simultaneously.")
  if(!is.null(log)) warning(paste0(
    "The argument 'log' has no effect here. If you wish\n",
    "to use a logarithm it needs to be put explicitly in\n",
    "the formula, or you can transform the data first.\n",
    "Note that a log transform can change the modes."))
  #
  # Getting the y variables to plot
  args <- match.call()
  cat("\n")
  print(args)
  mcall <- as.list(args)
  noname = rep(FALSE, length(args))
  for(ii in seq_len(length(args))){
    noname[ii] = (is.null(base::names(args[ii])))
  }
  whichnoname = which(noname == TRUE)
  isadatafr = isamatrix = FALSE
  if(length(whichnoname) == 2){
    tempmat = eval(mcall[[whichnoname[2]]], envir = parent.frame())
    isamatrix = is.matrix(tempmat)
    isadatafr = is.data.frame(tempmat)
  }
  if(isamatrix == TRUE || isadatafr == TRUE){
    p = NCOL(tempmat)
    nams = colnames(tempmat)
    if(is.null(nams) || missing(nams)) nams = paste0("X",seq_len(p))
    n = NROW(tempmat); n
    rownams = rownames(tempmat)
    if(is.null(rownams) || missing(rownams)) rownams = seq_len(n)
    ylist = list()
    for(j in seq_len(p)){
      tempvec = tempmat[,j]
      names(tempvec) = rownams
      ylist[[nams[j]]] = tempvec
    }
  } else {
    envir = parent.frame(2)
    if (is.null(base::names(args)))
      vars <- 1:length(args)
    else vars <- c(1, which(base::names(args)[2:length(args)] %in%
                              c("formula", "x", "data", "")) + 1)
    if (length(vars) < 2)
      return(list())
    options <- which(base::names(args) %in%
                       c("subset", "na.action",
                         "drop.unused.levels", "xlev"))
    args <- args[c(vars, options)]
    args[[1]] <- quote(model.frame)
    hashad <- rep(FALSE, length(vars))
    ylist <- list()
    notnamed <- 0
    subsetno <- numeric()
    naactno <- numeric()
    dulno <- numeric()
    xlevno <- numeric()
    datano <- numeric()
    argsvals <- lapply(as.list(args[2:length(vars)]), eval, envir)
    islists <- lapply(argsvals, function(x) {
      is.list(x) || is.null(x)
    })
    for (i in 2:length(vars)) {
      if (hashad[i])
        next
      x <- argsvals[[i - 1]]
      if (inherits(x, "formula")) {
        datanonext <- match(TRUE, islists[
          max(datano, i):length(islists)]) +
          max(datano, i)
        if (!is.na(datanonext)) {
          hashad[datanonext] <- TRUE
          datano <- datanonext
        }
        nextargpos <- function(name, pos) {
          if (any(pos == length(args))) return(pos)
          posnext <- match(name, base::names(args[
            max(pos + 1, 3):length(args)])) +
            max(pos + 1, 3) - 1
          if (!is.na(posnext)) pos <- posnext
          pos
        }
        subsetno <- nextargpos("subset", subsetno)
        naactno <- nextargpos("na.action", naactno)
        dulno <- nextargpos("drop.unused.levels", dulno)
        xlevno <- nextargpos("xlev", xlevno)
        attr(args, "names")[i] <- "formula"
        m <- args[c(1, i, datano, subsetno, naactno, dulno,
                    xlevno)]
        mf <- eval(m, envir)
        response <- attr(attr(mf, "terms"), "response")
        ylist <- c(ylist, split(mf[[response]], mf[-response]))
      }
      else if (is.list(x)) {
        ylist <- c(ylist, x)
      }
      else {
        x <- list(x)
        notnamed <- notnamed + 1
        attr(x, "names") <- notnamed
        ylist <- c(ylist, x)
      }
    }
  }
  p <- length(ylist)
  if(p == 0) stop("no data found to plot")
  constantN = TRUE
  nn = length(ylist[[1]])
  for(j in seq_len(p)){
    if(length(ylist[[j]]) != nn) constantN = FALSE
  }
  if(is.null(col)) col = NA
  if(is.null(bodyCol)) bodyCol = col
  colormodes = TRUE
  if(is.null(modeCol)) colormodes = FALSE
  if(is.null(curveCol)) curveCol = NA # no curves
  if(is.null(boxCol)) boxCol = border
  if(is.null(rugCol)) rugCol = par("fg")
  if(is.null(rugoutCol)) rugoutCol = rugCol
  if(is.na(rugoutCol)) rugoutCol = rugCol
  #
  # adjusting transparency of colors:
  indx = which(!is.na(bodyCol))
  if(length(indx) >= 1) bodyCol[indx] =
    adjustcolor(bodyCol[indx], alpha.f = bodyOpaque)
  if(colormodes == TRUE) {
    indx = which(!is.na(modeCol))
    if(length(indx) >= 1) modeCol[indx] =
        adjustcolor(modeCol[indx], alpha.f = bodyOpaque)
  }
  indx = which(!is.na(boxCol))
  if(length(indx) >= 1) boxCol[indx] =
    adjustcolor(boxCol[indx], alpha.f = boxOpaque)
  indx = which(!is.na(rugCol))
  if(length(indx) >= 1) rugCol[indx] =
    adjustcolor(rugCol[indx], alpha.f = rugOpaque)
  indx = which(!is.na(rugoutCol))
  if(length(indx) >= 1) rugoutCol[indx] =
    adjustcolor(rugoutCol[indx], alpha.f = 1)
  #
  # Making vectors longer if needed:
  cycle2size = function(x, size){
    if(length(x) < size) x = rep(x, ceiling(size/length(x)))
    return(x)
  }
  bodyCol   = cycle2size(bodyCol,   p)
  bodyW     = cycle2size(bodyW,     p)
  modeCol   = cycle2size(modeCol,   5*p) # assumes mykmax <= 5
  boxCol    = cycle2size(boxCol,    p)
  boxW      = cycle2size(boxW,      p)
  curveCol  = cycle2size(curveCol,  p)
  rugCol    = cycle2size(rugCol,    p)
  rugoutCol = cycle2size(rugoutCol, p)
  rugW      = cycle2size(rugW,      p)
  stickCol  = cycle2size(stickCol,  p)
  #
  if(!is.null(width)){
    if(length(width) != p) stop(paste0(
      "Argument width should be a vector of length" ,p))
    if(min(width) <= 0) stop(
      "Argument width should only contain positive entries")
    bodyW = bodyW * width/(max(width))
    boxW  = boxW * width/(max(width))
    rugW  = rugW * width/(max(width))
  }
  bodyW = boxwex * abs(bodyW)
  boxW  = boxwex * abs(boxW)
  rugW  = boxwex * abs(rugW)
  #
  if (missing(names)) {
    if (is.null(base::names(ylist)))
      attr(ylist, "names") = 1:displayp
    names <- base::names(ylist)
  } else { attr(ylist, "names") <- names }

  if(!(side == "both")){
    displayp = p
    xpos = seq_len(p)
    mysides = rep(side, p)
    if(!is.null(at)){
      lat = length(at)
      if(lat != p)
        stop(paste0("argument at has length ",lat,
                    " which should be ", p))
      xpos = at
    }
    xcoords = xpos
  }
  if(side == "both"){
    # if plot on both sides: "half" as many variable axes
    displayp = ceiling(p/2)
    xpos = seq_len(displayp)
    if(!is.null(at)){
      lat = length(at)
      if(lat != displayp)
        stop(paste0("argument at has length ", lat,
                    " which should be ", displayp))
      xpos = at
    }
    if ((side == "both") && (length(names) > displayp)) {
      combinenames <- function(string1, string2)
      { if (is.na(string2)) return(string1)
        if (string1 == string2) return(string1)
        s1 <- unlist(strsplit(string1, NULL))
        s2 <- unlist(strsplit(string2, NULL))
        fromleft <- 0
        gfromleft <- 0
        while ((fromleft < length(s1)) && (fromleft < length(s2)) &&
               (s1[fromleft + 1] == s2[fromleft + 1])) {
          if (any(grep("[. \t]", s1[fromleft + 1])))
            gfromleft <- fromleft
          fromleft <- fromleft + 1
        }
        fromright <- 0
        gfromright <- 0
        while ((fromright < length(s1)) && (fromright < length(s2)) &&
               (s1[length(s1) - fromright] == s2[length(s2) - fromright])) {
          if (any(grep("[. \t]", s1[length(s1) - fromright])))
            gfromright <- fromright
          fromright <- fromright + 1
        }
        if (gfromleft > gfromright)
          result <- substr(string1, 1, gfromleft)
        else if (gfromright == 0)
          result <- paste(string1, string2, sep = "+")
        else result <- substr(string1, nchar(string1) - gfromright +
                                1, nchar(string1))
        result
      }
      for (i in 1:displayp) {
        names[i] <- combinenames(names[i*2 - 1], names[i*2])
      }
      length(names) <- displayp
    }
    xcoords = c(matrix(rep(xpos, 2), nrow=2, byrow = TRUE))
    mysides = rep(c("first","second"), displayp)
  }
  ## From here on, each mysides[j] is either "no", "first", or "second".
  ## It cannot be "both" anymore.
  #
  rugfact = FALSE
  if(!is.null(rugFactor)){
    rugfact = TRUE
    if(constantN == FALSE) {
      warning(
        "We cannot color the rug by rugFactor because not all",
        "\n  y variables have the same length.")
      rugfact = FALSE
    } else {
      if(length(rugFactor) != nn) {
        warning(paste0(
          "We cannot color the rug by rugFactor,",
          "\n  because rugFactor has length ", length(rugFactor),
          "\n  and the y variables have length ", nn, ".\n"))
        rugfact = FALSE
      }
    }
    if(rugfact == TRUE){
      out = checkFactor(rugFactor)
      # If this errors, also bixplot stops.
      # So from here on we can assume it didn't.
      ints = out$ints
      mylevels = out$levels
      indsv = out$indsv
      nlevels = length(mylevels)
      ncolors = length(rugFactorColors)
      if(nlevels > ncolors) {
        rugFactorColors = cycle2size(rugFactorColors, nlevels) }
    }
    cat("The variable rugFactor has the levels:\n")
    print(mylevels)
  }
  #
  rugnumer = FALSE
  if(add == FALSE){
    if(!is.null(rugNumeric)) {
      rugnumer = TRUE
      if(constantN == FALSE) {
        warning(
          "We cannot color the rug by rugNumeric because not all",
          "\n  y variables have the same length.")
        rugnumer = FALSE
      } else {
        if(length(rugNumeric) != nn) {
          warning(paste0(
            "We cannot color the rug by rugNumeric,",
            "\n  because rugNumeric has length ", length(rugNumeric),
            "\n  and the y variables have length ", nn, ".\n"))
          rugnumer = FALSE
        }
      }
    }
  }
  #
  # Determine limits of x and y
  if(is.null(xlim)){
    xlim = c(min(xcoords) - 0.5, max(xcoords) + 0.5)
  }
  ymin = Inf; ymax = -Inf
  rugNum = rugFac = list()
  if(bigN < 300) bigN = 300
  for(j in seq_len(p)){
    y = ylist[[j]]
    if(length(y) >= 1){
      if(!(is.numeric(y))) stop(paste0("variable ",j,
                                       " is not numeric"))
      if(rugnumer == TRUE) rugNum[[j]] = rugNumeric
      if(rugfact  == TRUE) rugFac[[j]] = ints
      if(length(y) > bigN){
        set.seed(1)
        inds = sample(seq_len(length(y)), bigN, replace = FALSE)
        # In order to display the worst outliers:
        inds[1] = which.min(y)
        inds[2] = which.max(y)
        y = y[inds]
        if(rugnumer == TRUE) rugNum[[j]] = rugNum[[j]][inds]
        if(rugfact  == TRUE) rugFac[[j]] = rugFac[[j]][inds]
      }
      noNA = which(!is.na(y))
      y = y[noNA]
      if(rugnumer == TRUE) rugNum[[j]] = rugNum[[j]][noNA]
      if(rugfact  == TRUE) rugFac[[j]] = rugFac[[j]][noNA]
      ord = order(y)
      y = y[ord] # This sort is necessary. The names are permuted too.
      if(rugnumer == TRUE) rugNum[[j]] = rugNum[[j]][ord]
      if(rugfact  == TRUE) rugFac[[j]] = rugFac[[j]][ord]
      ylist[[j]] = y
      indy  = which(!is.na(y))
      tempy = y[indy]
      indy  = which(is.finite(tempy))
      tempy = tempy[indy]
      bwnum = 0
      if(length(tempy) > 1 && !(all(is.na(modeCol)) && all(is.na(bodyCol)))){
        bwnum = if(!is.numeric(bw)) density(unlist(tempy), bw = bw)$bw else bw
      }
      ymin  = min(ymin, min(tempy) - cut*bwnum)
      ymax  = max(ymax, max(tempy) + cut*bwnum)
    }
  }
  if(is.null(ylim)) { ylim = c(ymin, ymax) }
  if(add == FALSE){
    if(rugnumer == TRUE){ # make room for color bar
      layout(matrix(c(1, 2), ncol = 2), widths = c(1, colorbarW))
      maruser = par("mar")
      mymar = maruser
      mymar[4] = 1
      par(mar = mymar)
      colorpalette <- colorRampPalette(rugNumericColors)(100)
      ran = range(rugNumeric, na.rm = TRUE)
    }
    if(horizontal == FALSE){
      plot(NA, NA, type = "n",
           xaxt = "n", xlim = xlim, ylim = ylim,
           xaxs = xaxs, yaxs = yaxs,
           xlab = "", ylab = "", cex.axis = cex.axis)
      axis(side = 1, at = xpos, labels = names, las = las,
           cex.axis = cex.axis, tick = tick)
    } else {
      plot(NA, type = "n",
           yaxt = "n", xlim = ylim, ylim = xlim,
           xaxs = xaxs, yaxs = yaxs,
           xlab = "", ylab = "", cex.axis = cex.axis)
      axis(side = 2, at = xpos, labels = names, las = las,
           cex.axis = cex.axis, tick = tick, ylim = xlim)
    }
    if(ann == TRUE){
      title(main = main, line = line.main, cex.main = cex.main)
      title(xlab = xlab, line = line.lab, cex.lab = cex.lab)
      title(ylab = ylab, line = line.lab, cex.lab = cex.lab)
    }
  }
  outp = list()
  outp[["call"]] = match.call()
  outp[["p"]] = p
  #
  jj = 0 # counter for modeCol
  for(j in seq_len(p)){
    listj = list() # to add to outp list
    y = ylist[[j]]
    n = length(y)
    cat(paste0("\nVariable ", j, " has length = ", n, ":\n"))
    if(n < 1) { cat("we are skipping it.\n")
    } else { # draw this plot
      nuniq = length(unique(y))
      if((side == "both") && (j%%2 == 0)){
        ymintemp = min(c(y, ymintemp))
        ymaxtemp = max(c(y, ymaxtemp))
      } else{
        ymintemp = min(y)
        ymaxtemp = max(y)
      }
      # If bandwidth bw is a formula, turn it into a number:
      bwnum = bw
      if(n == 1) { bwnum = 0.5
      } else {
        if (!is.numeric(bw)) {
          bwnum = density(y, kernel = kernel, bw = bw)$bw
        }
        if(is.nan(bwnum)) bwnum <- 0.5
      }
      mykmax = floor(n/minN)
      if(!is.null(kmax)) mykmax = min(kmax, mykmax)
      mykmax = min(mykmax, 5)
      reject0 = FALSE
      if(mykmax > 1){ # else we don't need the dip test
        # Is there more than 1 mode:
        pval = round(diptest::dip.test(y)$p.value,4)
        cat(paste0("pvalue(dip.test) = ",pval,"\n"))
        if(pval <= diplevel) reject0 = TRUE
      }
      kopt = 1
      if(reject0 == TRUE){
        mykmax = max(1, min(mykmax, floor(nuniq/clusMinN)))
        cat(paste0("mykmax = ",mykmax,"\n"))
        if(mykmax >= 2){
          silwidths = rep(-2, mykmax)
          cluslist = list()
          cluslist[[1]] = NA
          for(kk in 2:mykmax){
            out = pamc1d(y, kk, clusMinN, maxit = 100,
                         verbose = FALSE)
            cluslist[[kk]] = out$clustering
            silh = cluster::silhouette(cluslist[[kk]], dist(y))
            silwidths[kk] = mean(silh[,3], na.rm = TRUE)
          }
          cat("Silhouette widths: ")
          print(round(silwidths[-1],3))
          if(max(silwidths) > 0){ # else we keep kopt = 1
            kopt = which.max(silwidths) # always >= 2
            clusvec = cluslist[[kopt]]
          }
        } # ends mykmax >= 2
      }
      cat(paste0("Selected k = ", kopt,".\n"))
      if(kopt > 1){
        mymodes = list()
        if(rugnumer == TRUE | rugfact == TRUE) myrugs = list()
        tempval = rep(NA, kopt)
        for(m in seq_len(kopt)){ # m=1
          mymodes[[m]] = y[which(clusvec == m)]
          if(rugnumer == TRUE) myrugs[[m]] = rugNum[[j]][which(clusvec == m)]
          if(rugfact  == TRUE) myrugs[[m]] = rugFac[[j]][which(clusvec == m)]
          names(mymodes[[m]]) = names(y)[which(clusvec == m)]
          tempval[m] = (mymodes[[m]])[1]
        }
        ordr = order(tempval) # to sort the modes
        cat("Cluster sizes:\n")
        print(t(as.matrix(table(clusvec))))
        sortedmodes = list()
        if(rugnumer == TRUE | rugfact == TRUE) sortedrugs = list()
        clustnew = rep(NA, n)
        for(kk in seq_len(kopt)){
          kkk = ordr[kk]
          sortedmodes[[kk]] = as.vector(mymodes[[kkk]])
          names(sortedmodes[[kk]]) = names(mymodes[[kkk]])
          if(rugnumer == TRUE) sortedrugs[[kk]] = as.vector(myrugs[[kkk]])
          if(rugfact  == TRUE) sortedrugs[[kk]] = as.vector(myrugs[[kkk]])
          clustnew[clusvec == kkk] = kk
        }
        clusvec = clustnew; rm(clustnew)
        names(clusvec) = names(y)
        listj[["clustering"]] = clusvec
        modelist = list()
        #
        # We have to compute the densities first, since their
        # relative widths will depend on the argument bodysize.
        densities = list()
        areas = heights = counts = rep(NA, kopt)
        for(m in seq_len(kopt)){ # loop over the modes
          ym = sortedmodes[[m]]
          counts[m] = length(ym)
          if(length(unique(ym)) < clusMinN){
            # This should never happen due to pamc
            densities[[m]] = NA
          } else { # for this mode:
            dens = NA
            # compute density of mode m:
            dens = compdens(ym, bwnum = bwnum, kernel = kernel,
                            cut = cut, cutmin = cutmin,
                            cutmax = cutmax)
            # we reuse the overall bandwidth for all modes!
            ymintemp = min(ymintemp, min(dens$x))
            ymaxtemp = max(ymaxtemp, max(dens$x))
            xrange = max(dens$x) - min(dens$x)
            area   = xrange * mean(dens$y, na.rm = T)
            dens$y = dens$y/area # so area is exactly 1
            areas[m] = xrange * mean(dens$y, na.rm = T)
            heights[m] = max(dens$y)
            densities[[m]] = dens
          } # ends density computation for this mode
        } # ends loop over modes
        factors = rep(1, kopt)
        if(bodysize == "width_is_constant"){
          factors = 1/heights
        }
        if(bodysize == "area_is_constant"){
          factors = rep(1, kopt)/max(heights)
        }
        if(bodysize == "area_from_count"){
          factors = (counts/max(counts))
          factors = factors/max(factors*heights)
        }
        factors[which(is.na(factors))] = 0
        ratios = heights*factors # for width of boxes and rug
        ratios = ratios/max(ratios)
      } # ends if(kopt > 1)
      #
      # Start plotting
      if(kopt == 1){ # Plot as a single cluster
        listj[["values"]] = y
        my5 = fivenum(y, na.rm=TRUE)
        names(my5) = NULL
        listj[["fivenumbersummary"]] = my5
        whiskers = "none"
        if(makewhiskers == TRUE) whiskers = "both"
        if(length(unique(y)) < clusMinN){ # should never happen
          drawpoints(y, coord = xcoords[j], bokw = boxW[j],
                     side = mysides[j], cex=0.6,
                     horizontal = horizontal, pointcol = par()$fg)
          if(boxW[j] > 0) {
            drawbox(y, coord = xcoords[j], horizontal = horizontal,
                    side = mysides[j], bokw = boxW[j],
                    bcol = boxCol[j], whiskers = "none") }
        } else { # For this variable:
          colin = rugCol[j]
          colout = rugoutCol[j]
          if(rugnumer == TRUE){
            mycols = round(99*(rugNum[[j]]-ran[1])/(ran[2]-ran[1])) + 1
            mycols[is.na(mycols)] = 0
            mycols = colorpalette[mycols]
            colin = colout = mycols
          }
          if(rugfact == TRUE){
            mycols = rugFac[[j]]
            mycols[is.na(mycols)] = 0
            mycols = rugFactorColors[mycols]
            colin = colout = mycols
          }
          if((!is.na(bodyCol[j]) | !is.na(curveCol[j])) && (bodyW[j] > 0)) {
            # Compute the density
            dens = compdens(y, bwnum = bwnum, kernel = kernel,
                            cut = cut, cutmin = cutmin,
                            cutmax = cutmax)
            ymintemp = min(ymintemp, min(dens$x))
            ymaxtemp = max(ymaxtemp, max(dens$x))
            dens$y = 0.5*bodyW[j]*dens$y/max(dens$y)
            drawdensity(side = mysides[j], dens, at = xcoords[j],
                        bodcol = bodyCol[j], curcol = curveCol[j],
                        curlwd = curveLwd, horizontal = horizontal)
          } else {
            colout = colin # whether fixed or variable
          }
          if (sum(is.na(colin)) == 0 && (rugW[j] > 0)) {
            set.seed(1) # so jittering always gives same result
            if(mysides[j] == "no"){
              drawrug2sided(y, side = mysides[j], jittering = jittering,
                            jitteramount = jitteramount, dens = dens,
                            at = xcoords[j], rugw = rugW[j],
                            colin = colin, colout = colout,
                            rugLwd = rugLwd, rugoutLwd = rugoutLwd,
                            horizontal = horizontal)
            } else { # mysides[j] is either "first" or "second"
              if(boxW[j] > 0){
                drawrug1sided(y, side = mysides[j], jittering = jittering,
                              jitteramount = jitteramount, at = xcoords[j],
                              rugw = rugW[j]/2, rugcenter = 0.27*boxW[j],
                              colin = colin, rugLwd = rugLwd,
                              horizontal = horizontal)
              }
            }
          }
          if(boxW[j] > 0) {
            drawbox(y, coord = xcoords[j], horizontal = horizontal,
                    side = mysides[j], bokw = boxW[j], lwd = boxLwd,
                    bcol = boxCol[j], whiskers = whiskers) }
        }
      } else { # kopt > 1
        for(m in seq_len(kopt)){ # plot all the modes
          ym = sortedmodes[[m]] # modes in increasing order
          my5 = fivenum(ym, na.rm=TRUE)
          names(my5) = NULL
          modelist[[paste0("cluster_",m)]] =
            list(members = ym, fivenumbersummary = my5)
          whiskers = "none"
          if(makewhiskers == TRUE){
            whiskers = "both"
            if(innerwhiskers == FALSE){
              whiskers = "none"
              if(m == 1) whiskers = "down"
              if(m == kopt) whiskers = "up"
            }
          }
          if(length(unique(ym)) < clusMinN){ # should never happen
            drawpoints(ym, coord = xcoords[j], bokw = boxW[j],
                       side = mysides[j], cex=0.6,
                       horizontal = horizontal, pointcol = par()$fg)
            if(boxW[j] > 0) drawbox(ym, coord = xcoords[j],
                                    horizontal = horizontal,
                                    side = mysides[j], bokw = boxW[j],
                                    bcol = boxCol[j], whiskers = "none")
          } else { # for this mode:
            thismodecol = bodyCol[j]
            if(colormodes == TRUE){
              jj = jj+1
              thismodecol = modeCol[jj] # can be NA
            }
            colin = rugCol[j]
            colout = rugoutCol[j]
            if(rugnumer == TRUE){
              mycols = round(99*(sortedrugs[[m]]-ran[1])/(ran[2]-ran[1])) + 1
              mycols[is.na(mycols)] = 0
              mycols = colorpalette[mycols]
              colin = colout = mycols
            }
            if(rugfact == TRUE){
              mycols = sortedrugs[[m]]
              mycols[is.na(mycols)] = 0
              mycols = rugFactorColors[mycols]
              colin = colout = mycols
            }
            if((!is.na(thismodecol) | !is.na(curveCol[j])) && (bodyW[j] > 0)) {
              dens = densities[[m]]
              dens$y = 0.5*bodyW[j]*factors[m]*dens$y
              drawdensity(side = mysides[j], dens, at = xcoords[j],
                          bodcol = thismodecol, curcol = curveCol[j],
                          curlwd = curveLwd, horizontal = horizontal)
            } else { # if we don't draw a density, the outside
              # rug color should coincide with the inside
              colout = colin # whether fixed or variable
            }
            if (sum(is.na(colin)) == 0 && (rugW[j] > 0)) {
              set.seed(1) # so jittering always gives same result
              if(mysides[j] == "no"){
                drawrug2sided(ym, side = mysides[j], jittering = jittering,
                              jitteramount = jitteramount, dens = dens,
                              at = xcoords[j], rugw = rugW[j],
                              colin = colin, colout = colout,
                              rugLwd = rugLwd, rugoutLwd = rugoutLwd,
                              horizontal = horizontal)
              } else { # mysides[j] is either "first" or "second"
                if(boxW[j] > 0){
                  drawrug1sided(ym, side = mysides[j], jittering = jittering,
                                jitteramount = jitteramount, at = xcoords[j],
                                rugw = rugW[j]/2,
                                rugcenter = 0.27*ratios[m]*boxW[j],
                                colin = colin, rugLwd = rugLwd,
                                horizontal = horizontal)
                }
              }
            }
            bokw = ratios[m]*boxW[j]
            if(bokw > 0) drawbox(ym, coord = xcoords[j],
                                 horizontal = horizontal,
                                 side = mysides[j], bokw = bokw,
                                 lwd = boxLwd, bcol = boxCol[j],
                                 whiskers = whiskers)
          }
        } # ends loop over modes m
        listj = c(listj, modelist)
      } # ends kopt > 1
    } # ends if(n >= 1)
    if(!is.na(stickCol[j]) && (stickLwd > 0) && (side == "both")){
      if((ymintemp < Inf) && (ymaxtemp > -Inf)){
        if(horizontal == FALSE){
          lines(c(xcoords[j], xcoords[j]), c(ymintemp, ymaxtemp),
                col = stickCol[j], lwd = stickLwd)
        } else {
          lines(c(ymintemp, ymaxtemp), c(xcoords[j], xcoords[j]),
                col = stickCol[j], lwd = stickLwd)
        }
      }
    } # ends stick
    name = names[j]
    outp[[name]] = listj # noquote(paste0("Variable ",j))
  } # ends loop over j
  if(add == FALSE){
    if(rugnumer == TRUE){ # plot color bar:
      mymar = maruser
      mymar[2] = 0
      par(mar = mymar)
      color_bar_matrix <- matrix(seq(0, 1, length.out = 100), nrow = 1)
      image(color_bar_matrix, col = colorpalette, axes=FALSE)
      # Add a y-axis to the color bar to show the values:
      # Use pretty() to get "nice" tick mark values:
      tickvalues <- pretty(ran, n = 5)
      # Calculate the corresponding positions on the 0-1
      # scale of the color bar
      tickpositions <- (tickvalues - ran[1])/(ran[2] - ran[1])
      # Use these positions and values to create the axis
      axis(4, at = tickpositions, labels = tickvalues,
           cex.axis = cex.colorbar, las = 1)
      box() # Add a box around the color bar
      # Reset the layout to default:
      layout(1)
      # Reset the margins to before:
      par(mar = maruser)
    }
  }
  if(plot == TRUE) { invisible(outp) } else { return(outp) }
}


pamc1d <- function(y, k, minsize = 4, countwhat = "unique",
                   stand = TRUE, maxit = 100, verbose = TRUE){
  # Constrained version of k-medoids clustering (partitioning
  # around medoids) for univariate data y.
  # This is a modification of an algorithm for constrained
  # k-means by Bradley-Bennett-Demiriz (2000, Microsoft report
  # MSR-TR-2000-65). The modifications include working with
  # medians instead of means, and providing a minsize constraint
  # on unique cases instead of any cases, to prevent placing
  # tied cases in separate clusters.
  #
  # Arguments
  # y       : univariate data
  # k       : specified number of clusters
  # minsize : lower bound on the size of all clusters
  # maxit   : maximum number of iterations in reassignment loop
  # verbose : TRUE will print intermediate steps.

  objL1 = function(yy, centers, clusvec){
    mycmat = abs(outer(as.vector(yy),centers,"-"))
    sum(mycmat*outer(clusvec, 1:length(centers), "=="))/length(yy)
  }

  if(!(countwhat %in% c("any", "unique"))) stop("invalid countwhat")
  y = sort(na.omit(y))
  if(length(unique(y)) < 2) stop("y has < 2 non-NA unique values, so no clustering")
  n <- length(y)
  if(is.null(names(y))) names(y) = seq_len(length(y))
  yraw = y
  if(stand == TRUE) y = scale(y) # possible since sd(y) > 0
  if(countwhat == "unique"){
    ydup = duplicated(y)
    nuniq = sum(ydup == FALSE)
    if(nuniq == n) countwhat = "any"
  }
  kminsize = k*minsize
  if(countwhat == "any"){
    if(kminsize > n) stop(paste0(
      "A partition with k = ",k," clusters that each\n",
      "have at least minsize = ", minsize, " members\n",
      "would require ", kminsize," data points, but\n",
      "there are only ", n, " data points."))
  } else { # if(countwhat == "unique")
    if(kminsize > nuniq) stop(paste0(
      "A partition with k = ",k," clusters that each\n",
      "have at least minsize = ", minsize, " unique members\n",
      "would require ", kminsize," unique data points, but\n",
      "there are only ", nuniq, " unique data points."))
  }
  objectives = list()
  set.seed(1)
  clusvec = cluster::pam(y, k, cluster.only = TRUE,
                         keep.diss=F, keep.data=F)
  if(verbose == TRUE){
    cat("Initial clustering vector:\n")
    print(clusvec) # for all n cases (whether unique or not)
    cat("Initial clustering table:\n")
    print(as.data.frame(t(as.matrix(table(clusvec)))),
          row.names = FALSE)
  }
  centers = rep(NA, k)
  bigenough = TRUE
  for(h in seq_len(k)){
    clush = which(clusvec == h)
    yh = y[clush]
    if(countwhat == "any"){
      if(length(yh) < minsize) bigenough = FALSE
    }
    if(countwhat == "unique"){
      if(length(unique(yh)) < minsize) bigenough = FALSE
    }
    centers[h] = median(yh)
  }
  obj = objL1(y, centers, clusvec)
  if(verbose == TRUE){
    cat(paste0("Unconstrained objective = ", obj, "\n"))
  }
  objectives = c(objectives, obj)
  num = 0
  iter = 1
  if(bigenough == FALSE){
    if(countwhat == "any"){
      num = 1
      while (iter <= maxit & num > 0){
        # The second condition says: stop when no assignment changes
        if(verbose) cat(paste0("\nIteration Number = ", iter,"\n"))
        cost.mat = abs(outer(as.vector(y),centers,"-"))
        # = matrix with distances of all cases to each center
        trans <- lpSolve::lp.transport(cost.mat = cost.mat,
                              direction = "min",
                              row.signs = rep("=", n),
                              row.rhs = rep(1, n),
                              col.signs = rep(">=", k),
                              col.rhs = rep(minsize, k))
        clusnew <- apply(trans$solution, 1, which.max)
        num <- sum(clusnew != clusvec)
        if(verbose == TRUE){
          cat(paste0("Number of assignment changes = ", num,"\n"))
        }
        if(num > 0){ # if not, we are done
          clusvec <- clusnew # update clustering
          if(verbose == TRUE){
            print(clusvec) # new clustering vector
            print(as.data.frame(t(as.matrix(table(clusvec)))),
                  row.names = FALSE)
          }
          # recompute centers:
          for(h in seq_len(k)){
            clush = which(clusvec == h)
            centers[h] = median(y[clush])
          }
          obj = objL1(y, centers, clusvec)
          if(verbose == TRUE){
            cat(paste0("objective = ", obj, "\n"))
          }
          objectives = c(objectives, obj)
          iter <- iter + 1
        }
      } # ends while
      if(min(table(clusvec)) < minsize){ # should never happen
        warning(paste0("Failed to satisfy the minsize condition for\n",
                       "all clusters. Try with lower k or minsize."))
      }
    } # ends countwhat == any
    if(countwhat == "unique"){
      mult = rep(1, nuniq)
      if(verbose == TRUE){
        cat(paste0("\nThere are ", nuniq, " unique cases out of ",
                   n," cases.\n"))
      }
      whichuni = which(ydup == FALSE)
      yuniq = y[whichuni]
      yrawuniq = yraw[whichuni]
      if(verbose == TRUE){
        cat("Unique cases:\n")
        print(clusvec[whichuni]*0 + yrawuniq)
      }
      clusuni = clusvec[whichuni]
      whichdup = which(ydup == TRUE)
      if(verbose == TRUE){
        cat("Duplicate cases:\n")
        print(clusvec[whichdup]*0 + yraw[whichdup])
      }
      dupofwhat = rep(NA, length(whichdup))
      for(i in seq_len(length(whichdup))){ # i = 10
        yi = y[whichdup[i]]
        dupofwhat[i] = min(which.min(abs(y[whichuni] - yi)))
        mult[dupofwhat[i]] = mult[dupofwhat[i]] + 1
      }
      if(verbose == TRUE){
        cat("They are duplicates of the cases:\n")
        whichof = whichuni[dupofwhat]
        print(clusvec[whichof]*0 + yraw[whichof])
      }
      num = 1
      while (iter <= maxit & num > 0){
        if(verbose) cat(paste0("\nIteration Number = ", iter,"\n"))
        cost.mat = abs(outer(as.vector(yuniq),centers,"-"))
        trans <- lp.transport(cost.mat = cost.mat,
                              direction = "min",
                              row.signs = rep("=", nuniq),
                              row.rhs = rep(1, nuniq),
                              col.signs = rep(">=", k),
                              col.rhs = rep(minsize, k))
        clusuninew <- apply(trans$solution, 1, which.max)
        num <- sum(clusuninew != clusuni) # number of assignment changes
        if(verbose == TRUE){
          cat(paste0("Number of assignment changes = ", num,"\n"))
        }
        if(num > 0){
          clusuni <- clusuninew # update clustering
          # turn clusuni back into clusvec:
          if(nuniq < n){
            clusvec[whichuni] = clusuni
            clusvec[whichdup] = clusuni[dupofwhat]
          } else { clusvec = clusuni }
          if(verbose == TRUE){
            print(clusvec) # true clustering vector
            print(as.data.frame(t(as.matrix(table(clusvec)))),
                  row.names = FALSE)
          }
          for(h in 1:k){
            clush = which(clusvec == h)
            centers[h] = median(y[clush])
          }
          obj = objL1(y, centers, clusvec)
          if(verbose == TRUE){
            cat(paste0("objective = ", obj, "\n"))
          }
          objectives = c(objectives, obj)
          iter <- iter + 1
        }
      } # ends while
      if(min(table(clusuni)) < minsize){ # should never happen
        warning(paste0("Failed to satisfy the minsize condition for\n",
                       "all clusters. Try with lower k or minsize."))
      }
    } # ends countwhat == unique
  } # ends bigenough == FALSE
  clustable = clusvec
  clustable = table(clustable)
  if(verbose == TRUE){
    cat("\nObjective function in iteration steps: \n")
    print(matrix(objectives, ncol=1))
    cat("\nFinal clustering vector:\n")
    print(clusvec)
    cat("\nFinal clustering table:\n")
    print(as.data.frame(t(as.matrix(clustable))), row.names = FALSE)
  }
  result <- list(iter = iter, converged = (num==0),
                 clustering = clusvec, obj = obj,
                 centers = centers, clustable = clustable)
  return(result)
}

Try the classmap package in your browser

Any scripts or data that you put into this service are public.

classmap documentation built on April 29, 2026, 5:10 p.m.