R/plotlyM.r
In Hmisc: Harrell Miscellaneous

Documented in plotlyM

#' plotly Multiple
#'
#' Generates multiple plotly graphics, driven by specs in a data frame
#'
#' Generates multiple \code{plotly} traces and combines them with \code{plotly::subplot}.  The traces are controlled by specifications in data frame \code{data} plus various arguments.  \code{data} must contain these variables: \code{x}, \code{y}, and \code{tracename} (if \code{color} is not an "AsIs" color such as \code{~ I('black')}), and can contain these optional variables: \code{xhi}, \code{yhi} (rows containing \code{NA} for both \code{xhi} and \code{yhi} represent points, and those with non-\code{NA} \code{xhi} or \code{yhi} represent segments, \code{connect} (set to \code{TRUE} for rows for points, to connect the symbols), \code{legendgroup} (see \code{plotly} documentation), and \code{htext} (hovertext).  If the \code{color} argument is given and it is not an "AsIs" color, the variable named in the \code{color} formula must also be in \code{data}.  Likewise for \code{size}.  If the \code{multplot} is given, the variable given in the formula must be in \code{data}.  If \code{strata} is present, another level of separate plots is generated by levels of \code{strata}, within levels of \code{multplot}.
#'
#' If \code{fitter} is specified, x,y coordinates for an individual plot are
#' run through \code{fitter}, and a line plot is made instead of showing data points.  Alternatively you can specify \code{fitter='ecdf'} to compute and plot emirical cumulative distribution functions.
#'
#' @param data input data frame
#' @param x formula specifying the x-axis variable
#' @param y formula for y-axis variable
#' @param xhi formula for upper x variable limits (\code{x} taken to be lower value)
#' @param yhi formula for upper y variable limit (\code{y} taken to be lower value)
#' @param htext formula for hovertext variable
#' @param multplot formula specifying a variable in \code{data} that when stratified on produces a separate plot
#' @param strata formula specifying an optional stratification variable
#' @param fitter a fitting such as \code{loess} that comes with a \code{predict} method.  Alternatively specify \code{fitter='ecdf'} to use an internal function for computing and displaying ECDFs, which moves the analysis variable from the y-axis to the x-axis
#' @param color \code{plotly} formula specifying a color variable or e.g. \code{~ I('black')}.  To keep colors constant over multiple plots you will need to specify an AsIs color when you don't have a variable representing color groups.
#' @param size \code{plotly} formula specifying a symbol size variable or AsIs
#' @param showpts if \code{fitter} is given, set to \code{TRUE} to show raw data points in addition to smooth fits
#' @param rotate set to \code{TRUE} to reverse the roles of \code{x} and \code{y}, for example to get horizontal dot charts with error bars
#' @param xlab x-axis label.  May contain html.
#' @param ylab a named vector of y-axis labels, possibly containing html (see example below).  The names of the vector must correspond to levels of the \code{multplot} variable.  \code{ylab} can be unnamed if \code{multplot} is not used.
#' @param ylabpos position of y-axis labels.  Default is on top left of plot.  Specify \code{ylabpos='y'} for usual y-axis placement.
#' @param xlim 2-vector of x-axis limits, optional
#' @param ylim 2-vector of y-axis limits, optional
#' @param shareX specifies whether x-axes should be shared when they align vertically over multiple plots
#' @param shareY specifies whether y-axes should be shared when they align horizontally over multiple plots
#' @param nrows the number of rows to produce using \code{subplot}
#' @param ncols the number of columns to produce using \code{subplot} (specify at most one of \code{nrows,ncols})
#' @param height height of the combined image in pixels
#' @param width width of the combined image in pixels
#' @param colors the color palette.  Leave unspecified to use the default \code{plotly} palette
#' @param alphaSegments alpha transparency for line segments (when \code{xhi} or \code{yhi} is not \code{NA})
#' @param alphaCline alpha transparency for lines used to connect points
#' @param digits number of significant digits to use in constructing hovertext
#' @param zeroline set to \code{FALSE} to suppress vertical line at x=0
#'
#' @return \code{plotly} object produced by \code{subplot}
#' @author Frank Harrell
#' @examples
#' \dontrun{
#' set.seed(1)
#' pts     <- expand.grid(v=c('y1', 'y2', 'y3'), x=1:4, g=c('a', 'b'), yhi=NA,
#'                        tracename='mean', legendgroup='mean',
#'                        connect=TRUE, size=4)
#'
#' pts$y   <- round(runif(nrow(pts)), 2)
#'
#' segs     <- expand.grid(v=c('y1', 'y2', 'y3'), x=1:4, g=c('a', 'b'),
#'                         tracename='limits', legendgroup='limits',
#'                         connect=NA, size=6)
#' segs$y   <- runif(nrow(pts))
#' segs$yhi <- segs$y + runif(nrow(pts), .05, .15)
#'
#' z <- rbind(pts, segs)
#'
#' xlab <- labelPlotmath('X<sub>12</sub>', 'm/sec<sup>2</sup>', html=TRUE)
#' ylab <- c(y1=labelPlotmath('Y1', 'cm', html=TRUE),
#'           y2='Y2',
#'           y3=labelPlotmath('Y3', 'mm', html=TRUE))
#'
#' W=plotlyM(z, multplot=~v, color=~g, xlab=xlab, ylab=ylab, ncols=2,
#'           colors=c('black', 'blue'))
#'
#' W2=plotlyM(z, multplot=~v, color=~I('black'), xlab=xlab, ylab=ylab,
#'            colors=c('black', 'blue'))
#' 
#' }
#' @export
plotlyM <- function(data, x=~x, y=~y, xhi=~xhi, yhi=~yhi, htext=NULL,
                    multplot=NULL, strata=NULL, fitter=NULL,
                    color=NULL, size=NULL,
                    showpts=! length(fitter),
                    rotate=FALSE, xlab=NULL, ylab=NULL,
                    ylabpos=c('top', 'y'),
                    xlim=NULL, ylim=NULL,
                    shareX=TRUE, shareY=FALSE, height=NULL, width=NULL,
                    nrows=NULL, ncols=NULL,
                    colors=NULL, alphaSegments=1, alphaCline=0.3, digits=4,
                    zeroline=TRUE) {
  
  if (!requireNamespace("plotly"))
    stop("This function requires the 'plotly' package.")
  auto <- .Options$plotlyauto
  if(length(auto) && auto) height <- width <- NULL
  
  ylabpos <- match.arg(ylabpos)

  if(rotate) {
    xf  <- y   #~ y
    yf  <- x   #~ x
    xfe <- yhi #~ yhi
    yfe <- xhi #~ xhi
    
  } else {
    xf  <- x    #~ x
    yf  <- y    #~ y
    xfe <- xhi  #~ xhi
    yfe <- yhi  #~ yhi
  }

  xn   <- all.vars(xf) #x)
  yn   <- all.vars(yf) #y)
  xhin <- all.vars(xfe) #xhi)
  yhin <- all.vars(yfe) #yhi)

  n <- nrow(data)
  if(! length(multplot)) {
    multplot <- ~ .v.
    data$.v. <- rep(' ', n)
  } else data$.v. <- data[[all.vars(multplot)]]

  vlevs  <- levels(as.factor(data$.v.))
  lastv  <- vlevs[length(vlevs)]

  strpres <- length(strata) > 0
  strata <- if(strpres) as.factor(data[[all.vars(strata)]])
            else
              as.factor(rep('', nrow(data)))
  stlevs <- levels(strata)
  lasts  <- stlevs[length(stlevs)]
  if(! length(nrows) && ! length(ncols) && strpres)
    ncols <- length(stlevs)

  if(length(ylab) && ! length(names(ylab))) names(ylab) <- vlevs
  if(! length(ylab)) ylab <- structure(vlevs, names=vlevs)

  fmt <- function(x) htmlSN(x, digits=digits)

  nam <- names(data)
  if(xhin  %nin% nam)         data[[xhin]]     <- rep(NA, n)
  if(yhin  %nin% nam)         data[[yhin]]     <- rep(NA, n)
  if('connect' %nin% nam)     data$connect     <- rep(FALSE, n)
  if('tracename' %in% nam && 'legendgroup' %nin% nam)
    data$legendgroup <- data$tracename

  if(length(color)) {
    ## ~ I('black') will not show inherits('AsIs') but all.vars is char(0)
    colasis         <- ! length(all.vars(color))
    traceform       <- if(colasis) ~ tracename
    legendgroupform <- if(colasis) ~ legendgroup
    colvar          <- if(! colasis) all.vars(color)
  }
  else if(length(size)) {
    sizeasis        <- ! length(all.vars(color))
    traceform       <- if(sizeasis) ~ tracename
    legendgroupform <- if(sizeasis) ~ legendgroup
    sizevar         <- if(! sizeasis) all.vars(size)
    }
  else {
    traceform       <- if('tracename'   %in% nam) ~ tracename
    legendgroupform <- if('legendgroup' %in% nam) ~ legendgroup
    colasis         <- FALSE
    colvar          <- NULL
    sizeasis        <- FALSE
    sizevar         <- NULL
    }
  
  if(length(color)) legendgroupform <- color

  usualfitter <- length(fitter) && is.function(fitter)
  is.ecdf     <- length(fitter) && is.character(fitter) && fitter == 'ecdf'
  xpresent    <- ! is.ecdf
  
  runfit <- if(usualfitter) function() {
    xv <- all.vars(xf)
    yv <- all.vars(yf)
    x  <- pt[[xv]]
    y  <- pt[[yv]]
    g  <- if(length(colvar))
            pt[[colvar]] else rep('', nrow(pt))
    g  <- as.factor(g)
    d  <- data.frame(x, y, g)
    Dp <- NULL
    xgrid <- seq(min(x, na.rm=TRUE), max(x, na.rm=TRUE), length=150)
    dx <- data.frame(x = xgrid)
    for(gv in levels(g)) {
      f  <- fitter(y ~ x, data=subset(d, g == gv))
      y  <- predict(f, newdata=dx)
      dp <- cbind(dx, y, g=gv)
      Dp <- rbind(Dp, dp)
    }
    names(Dp) <- c(xv, yv, if(length(colvar)) colvar else 'g')
    Dp
  }
  else
    if(is.ecdf) function() {
      yv <- all.vars(xf)
      y  <- pt[[yv]]
      g  <- if(length(colvar))
              pt[[colvar]] else rep('', nrow(pt))
      g  <- as.factor(g)
      Dp <- NULL
      rng <- range(y, na.rm=TRUE)
      for(gv in levels(g)) {
        j    <- g == gv & ! is.na(y)
        yg   <- sort(y[j])
        n    <- length(yg)
        vals <- unique(yg)   # see stats::ecdf
        a <- approx(vals, cumsum(tabulate(match(yg, vals))) / n,
                    method='constant', yleft=0, yright=1, f=0,
                    ties='ordered', xout=vals)
        delta <- diff(rng) * 0.025
        a$x   <- c(min(a$x) - delta, a$x, max(a$x) + delta)
        a$y   <- c(0, a$y, 1)
        dp <- data.frame(x = a$x, y = a$y, g=gv)
        Dp <- rbind(Dp, dp)
      }
      names(Dp) <- c(yv, 'ecdf', if(length(colvar)) colvar else 'g')
      Dp
    }

  xlabc <- if(length(xlab)) paste0(xlab, ': ')
  llab <- ifelse('tracename' %in% nam,
                 as.character(data$tracename), 'Limits')
  wl <- function(n, hin)
    paste0(xlabc, fmt(data[[hin]]),
           '<br>', llab, ':[',
           fmt(data[[n]]), ', ',
           fmt(data[[hin]]), ']')
  
    if(! length(htext)) { 
      nhi <- is.na(data[[xhin]]) + is.na(data[[yhin]])
      whi <- ifelse(nhi == 2, 'xy',       ## which vars missing hi?
                    ifelse(nhi == 0, '',
                           ifelse(is.na(data[[xhin]]), 'x', 'y')))
    data$htxt <- ifelse(whi == 'xy',
                        paste0(xlabc, fmt(data[[xn]]),
                               '<br>',
                               ylab[data$.v.], ':', fmt(data[[yn]])),
                 ifelse(whi == 'x', wl(yn, yhin),
                 ifelse(whi == 'y', wl(xn, xhin),
                        paste0(xlabc, fmt(data[[xn]]),
                               '<br>', xn, ' ', llab, ': [',
                               fmt(data[[xn]]), ', ', fmt(data[[xhin]]), ']',
                               '<br>', yn, ' ', llab, ': [',
                               fmt(data[[yn]]), ', ', fmt(data[[yhin]]), ']'))))
    htext <- ~ htxt
    }
  p <- plotly::plot_ly(height=height, width=width, colors=colors)
  ## For some reason colors doesn't always take in add_*
  P <- list()
  iv <- 0
#  axislab <- character(0)
#  axn1 <- if(rotate) 'yaxis' else 'xaxis'
#  axn2 <- if(rotate) 'xaxis' else 'yaxis'

  for(vn in vlevs) {
    for(sn in stlevs) {
      iv <- iv + 1
      whichaxis <- if(iv == 1) '' else iv
      if(is.ecdf) {
        ax1 <- ylab[vn]
        ax2 <- 'Cumulative Probability'
        xn  <- yn
        xf  <- yf
        yf  <- ~ ecdf
      } else {
        ax1 <- if(rotate) ylab[vn] else xlab
        ax2 <- if(rotate) xlab else ylab[vn]
      }

      w  <- subset(data, .v. == vn & strata == sn)
      wxn <- w[[xn]]  # if(xpresent) w[[xn]] else 1 : nrow(w)
      j <- if(length(colvar)) order(w[[colvar]], wxn)
           else
             if(length(sizevar)) order(w[[sizevar]], wxn)
           else order(wxn)
      w <- w[j, ]
      r <- p
      ipt <- is.na(w[[yhin]]) & is.na(w[[xhin]])
      pt  <- w[ipt, ]
      conct <- is.logical(pt$connect) && pt$connect[1]
      if(nrow(pt)) {
        if(length(fitter)) {
          Dp <- runfit()
          r <- plotly::add_lines(r, data=Dp, x=xf, y=yf,
                                 name=traceform, legendgroup=legendgroupform,
                                 showlegend=vn==lastv & sn==lasts,
                                 color=color, size=size,
                                 colors=colors,
                                 line=if(is.ecdf) list(shape='hv'))
        }
        if(showpts) {
          r <- plotly::add_markers(r, data=pt, x=xf, y=yf,
                                   name=traceform, legendgroup=legendgroupform,
                                   showlegend=vn==lastv & sn==lasts,
                                   color=color, size=size,
                                   text=htext, hoverinfo='text', colors=colors)
          if(conct)
            r <- plotly::add_lines(r, data=pt, x=xf, y=yf,
                                   name=traceform, legendgroup=legendgroupform,
                                   showlegend=FALSE, color=color,
                                   size=I(1),
                                   hoverinfo='none', colors=colors, alpha=alphaCline)
        }
      }
      
      s <- w[! ipt, ]
        
      if(nrow(s)) {
        ## If only one of xhi and yhi is missing, need to copy non-NA
        ## value from x/y.  Must go to extra trouble to preserve factors
        m <- is.na(s[[xhin]])
        if(any(m)) {
          a <- s[[xn]]
          a[! m] <- s[! m, xhin]
          s[[xhin]] <- a
        }
        m <- is.na(s[[yhin]])
        if(any(m)) {
          a <- s[[yn]]
          a[! m] <- s[! m, yhin]
          s[[yhin]] <- a
          }

        r <-
          plotly::add_segments(r, data=s, x=xf, y=yf, xend=xfe, yend=yfe,
                               name=traceform, legendgroup=legendgroupform,
                               showlegend=vn==lastv & sn==lasts,
                               color=color, size=size,
                               colors=colors, alpha=alphaSegments,
                               text=htext, hoverinfo='text')
        }
      ## rdocumentation.org/packages/plotly/versions/4.7.1/topics/add_annotations
      ## https://plot.ly/r/text-and-annotations/
			## plot.ly/r/text-and-annotations/#set-annotation-coordinate-references      
      firstst <- length(stlevs) > 1 && vn == vlevs[1]
      if(firstst || ylabpos == 'top') {
        lab <- ax2
        if(firstst) lab <- paste0(lab, '<br>', sn)
        r <- plotly::add_annotations(r,  x=0, y=1,
                                     xref='paper', xanchor='left',
                                     yref='paper', yanchor='bottom',
                                     text=paste0('<b>', lab, '</b>'),
                                     showarrow=FALSE,
                                     font=list(color='rgba(25, 25, 112, 1.0)',
                                               size=14))
        ## midnight blue
        }
      r <- plotly::layout(r, xaxis=list(title=ax1, range=xlim,
                                        zeroline=zeroline),
                          yaxis=list(title=if(ylabpos == 'y') ax2 else '',
                                     range=ylim))
      P[[iv]] <- r
    }
  }
  if(length(ncols)) nrows <- ceil(iv / ncols)
  if(length(stlevs) > 1) shareY <- TRUE
  if(length(P) == 1) P <- P[[1]]
  else {
    P <- if(length(nrows))
           plotly::subplot(P, shareX=shareX, shareY=shareY,
                           titleX=TRUE, titleY=TRUE, nrows=nrows)
         else
           plotly::subplot(P, shareX=shareX, shareY=shareY,
                           titleX=TRUE, titleY=TRUE)
    }
  P
}
utils::globalVariables('.v.')