mplot: Graphical Model Stability and Variable Selection Procedures

Documented in plot.vis vis

#' Model stability and variable inclusion plots
#'
#' Calculates and provides the plot methods for standard
#' and bootstrap enhanced model stability plots (\code{lvk} and
#' \code{boot}) as well as variable inclusion plots (\code{vip}).
#'
#' @param mf a fitted 'full' model, the result of a call
#'   to lm or glm (and in the future lme or lmer)
#' @param nvmax size of the largest model that can still be
#'   considered as a viable candidate
#' @param B number of bootstrap replications
#' @param lambda.max maximum penalty value for the vip plot,
#'   defaults to 2*log(n)
#' @param use.glmulti logical. Whether to use the glmulti package
#'   instead of bestglm. Default \code{use.glmulti=FALSE}.
#' @param nbest maximum number of models at each model size
#'   that will be considered for the lvk plot. Can also take
#'   a value of \code{"all"} which displays all models.
#' @param cores number of cores to be used when parallel
#'   processing the bootstrap
#' @param force.in the names of variables that should be forced
#'   into all estimated models. (Not yet implemented.)
#' @param screen logical, whether or not to perform an initial
#'   screen for outliers.  Highly experimental, use at own risk.
#'   Default = \code{FALSE}.
#' @param redundant logical, whether or not to add a redundant
#'   variable.  Default = \code{TRUE}.
#' @param seed random seed for reproducible results
#' @param ... further arguments (currently unused)
#' @details The result of this function is essentially just a
#'   list. The supplied plot method provides a way to visualise the
#'   results.
#'
#'   See \code{?plot.vis} or \code{help("plot.vis")} for details of the
#'   plot method associated with the result.
#'
#' @seealso \code{\link{plot.vis}}
#' @references Mueller, S. and Welsh, A. H. (2010), On model
#'   selection curves. International Statistical Review, 78:240-256.
#'   doi: 10.1111/j.1751-5823.2010.00108.x
#'
#'   Murray, K., Heritier, S. and Mueller, S. (2013), Graphical
#'   tools for model selection in generalized linear models.
#'   Statistics in Medicine, 32:4438-4451. doi: 10.1002/sim.5855
#'   
#'   Tarr G, Mueller S and Welsh AH (2018). mplot: An R Package for 
#'   Graphical Model Stability and Variable Selection Procedures. 
#'   Journal of Statistical Software, 83(9), pp. 1-28. doi: 10.18637/jss.v083.i09
#' @export
#' @import foreach
#' @import parallel
#' @examples
#' n = 100
#' set.seed(11)
#' e = rnorm(n)
#' x1 = rnorm(n)
#' x2 = rnorm(n)
#' x3 = x1^2
#' x4 = x2^2
#' x5 = x1*x2
#' y = 1 + x1 + x2 + e
#' dat = data.frame(y, x1, x2, x3, x4, x5)
#' lm1 = lm(y ~ ., data = dat)
#' \dontshow{
#' v1 = vis(lm1, B = 5, cores = 1, seed = 1)
#' plot(v1, highlight = "x1", which = "lvk")
#' plot(v1, which = "boot")
#' plot(v1, which = "vip")
#' }
#' \dontrun{
#' v1 = vis(lm1, seed = 1)
#' plot(v1, highlight = "x1", which = "lvk")
#' plot(v1, which = "boot")
#' plot(v1, which = "vip")
#' }

vis = function(mf,
               nvmax,
               B = 100,
               lambda.max,
               nbest = "all",
               use.glmulti = FALSE,
               cores,
               force.in = NULL,
               screen = FALSE,
               redundant = TRUE,
               seed = NULL,
               ...) {
  # redundant not supported with glmulti yet
  if (use.glmulti) {
    if (!requireNamespace("mvoutlier", quietly = TRUE)) {
      stop("mvoutlier package needed when screen=TRUE. Please install it.",
           call. = FALSE
      )
    }
    redundant <- FALSE
  }
  
  set.seed(seed)
  m <- mextract(mf,
                screen = screen,
                redundant = redundant
  )
  fixed <- m$fixed
  yname <- m$yname
  fam <- m$family
  X <- m$X
  kf <- m$k
  n <- m$n
  n.obs <- n
  
  initial.weights = m$wts
  if (missing(nvmax))
    nvmax = kf
  if (nbest == "all") {
    if(kf > 15) {
      cat('\nUsing nbest = "all" is not currently allowed when the full\nmodel has more than 15 variables.  Defaulting to nbest = 1 
          \n')
      nbest = 1
    } else {
      nbest = 2 ^ (kf - 1) 
    }
    # nbest needs to be whole model space because
    # bestglm takes option TopModels which is the
    # top models to show overall
    # Using leaps, you can specify the number of
    # best models of each model size, so the max
    # would be:
    # max(choose((kf-1),0:(kf-1)))
    # this behaviour (keeping so many nbest models)
    # could cause issues for large parameter spaces
    # consider maxing out at a million?
  }
  if (!is.numeric(nbest)) {
    stop("nbest should be numeric or 'all'")
  }
  
  add.intercept.row = function(em, rs.which, rs.stats) {
    # add an intercept row
    rs.which = rbind(c(1, rep(0, dim(rs.which)[2] - 1)), rs.which)
    # used for testing
    # f0 = stats::as.formula(paste(yname," ~ 1"))
    # lm0 = stats::lm(formula=f0,data=X)
    i = 1 # special case for the null model
    intercept = TRUE
    n1 = em$nn - intercept
    sigma2 = em$sserr / (n1 + intercept - em$last)
    nullrss = em$nullrss
    ress = em$nullrss
    # ress = sum(resid(lm0)^2) # used for testing
    vr = ress / nullrss
    rsq = 1 - vr
    adjr2 = 1 - vr * n1 / (n1 + intercept - i)
    cp = ress / sigma2 - (n1 + intercept - 2 * i)
    # the way leaps calculates the bic:
    bic = (n1 + intercept) * log(vr) + i * log(n1 + intercept)
    logLikelihood = -(em$nn + em$nn * log(2 * pi) + em$nn * log(em$nullrss /
                                                                  em$nn)) / 2
    rs.stats = rbind(c(logLikelihood, ress, bic, cp, rsq, adjr2, i), rs.stats)
    return(cbind(rs.which, rs.stats))
  }
  
  ## Initial single pass
  ## (gives the minimum envelopping set of models)
  if (any(class(mf) == "glm") == TRUE & !use.glmulti) {
    stop(!("bestglm" %in% rownames(utils::installed.packages())))
    # no redundant variable
    # Xy = X 
    # Xy$REDUNDANT.VARIABLE = NULL # do want to keep it in
    em = bestglm::bestglm(
      Xy = X,
      family = fam,
      IC = "AIC",
      TopModels = nbest+1,
      nvmax = nvmax,
      weights = initial.weights
    )
    # has an intercept row
    rs.which = em$BestModels[, 1:(kf - 1)] + 0
    k = rowSums(rs.which) + 1
    rs.ll = -(em$BestModels$Criterion - 2 * (k - 1)) / 2
    rs.bic = -2 * rs.ll + k * log(n)
    rs.aic = -2 * rs.ll + 2 * k
    rs.stats = cbind(rs.ll, rs.bic, rs.aic, k)
    colnames(rs.stats) = c("logLikelihood", "bic", "aic", "k")
    rs.all = cbind(1, rs.which, rs.stats)
    # in bestglm rs.all$logLikelihood comes from
    # stats::logLik(model) unless Gaussian in which case
    # -(n/2) * log(sum(resid(ans)^2)/n) is used
    # note the bic in bestglm is calculated as:
    # -2*rs.all$logLikelihood + log(n)*(rs.all$k-1)
  } 
  if (any(class(mf) == "glm") == TRUE & use.glmulti) {
    glmulti.trans = function(x) {
      form.list = lapply(x@formulas, all.vars)
      vars = sort(unique(unlist(form.list)))
      res.mat = data.matrix(sapply(vars, function(x)
        lapply(form.list, function(y)
          1 * any(is.element(x, y)))))
      res.mat = data.frame(res.mat, ll = x@crits, k = x@K)
    }
    mf <<- mf
    initial.weights <<- initial.weights
    
    dryrun = glmulti::glmulti(
      stats::formula(mf),
      level = 1,
      marginality = TRUE,
      data = stats::model.frame(mf),
      method = "d",
      fitfunction = "glm",
      plotty = FALSE,
      report = FALSE,
      includeobjects = FALSE,
      family = stats::family(mf)[[1]],
      weights = mf$initial.weights
    )
    
    dryrun <<- as.numeric(dryrun)
    
    n = stats::nobs(mf)
    
    em = glmulti::glmulti(
      stats::formula(mf),
      data = stats::model.frame(mf),
      level = 1,
      marginality = TRUE,
      method = "h",
      confsetsize = dryrun,
      fitfunction = "glm",
      crit = "ll",
      plotty = FALSE,
      report = FALSE,
      includeobjects = FALSE,
      family = stats::family(mf)[[1]],
      weights = mf$initial.weights
    )
    rs.temp = glmulti.trans(em)
    var.order = all.vars(stats::formula(mf))
    rs.all = rs.temp[, var.order]
    rs.all$logLikelihood = rs.temp$ll # -(rs.temp$bic - log(n)*(rs.temp$k))/2
    rs.all$bic = -2 * rs.all$logLikelihood +  log(n) * (rs.temp$k)# -2*rs.all$logLikelihood + rs.temp$k*log(n)
    rs.all$aic = -2 * rs.all$logLikelihood +  2 * (rs.temp$k) #rs.temp$aic
    rs.all$k = rs.temp$k
  } 
  if(any(class(mf)=="lm") & length(class(mf))==1) {
    wts = initial.weights
    em = leaps::regsubsets(
      x = fixed,
      data = X,
      weights = wts,
      nbest = nbest,
      nvmax = nvmax,
      intercept = TRUE,
      force.in = force.in,
      really.big = TRUE
    )
    rs = summary(em)
    # does not start with intercept row
    rs.which = data.frame(rs$which + 0, row.names = NULL)
    k = rowSums(rs.which)
    # assuming Gaussian errors:
    logLikelihood = -(n + n * log(2 * pi) + n * log(rs$rss / n)) / 2
    rs.stats = cbind(logLikelihood, rs$rss, rs$bic,
                     rs$cp, rs$rsq, rs$adjr2, k)
    colnames(rs.stats) = c("logLikelihood", "rss", "bic",
                           "cp", "rsq", "adjr2", "k")
    rs.all = add.intercept.row(em, rs.which, rs.stats)
    # note that the BIC in leaps (and add.intercept.row funtion)
    # differs from the bestglm BIC buy a constant
  }
  
  if (use.glmulti) {
    nms = base::all.vars(stats::formula(mf))
    nms = nms[nms != yname]
    nm = apply(rs.all[, nms], 1, function(x) {
      Reduce(paste, paste(nms[x == 1], sep = "", collapse = "+"))
    })
  } else {
    nms = colnames(rs.all)[2:kf]
    nm = apply(rs.all[, 2:kf], 1, function(x) {
      Reduce(paste, paste(nms[x == 1], sep = "", collapse = "+"))
    })
  }
  nm[nm == ""] = "1"
  nm = paste(yname, "~", nm, sep = "")
  nm = gsub(pattern = " ",
            replacement = "",
            x = nm)
  nm = gsub(pattern = "REDUNDANT.VARIABLE", "RV", x = nm)
  res.single.pass = base::data.frame(base::data.matrix(rs.all))
  res.single.pass$name = nm
  
  # Bootstrap run -----------------------------------------------------------
  
  if (missing(cores))
    cores = max(detectCores() - 1, 1)
  cl.visB = parallel::makeCluster(cores)
  doParallel::registerDoParallel(cl.visB)
  if (any(class(mf) == "glm") == TRUE & !use.glmulti) {
    
    stop(!("bestglm" %in% rownames(utils::installed.packages())))
    
    res = foreach::foreach(
      b = 1:B, 
      .packages = c("bestglm"),
      .options.RNG = seed #,
      # .export = c("n.obs") # don't seem to need to export this
      # and get a warning when we do
      ) %dorng% {
        wts = stats::rexp(n = n.obs, rate = 1) * initial.weights
        
        em = bestglm::bestglm(
          Xy = X,
          family = fam,
          IC = "BIC",
          TopModels = 1,
          weights = wts,
          nvmax = nvmax
        )
        # starts with intercept row
        rs.which = em$Subsets[, 1:kf] + 0
        rs.stats = em$Subsets[,-c(1:kf)]
        k = rowSums(rs.which)
        rs.all = cbind(rs.which, rs.stats, k)
        # in bestglm rs.all$logLikelihood comes from
        # stats::logLik(model) unless Gaussian in which case
        # -(n/2) * log(sum(resid(ans)^2)/n) is used
        # note the bic in bestglm is calculated as:
        # -2*rs.all$logLikelihood + log(n)*(rs.all$k-1)
      }
  } else if (any(class(mf) == "glm") == TRUE & use.glmulti) {
    res = foreach(b = 1:B, 
                  .packages = c("glmulti", "dplyr"),
                  .options.RNG=seed) %dorng% {
                    mf <<- mf
                    initial.weights <<- initial.weights
                    n.obs <<- n.obs
                    dryrun <<- dryrun
                    
                    em <-
                      glmulti::glmulti(
                        stats::formula(mf),
                        level = 1,
                        # No interaction considered
                        marginality = TRUE,
                        data = stats::model.frame(mf),
                        method = "h",
                        # Exhaustive approach
                        crit = "ll",
                        # AIC as criteria
                        confsetsize = dryrun,
                        # Keep 5 best models
                        plotty = FALSE,
                        report = FALSE,
                        # No plot or interim reports
                        fitfunction = "glm",
                        # glm function
                        includeobjects = FALSE,
                        family = stats::family(mf)[[1]],
                        weights = I(stats::rexp(n = n.obs, rate = 1) *
                                      initial.weights)
                      )
                    
                    rs.temp = glmulti.trans(em)
                    var.order = all.vars(stats::formula(mf))
                    rs.all = rs.temp[, var.order]
                    rs.all$logLikelihood = rs.temp$ll #-(rs.temp$aic - 2*(rs.temp$k))/2
                    rs.all$bic = -2 * rs.all$logLikelihood + rs.temp$k * log(n)
                    rs.all$aic = -2 * rs.all$logLikelihood + rs.temp$k * 2 #rs.temp$aic
                    rs.all$k = rs.temp$k
                    rs.all = dplyr::filter(dplyr::group_by(rs.all, k),
                                           logLikelihood == max(logLikelihood))
                    rs.all = base::data.frame(base::data.matrix(rs.all))
                    
                  }
  } else {
    res = foreach(b = 1:B, 
                  .packages = c("leaps"),
                  .options.RNG = seed# ,
                  # .export = c("n.obs") # don't seem to need to export this
                  # and get a warning when we do
                  ) %dorng% {
                    wts = stats::rexp(n = n.obs, rate = 1) * initial.weights
                    em = leaps::regsubsets(
                      x = fixed,
                      data = X,
                      nbest = 1,
                      nvmax = nvmax,
                      intercept = TRUE,
                      force.in = force.in,
                      weights = wts,
                      really.big = TRUE
                    )
                    rs = summary(em)
                    # does not start with intercept row
                    rs.which = data.frame(rs$which + 0, row.names = NULL)
                    k = rowSums(rs.which)
                    # assuming Gaussian errors:
                    logLikelihood = -(n + n * log(2 * pi) + n * log(rs$rss / n)) / 2
                    rs.stats = cbind(logLikelihood, rs$rss, rs$bic,
                                     rs$cp, rs$rsq, rs$adjr2, k)
                    colnames(rs.stats) = c("logLikelihood", "rss", "bic",
                                           "cp", "rsq", "adjr2", "k")
                    rs.all = add.intercept.row(em, rs.which, rs.stats)
                    # note that the BIC in leaps (and add.intercept.row funtion)
                    # differs from the bestglm BIC buy a constant
                  }
  }
  parallel::stopCluster(cl.visB)
  
  ### Variable inclusion Plot Calculations
  if (missing(lambda.max))
    lambda.max = 2 * log(n)
  lambdas = seq(0, lambda.max, 0.01)
  
  if (use.glmulti) {
    nms = base::all.vars(stats::formula(mf))
    nms = nms[nms != yname]
    real.k = length(nms) + 1
  } else {
    real.k = kf
  }
  
  var.in = matrix(NA, ncol = real.k, nrow = length(lambdas))
  colnames(var.in) = colnames(res[[1]][1:real.k])
  
  rownames(var.in) = lambdas
  for (i in 1:length(lambdas)) {
    resl = lapply(res, function(x)
      - 2 * x$logLikelihood + lambdas[i] * x$k)
    min.pos = unlist(lapply(resl, which.min))
    temp.best = mapply(function(x, row) {
      x[row, 1:real.k]
    }, res, min.pos, SIMPLIFY = TRUE)
    temp.best = matrix(
      as.numeric(temp.best),
      nrow = dim(temp.best)[1],
      dimnames = dimnames(temp.best)
    )
    var.in[i, ] = rowSums(temp.best)
  }
  
  #### lvk where bubbles reflect frequencey of choice
  best.within.size = function(x) {
    rankings.within.size = unlist(stats::aggregate(-2 * x$logLikelihood, rank, by =
                                                     list(x$k))$x)
    return(x[rankings.within.size == 1, ])
  }
  res.best = lapply(res, best.within.size)
  
  res.df = do.call(rbind.data.frame, res.best)
  
  if (use.glmulti) {
    res.df = dplyr::group_by(res.df, k)
    res.df = dplyr::count_(res.df, vars = nms)
    res.df = base::data.frame(base::data.matrix(res.df))
    res.df$freq = res.df$n
    res.df$n = NULL
  } else {
    res.df = plyr::count(df = res.df, vars = 1:real.k)
    res.df$logLikelihood = NA
    res.df$name = NA
    res.df$k = rowSums(res.df[, 1:real.k])
  }
  
  
  # iterate over all required models on the original date
  # to obtain logLiks (in future could extract other stats)
  for (i in 1:dim(res.df)[1]) {
    ff = paste(yname, " ~ ",
               paste(colnames(res.df[2:real.k])[res.df[i, 2:real.k] == 1], collapse =
                       "+"), sep = "")
    if (ff == paste(yname, " ~ ", sep = "")) {
      ff = stats::as.formula(paste(yname, "~1"))
    } else {
      ff = stats::as.formula(ff)
    }
    # redo this to access previous estimated models
    if (any(class(mf) == "glm") == TRUE & !use.glmulti) {
      em = stats::glm(
        formula = ff,
        data = X,
        family = fam,
        weights = initial.weights
      )
    } else if (any(class(mf) == "glm") == TRUE & use.glmulti) {
      em = stats::glm(
        formula = ff,
        data = stats::model.frame(mf),
        family = fam,
        weights = initial.weights
      )
    } else {
      em = stats::lm(formula = ff,
                     data = X,
                     weights = initial.weights)
    }
    res.df$logLikelihood[i] = as.numeric(stats::logLik(em))
    nm = gsub(
      pattern = " ",
      replacement = "",
      x = Reduce(paste, deparse(ff))
    )
    nm = gsub(pattern = "REDUNDANT.VARIABLE", "RV", x = nm)
    res.df$name[i] = nm
  }
  
  res.df = res.df[with(res.df, order(k, -freq)), ]
  
  output = list(
    res.df = res.df,
    res.single.pass = res.single.pass,
    var.in = var.in,
    screen = screen,
    mextract = m,
    lambdas = lambdas,
    B = B,
    mf = mf,
    use.glmulti = use.glmulti
  )
  
  class(output) = "vis"
  return(output)
}




#' Plot diagnostics for a vis object
#'
#' A plot method to visualise the results of a \code{vis} object.
#'
#' Specifying \code{which = "lvk"} generates a scatter plot where
#' the points correspond to description loss is plot against model size
#' for each model considered.  The \code{highlight} argument is
#' used to differentiate models that contain a particular variable
#' from those that do not.
#'
#' Specifying \code{which = "boot"} generates a scatter plot where
#' each circle represents a model with a non-zero bootstrap probability,
#' that is, each model that was selected as the best model of a
#' particular dimension in at least one bootstrap replication.
#' The area of each circle is proportional to the
#' corresponding model's bootstrapped selection probability.
#'
#' @param x \code{vis} object, the result of \code{\link{vis}}
#' @param highlight the name of a variable that will be highlighted
#' @param interactive logical.  If \code{interactive=TRUE} a
#'   googleVis plot is provided instead of the base graphics plot.
#'   Default is \code{interactive=FALSE}.
#' @param classic logical.  Depricated. If \code{classic=TRUE} a
#'   base graphics plot is provided instead of a googleVis plot.
#'   For now specifying \code{classic} will overwrite the
#'   default \code{interactive} behaviour, though this is
#'   likely to be removed in the future.
#' @param legend.position the postion of the legend for classic plots.
#'   Default \code{legend.position="right"} alternatives include
#'   \code{legend.position="top"} and \code{legend.position="bottom"}
#' @param tag Default NULL. Name tag of the objects to be extracted
#' from a gvis (googleVis) object.
#'
#' The default tag for is NULL, which will
#' result in R opening a browser window.  Setting \code{tag='chart'}
#' or setting \code{options(gvis.plot.tag='chart')} is useful when
#' googleVis is used in scripts, like knitr or rmarkdown.
#'
#' @param shiny Default FALSE. Set to TRUE when using in a shiny interface.
#'
#' @param which a vector specifying the plots to be output.  Variable
#'   inclusion plots \code{which="vip"}; description loss against model
#'   size \code{which="lvk"}; bootstrapped description loss against
#'   model size \code{which="boot"}.
#' @param width Width of the googleVis chart canvas area, in pixels.
#'   Default: 800.
#' @param height Height of the googleVis chart canvas area, in pixels.
#'   Default: 400.
#' @param chartWidth googleVis chart area width.
#'   A simple number is a value in pixels;
#'   a string containing a number followed by \code{\%} is a percentage.
#'   Default: \code{"60\%"}
#' @param chartHeight googleVis chart area height.
#'   A simple number is a value in pixels;
#'   a string containing a number followed by \code{\%} is a percentage.
#'   Default: \code{"80\%"}
#' @param fontSize font size used in googleVis chart.  Default: 12.
#' @param left space at left of chart (pixels?).  Default: "50".
#' @param top space at top of chart (pixels?).  Default: "30".
#' @param axisTitlesPosition Where to place the googleVis axis titles,
#'   compared to the chart area. Supported values:
#'   "in" - Draw the axis titles inside the the chart area.
#'   "out" - Draw the axis titles outside the chart area.
#'   "none" - Omit the axis titles.
#' @param dataOpacity The transparency of googleVis data points,
#'   with 1.0 being completely opaque and 0.0 fully transparent.
#' @param options a list to be passed to the googleVis function giving
#'   complete control over the output.  Specifying a value for
#'   \code{options} overwrites all other plotting variables.
#' @param backgroundColor The background colour for the main area
#'   of the chart. A simple HTML color string,
#'   for example: 'red' or '#00cc00'.  Default: 'null' (there is an
#'   issue with GoogleCharts when setting 'transparent' related to the
#'   zoom window sticking - once that's sorted out, the default
#'   will change back to 'transparent')
#' @param nbest maximum number of models at each model size
#'   that will be considered for the lvk plot. Can also take
#'   a value of \code{"all"} which displays all models (default).
#' @param text logical, whether or not to add text labels to classic
#'   boot plot. Default = \code{FALSE}.
#' @param min.prob when \code{text=TRUE}, a lower bound on the probability of
#'   selection before a text label is shown.
#' @param srt when \code{text=TRUE}, the angle of rotation for the text labels.
#'   Default = 45.
#' @param print.full.model logical, when \code{text=TRUE} this determines if the full
#'   model gets a label or not.  Default=\code{FALSE}.
#' @param max.circle  determines the maximum circle size.
#'   Default = 15.
#' @param jitterk amount of jittering of the model size in the lvk and boot plots.
#'   Default = 0.1.
#' @param ylim the y limits of the lvk and boot plots.
#' @param seed random seed for reproducible results
#' @param ... further arguments (currently unused)
#' @seealso \code{\link{vis}}
#' @references Mueller, S. and Welsh, A. H. (2010), On model
#'   selection curves. International Statistical Review, 78:240-256.
#'   doi: 10.1111/j.1751-5823.2010.00108.x
#'
#'   Murray, K., Heritier, S. and Mueller, S. (2013), Graphical
#'   tools for model selection in generalized linear models.
#'   Statistics in Medicine, 32:4438-4451. doi: 10.1002/sim.5855
#'   
#'   Tarr G, Mueller S and Welsh AH (2018). mplot: An R Package for 
#'   Graphical Model Stability and Variable Selection Procedures. 
#'   Journal of Statistical Software, 83(9), pp. 1-28. doi: 10.18637/jss.v083.i09
#' @export
#' @examples
#' n = 100
#' set.seed(11)
#' e = rnorm(n)
#' x1 = rnorm(n)
#' x2 = rnorm(n)
#' x3 = x1^2
#' x4 = x2^2
#' x5 = x1*x2
#' y = 1 + x1 + x2 + e
#' dat = data.frame(y,x1,x2,x3,x4,x5)
#' lm1 = lm(y~.,data=dat)
#' \dontshow{
#' v1 = vis(lm1, B = 5, cores = 1, seed = 1)
#' plot(v1, highlight = "x1", which = "lvk")
#' plot(v1, which = "boot")
#' plot(v1, which = "vip")
#' }
#' \dontrun{
#' v1 = vis(lm1, seed = 1)
#' plot(v1, highlight = "x1", which = "lvk")
#' plot(v1, which = "boot")
#' plot(v1, which = "vip")
#' }
plot.vis = function(x,
                    highlight,
                    interactive = FALSE,
                    classic = NULL,
                    tag = NULL,
                    shiny = FALSE,
                    nbest = "all",
                    which = c("vip", "lvk", "boot"),
                    width = 800,
                    height = 400,
                    fontSize = 12,
                    left = 50,
                    top = 30,
                    chartWidth = "60%",
                    chartHeight = "80%",
                    axisTitlesPosition = "out",
                    dataOpacity = 0.5,
                    options = NULL,
                    ylim,
                    legend.position = "right",
                    backgroundColor = 'transparent',
                    text = FALSE,
                    min.prob = 0.4,
                    srt = 45,
                    max.circle = 15,
                    print.full.model = FALSE,
                    jitterk = 0.1,
                    seed = NULL,
                    ...) {
  # reproducible jitter?
  if(!is.null(seed)) { 
    set.seed(seed)
  }
  # to prevent the notes:
  # plot.vis: no visible binding for global variable k
  # plot.vis: no visible binding for global variable logLikelihood
  k = logLikelihood = NULL
  
  if (backgroundColor == "transparent") {
    backgroundColor = "{stroke:null, fill:'null', strokeSize: 0}"
  } else {
    backgroundColor = paste("{stroke:null, fill:'",
                            backgroundColor,
                            "', strokeSize: 0}",
                            sep = "")
  }
  if (!is.null(classic))
    interactive = !classic
  
  #backwards compatability for use.glmulti
  if(!exists("x$use.glmulti"))
    x$use.glmulti = FALSE
  
  if (missing(highlight)) {
    # highlight first variable in the coefficient list
    if (x$use.glmulti) {
      vars = base::all.vars(stats::formula(x$mf))[-1]
      highlight = vars[1]
    } else {
      highlight = x$mextract$exp.vars[1]
      vars = make.names(x$mextract$exp.vars)
    }
  } else {
    vars = highlight
  }
  if ("lvk" %in% which) {
    var.ident = n.var.ident = NA
    lvk.dat = data.frame(x$res.single.pass)
    if(is.numeric(nbest)){
      lvk.dat = lvk.dat %>% 
        dplyr::group_by(k) %>% 
        dplyr::top_n(n = nbest, wt = logLikelihood)
    }
    #m2ll = -2 * lvk.dat$logLikelihood
    lvk.dat$m2ll = -2 * lvk.dat$logLikelihood
    lvk.dat$spk = lvk.dat$k
    jitter = stats::runif(length(lvk.dat$spk), 0 - jitterk, 0 + jitterk)
    lvk.dat$spk = lvk.dat$spk + jitter
    if (!interactive) {
      # for (i in 1:length(vars)) {
      #lvk.dat = x$res.single.pass
      lvk.dat$kj = lvk.dat$k + (unlist(lvk.dat[, vars[1]]) - 0.5) / 4
      lvk.dat[, vars[1]] = as.logical(unlist(lvk.dat[, vars[1]]))
      
      p = ggplot2::ggplot(data = lvk.dat, ggplot2::aes_string(x = "kj", y = "m2ll")) +
        ggplot2::geom_jitter(
          ggplot2::aes_string(color = vars[1]),
          shape = 19,
          width = jitterk,
          size = 2
        ) +
        ggplot2::theme_bw(base_size = 14) +
        ggplot2::ylab("-2*Log-likelihood") +
        ggplot2::xlab("Number of parameters") +
        ggplot2::theme(
          legend.title = ggplot2::element_blank(),
          legend.key = ggplot2::element_blank(),
          legend.position = legend.position
        ) +
        ggplot2::scale_fill_manual(values = ggplot2::alpha(c("blue", "red"), .4)) +
        ggplot2::scale_color_manual(values = ggplot2::alpha(c("blue", "red"), .4),
                                    labels = paste(c("Without", "With"), vars[1])) +
        ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(
          shape = 22,
          size = 5,
          fill = ggplot2::alpha(c("blue", "red"), .4)
        )))
      
      if (!missing(ylim))
        p = p + ggplot2::ylim(ylim[1], ylim[2])
      
      return(p)
      
      # col_high = grDevices::rgb(1, 0, 0, alpha = 0)
      # col_nohigh = grDevices::rgb(0, 0, 1, alpha = 0.5)
      # colbg = grDevices::rgb(1, 0, 0, alpha = 0.5)
      # var.ident = which(x$res.single.pass[,vars[i]] == 1)
      # n.var.ident = which(x$res.single.pass[,vars[i]] == 0)
      # graphics::par(mar = c(3.4,3.4,0.1,0.1),mgp = c(2.0, 0.75, 0))
      # if (missing(ylim)) ylim = c(min(m2ll),max(m2ll))
      # graphics::plot(m2ll[n.var.ident] ~ lvk.dat$spk[n.var.ident],
      #                pch = 19, cex = 1.3, col = col_nohigh,
      #                bg = colbg,
      #                xlab = "Number of parameters",
      #                ylab = "-2*Log-likelihood",
      #                ylim = ylim,
      #                xlim = c(min(lvk.dat$spk),max(lvk.dat$spk)))
      # graphics::points(m2ll[var.ident] ~ lvk.dat$spk[var.ident],
      #                  pch = 24, bg = colbg,
      #                  col = col_high, cex = 1.2)
      # graphics::legend("topright",legend = c(paste("With",vars[i]),paste("Without",vars[i])),
      #                  col = c(col_high,col_nohigh),
      #                  pt.bg = colbg, pch = c(24,19))
      #  if (length(vars) > 1) {
      #    graphics::par(ask = TRUE)
      # }
      # }
    } else {
      # googleVis version
      var.ident = which(lvk.dat[, vars[1]] == 1)
      n.var.ident = which(lvk.dat[, vars[1]] == 0)
      with.var = without.var = rep(NA, dim(lvk.dat)[1])
      with.var[var.ident] = lvk.dat$m2ll[var.ident]
      without.var[n.var.ident] = lvk.dat$m2ll[n.var.ident]
      mods = lvk.dat$name
      dat = data.frame(
        k = lvk.dat$spk,
        without.var,
        without.var.html.tooltip = mods,
        with.var,
        with.var.html.tooltip = mods
      )
      colnames(dat)[4] = paste("With", highlight)
      colnames(dat)[2] = paste("Without", highlight)
      gvis.title = paste("Loss versus dimension plot", sep = "")
      x.ticks = paste(1:max(lvk.dat$spk), collapse = ",")
      gvis.hAxis = paste("{title:'Number of parameters', ticks: [",
                         x.ticks, "]}")
      y.min = min(lvk.dat$m2ll) - 0.02*(max(lvk.dat$m2ll)- min(lvk.dat$m2ll))
      gvis.vAxis = paste("{title:'-2*Log-likelihood', minValue:",
                         y.min, "}")
      chartArea = paste(
        "{left:", left, ",top:", top,
        ",width:'", chartWidth, "',height:'", chartHeight,
        "'}", sep = ""
      )
      if (is.null(options)) {
        use.options = list(
          title = gvis.title,
          fontSize = fontSize,
          hAxis = gvis.hAxis,
          vAxis = gvis.vAxis,
          axisTitlesPosition = axisTitlesPosition,
          chartArea = chartArea,
          width = width,
          height = height,
          dataOpacity = dataOpacity,
          backgroundColor = backgroundColor,
          series = "{0:{color: 'blue', visibleInLegend: true}, 1:{color: 'red', visibleInLegend: true}}",
          explorer = "{axis: 'vertical',  keepInBounds: true, maxZoomOut: 1, maxZoomIn: 0.01, actions: ['dragToZoom', 'rightClickToReset']}"
        )
      } else {
        use.options = options
      }
      fplot = googleVis::gvisScatterChart(data = dat, 
                                          options = use.options)
      if (shiny) {
        return(fplot)
      } else {
        graphics::plot(fplot, tag = tag)
      }
    }
  }
  if ("boot" %in% which) {
    var.ident = x$res.df[, vars[1]] == 1
    vi = var.ident
    var.ident[var.ident == TRUE] = paste("With", vars[1])
    var.ident[var.ident == FALSE] = paste("Without", vars[1])
    jitter = stats::runif(length(vi), 0 - jitterk, 0 + jitterk)
    dat = data.frame(
      mods = x$res.df$name,
      k =  x$res.df$k + jitter,
      LL = -2 * x$res.df$logLikelihood,
      prob = x$res.df$freq / x$B,
      var.ident = var.ident
    )
    if (!interactive) {
      dat$mod.lab = as.character(dat$mods)
      dat$mod.lab[dat$prob < min.prob] = ""
      dat$mod.lab[length(dat$mod.lab)] = ""
      dat$int_k = round(dat$k, 0)
      p = ggplot2::ggplot(
        dat,
        ggplot2::aes_string(
          x = "int_k",
          y = "LL",
          group = "var.ident",
          label = "mod.lab"
        )
      ) +
        ggplot2::geom_jitter(
          ggplot2::aes_string(size = "prob",
                              fill = "var.ident"),
          shape = 21,
          width = jitterk
        ) +
        ggplot2::scale_size(range = c(0, max.circle)) +
        ggplot2::theme_bw(base_size = 14) +
        ggplot2::ylab("-2*Log-likelihood") +
        ggplot2::xlab("Number of parameters") +
        ggplot2::theme(
          legend.title = ggplot2::element_blank(),
          legend.key = ggplot2::element_blank(),
          legend.position = legend.position
        ) +
        ggplot2::scale_fill_manual(values = ggplot2::alpha(c("red", "blue"), .4)) +
        ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(
          shape = 22,
          size = 5,
          fill = ggplot2::alpha(c("red", "blue"), .4)
        )))
      if (!missing(ylim))
        p = p + ggplot2::ylim(ylim[1], ylim[2])
      if (text) {
        p = p + ggplot2::geom_text(hjust = 0, angle = srt)
      }
      return(p)
    } else {
      gvis.title = paste("Model stability plot", sep = "")
      x.ticks = paste(1:max(dat$k), collapse = ",")
      gvis.hAxis = paste(
        "{title:'Number of parameters',
        maxValue:",
        max(dat$k) + 0.5,
        " ,
        minValue:",
        0.5,
        " ,
        ticks: [",
        x.ticks,
        "]}"
      )
      y.min = min(dat$LL) - 0.02*(max(dat$LL)- min(dat$LL))
      gvis.vAxis = paste("{title:'-2*Log-likelihood', minValue:",
                         y.min, "}")
      chartArea = paste(
        "{left:", left, ",top:", top,
        ",width:'", chartWidth, "',height:'", chartHeight,
        "'}", sep = ""
      )
      bubble = paste("{opacity:",
                     dataOpacity,
                     ", textStyle: {color: 'none'}}",
                     sep = "")
      if (is.null(options)) {
        use.options = list(
          title = gvis.title,
          fontSize = fontSize,
          vAxis = gvis.vAxis,
          hAxis = gvis.hAxis,
          sizeAxis = "{minValue: 0, minSize: 1,
          maxSize: 20, maxValue:1}",
          axisTitlesPosition = axisTitlesPosition,
          bubble = bubble,
          chartArea = chartArea,
          width = width,
          height = height,
          backgroundColor = backgroundColor,
          explorer = "{axis: 'vertical',
          keepInBounds: true,
          maxZoomOut: 1,
          maxZoomIn: 0.01,
          actions: ['dragToZoom',
          'rightClickToReset']}"
        )
      } else {
        use.options = options
      }
      fplot = googleVis::gvisBubbleChart(
        data = dat,
        idvar = "mods",
        xvar = "k",
        yvar = "LL",
        colorvar = "var.ident",
        sizevar = "prob",
        options = use.options
      )
      if (shiny) {
        return(fplot)
      } else {
        graphics::plot(fplot, tag = tag)
      }
    }
  }
  if ("vip" %in% which) {
    # variable inclusion plot
    if (x$use.glmulti) {
      var.names = vars
    } else {
      var.names = make.names(x$mextract$exp.vars)
    }
    var.names = gsub(pattern = ":",
                     replacement = ".",
                     x = var.names)
    B = x$B
    p.var = x$var.in[, is.element(colnames(x$var.in), var.names)]
    colnames(p.var) = gsub("REDUNDANT.VARIABLE", "RV", colnames(p.var))
    sortnames = names(sort(apply(p.var, 2, mean), decreasing = TRUE))
    vip.df = p.var[, sortnames]
    vip.df = data.frame(
      lambda = x$lambdas,
      AIC = NA,
      AIC.annotation = NA,
      BIC = NA,
      BIC.annotation = NA,
      vip.df / B
    )
    aicline = rbind(
      c(2, 0, NA, NA, NA, rep(NA, length(var.names))),
      c(2, 1, "AIC", NA, NA, rep(NA, length(var.names))),
      c(log(x$mextract$n), NA, NA, 0, NA, rep(NA, length(var.names))),
      c(log(x$mextract$n), NA, NA, 1, "BIC", rep(NA, length(var.names)))
    )
    colnames(aicline) = colnames(vip.df)
    vip.df = rbind(vip.df, aicline)
    tid = c(1, 2, 4, 6:dim(vip.df)[2])
    vip.df[, tid] = sapply(vip.df[, tid], as.numeric)
    if (!interactive) {
      classic.lambda = vip.df$lambda
      classic.vip.df = subset(vip.df,
                              select = -c(
                                get("AIC"),
                                get("AIC.annotation"),
                                get("BIC"),
                                get("BIC.annotation"),
                                get("lambda")
                              ))
      lwds = log((1:length(var.names)) + 1)
      lwds = rev(2 * lwds / max(lwds))
      
      vip.ggdf = cbind(classic.lambda, classic.vip.df)
      vip.ggdfL = reshape2::melt(vip.ggdf, id = "classic.lambda")
      
      p = ggplot2::ggplot(
        data = vip.ggdfL,
        ggplot2::aes_string(
          x = "classic.lambda",
          y = "value",
          colour = "variable"
        )
      ) +
        ggplot2::geom_line() +
        ggplot2::theme_bw(base_size = 14) +
        ggplot2::ylab("Bootstrapped probability") +
        ggplot2::xlab("Penalty") +
        ggplot2::theme(
          legend.title = ggplot2::element_blank(),
          legend.key = ggplot2::element_blank(),
          legend.position = legend.position
        )
      
      return(p)
    } else {
      gvis.title = "Variable inclusion plot"
      #lineDashStyle = paste("[",paste(1:2,collapse=","),"]",sep="")
      lineseries = "[{lineDashStyle: [2,2], lineWidth: 2, color:'gray',
      visibleInLegend: false},
      {lineDashStyle: [2,2], lineWidth: 2, color:'gray',
      visibleInLegend: false}]"
      chartArea = paste(
        "{left:",
        left,
        ",top:",
        top,
        ",width:'",
        chartWidth,
        "',height:'",
        chartHeight,
        "'}",
        sep = ""
      )
      if (is.null(options)) {
        use.options = list(
          title = gvis.title,
          fontSize = fontSize,
          vAxis = "{title:'Bootstrapped probability'}",
          hAxis = "{title:'Penalty'}",
          sizeAxis = "{minValue: 0, minSize: 1,
          maxSize: 20, maxValue:1}",
          axisTitlesPosition = axisTitlesPosition,
          series = lineseries,
          #lineDashStyle = lineDashStyle,
          chartArea = chartArea,
          width = width,
          height = height,
          backgroundColor = 'transparent',
          annotations = "{style:'line'}"
        )
      } else {
        use.options = options
      }
      fplot = googleVis::gvisLineChart(
        data = vip.df,
        xvar = "lambda",
        yvar = c(
          "AIC",
          "AIC.annotation",
          "BIC",
          "BIC.annotation",
          sortnames
        ),
        options = use.options
      )
      if (shiny) {
        return(fplot)
      } else {
        return(graphics::plot(fplot, tag = tag))
      }
    }
  } else
    return(invisible())
}


#' Print method for a vis object
#'
#' Prints basic output of the bootstrap results of an
#' vis object.
#'
#' @param x a \code{vis} object, the result of \code{\link{vis}}
#' @param min.prob a lower bound on the probability of
#'   selection before the result is printed
#' @param print.full.model logical, determines if the full
#'   model gets printed or not.  Default=\code{FALSE}.
#' @param ... further arguments (currently unused)
#' @export
# S3 print method for class 'vis'
print.vis = function (x,
                      min.prob = 0.3,
                      print.full.model = FALSE,
                      ...) {
  dat = x$res.df
  dat$prob = dat$freq / x$B
  print.obj = dat[dat$prob > min.prob, c("name", "prob", "logLikelihood")]
  if (!print.full.model)
    print.obj = print.obj[-dim(print.obj)[1], ]
  print.obj[, 2:3] = round(print.obj[, 2:3], 2)
  rownames(print.obj) = NULL
  print(print.obj, row.names = FALSE)
  invisible(x)
}