R/Histogram.R

Defines functions Histogram

Documented in Histogram

Histogram <-
function(x=NULL, data=d, rows=NULL,
         stat_x=c("count", "proportion"),
         n_cat=getOption("n_cat"), Rmd=NULL,

    by1=NULL, by2=NULL,
    n_row=NULL, n_col=NULL, aspect="fill",

    bin_start=NULL, bin_width=NULL, bin_end=NULL, breaks="Sturges",

    theme=getOption("theme"),
    fill=getOption("bar_fill_cont"),
    color=getOption("bar_color_cont"),
    transparency=getOption("trans_bar_fill"),

    values=FALSE,
    reg="snow2", cumulate=c("off", "on", "both"),

    xlab=NULL, ylab=NULL, main=NULL, sub=NULL,
    lab_adj=c(0,0), margin_adj=c(0,0,0,0),

    rotate_x=getOption("rotate_x"), rotate_y=getOption("rotate_y"),
    offset=getOption("offset"),
    scale_x=NULL, scale_y=NULL,

    density=FALSE, show_histogram=TRUE,
    bandwidth=NULL, type=c("general", "normal", "both"),
    fill_general=NULL, fill_normal=NULL, fill_hist=getOption("se_fill"), 
    color_general="gray20", color_normal="gray20",
    x.pt=NULL, y_axis=FALSE,
    rug=FALSE, color_rug="black", size_rug=0.5,

    add=NULL, x1=NULL, y1=NULL, x2=NULL, y2=NULL,

    eval_df=NULL, digits_d=NULL, quiet=getOption("quiet"), do_plot=TRUE,
    width=6, height=6, pdf_file=NULL, 
    fun_call=NULL, ...) {


  if (missing(fill))
    fill <- ifelse (is.null(getOption("bar_fill_cont")), 
      getOption("bar_fill"), getOption("bar_fill_cont"))
  breaks.miss <- ifelse (missing(breaks), TRUE, FALSE)
  bw.miss <- ifelse (missing(bandwidth), TRUE, FALSE)

  # a dot in a parameter name to an underscore and more
  dots <- list(...)
  if (!is.null(dots)) if (length(dots) > 0) {
    for (i in 1:length(dots)) {
      if (names(dots)[i] == "dn.hist") show_histogram <- dots[[i]]
      if (names(dots)[i] == "fill_gen") fill_general <- dots[[i]]
      if (names(dots)[i] == "fill_nrm") fill_normal <- dots[[i]]
      if (names(dots)[i] == "color_gen") color_general <- dots[[i]]
      if (names(dots)[i] == "color_nrm") color_normal <- dots[[i]]
      if (names(dots)[i] == "bw") bandwidth <- dots[[i]]
      if (length(grep(".", names(dots)[i], fixed=TRUE)) > 0) {
        nm <- gsub(".", "_", names(dots)[i], fixed=TRUE)
        assign(nm, dots[[i]])
        get(nm)
      }
    }
  }

  if (is.null(fun_call)) fun_call <- match.call()

  trans <- transparency

  # limit actual argument to alternatives, perhaps abbreviated
  cumulate <- match.arg(cumulate)
  type <- match.arg(type)
  stat_x <- match.arg(stat_x)
  proportion <- ifelse (stat_x == "proportion", TRUE, FALSE)   # old signal
  histogram <- ifelse (density, FALSE, TRUE)

  if (theme != getOption("theme")) {
    sty <- style(theme, reset=FALSE)
    fill <- sty$bar$bar_fill_cont
    color <- sty$bar$color_ordered
    trans <- sty$bar$trans_fill
  }

  if (!breaks.miss && density)  {
    cat("\n"); stop(call.=FALSE, "\n","------\n",
      "When plotting density, parameter  breaks  is ignored.\n",
      "Bins must be equal width, but can use bin_start and bin_width.\n\n")
  }

  if (!is.null(scale_x)) if (length(scale_x) != 3)  {
    cat("\n"); stop(call.=FALSE, "\n","------\n",
      "Starting value, ending value, and number of intervals\n",
      "  must all be specified as a vector, e.g., scale_x=c(0, 9 , 5)\n\n")
  }

  if (!is.null(scale_y)) if (length(scale_y) != 3)  {
    cat("\n"); stop(call.=FALSE, "\n","------\n",
      "Starting value, ending value, and number of intervals\n",
      "  must all be specified as a vector, e.g., scale_y=c(0, 9 , 5)\n\n")
  }

  panel_fill <- getOption("panel_fill")
  panel_color <- getOption("panel_color")
  grid_color <- getOption("grid_color")
  lab_color <- getOption("lab_color")
  lab_cex <- getOption("lab_cex")
  axis_cex <- getOption("axis_cex") 

  fill[which(fill == "off")] <- "transparent"
  color[which(color == "off")] <- "transparent"

  Trellis <- ifelse(!missing(by1), TRUE, FALSE)
  
  xlab_adj <- lab_adj[1];   ylab_adj <- lab_adj[2]
  tm.adj <- margin_adj[1];  rm.adj <- margin_adj[2]
  bm.adj <- margin_adj[3];  lm.adj <- margin_adj[4]

  .param.old(...)


  # --------- data frame stuff
  
  data.miss <- ifelse (missing(data), TRUE, FALSE) 

  # let deprecated mydata work as default
  dfs <- .getdfs() 
  mydata.ok <- FALSE
  if (!is.null(dfs)) {
    if ("mydata" %in% dfs  &&  !("d" %in% dfs)) {
      d <- mydata
      rm(mydata)
      df.name <- "mydata"
      mydata.ok <- TRUE
      options(dname = df.name)
    }
  }

  # get name of data table
  if (!mydata.ok) {
    df.name <- deparse(substitute(data))
    options(dname = df.name)
  }
 
  # if a tibble, convert to data frame
  if (exists(df.name, envir=parent.frame())) {
    if (any(grepl("tbl", class(data), fixed=TRUE)))
      data <- data.frame(data)
  }

  x.name <- deparse(substitute(x), width.cutoff = 120L)
  options(xname = x.name)

    if (!is.null(x.name))
      x.in.global <- .in.global(x.name, quiet)  # in global?, includes vars list
    else
      x.in.global <- FALSE

  if (!x.in.global)  {
    if (df.name != "NULL") {  # if NULL, force global (shiny, from interact() )
      # force evaluation (not lazy) if data not specified, relies on default d
      if (data.miss) {
        if (!mydata.ok) .nodf(df.name)  # check to see if df exists 
        data <- eval(substitute(data), envir=parent.frame())
        # the 1.201 comes from Shiny, need to reset
        # l.cex and l.axc are set in interact() before shiny run
        if (getOption("lab_cex") == 1.201) {
         if (getOption("l.cex") != 1.201) {
            style(lab_cex=getOption("l.cex"))
            style(axis_cex=getOption("l.axc"))
          }
          else
            style()
        }
      }
    }
    else # df.name is NULL
      x.in.global <- TRUE
  }
    
  eval_df <- !x.in.global 

    
  # -----------------------------------------------------------
  # establish if a data frame, if not then identify variable(s)
  # x can be missing entirely, with a data frame passed instead
  # if x a vector, then x.name not in data, but also not in global

  if (!missing(x)) {

    # x not in global env, in df, specify data= forces to data frame
    if (!x.in.global) {
      if (eval_df) {
        if(!mydata.ok) .nodf(df.name)  # check to see if data frame container exists 
        .xcheck(x.name, df.name, names(data))  # x-vars in df?
      }
      data.vars <- as.list(seq_along(data))
      names(data.vars) <- names(data)
      if (!missing(rows)) {  # subset rows
        r <- eval(substitute(rows), envir=data, enclos=parent.frame())
        if (!any(r)) {
          cat("\n"); stop(call.=FALSE, "\n","------\n",
            "No rows of data with the specified value of\n",
            "rows = ", deparse(substitute(rows)), "\n\n")
        }
        r <- r & !is.na(r)  # set missing for a row to FALSE
        data <- data[r,,drop=FALSE]
      }
      ind <- eval(substitute(x), envir=data.vars)  # col num of each var     
      if (!("list" %in% class(data))) {
        data.x <- data[, ind]
        if (length(ind) == 1) {  # x is 1 var
          if (!is.numeric(data.x)) { 
            cat("\n"); stop(call.=FALSE, "\n","------\n",
              "A histogram is only computed from a numeric variable\n",
              "To tabulate the values of a categorical variable:\n\n",
              "  Plot(", x.name, ", stat=\"count\")\n",
              "or\n",
              "  BarChart(", x.name, ")\n\n", sep="")
          }
          data.x <- data.frame(data.x, stringsAsFactors=TRUE)
          names(data.x) <- x.name
        }
      }
      else {  # class of data is "list"
        data.x <- data.frame(data[[ind]], stringsAsFactors=TRUE)
        names(data.x) <- x.name
      }
    }  # end x not in global

    # x is in the global environment (vector or data frame)
    else {
      if (is.data.frame(x))  # x a data frame
        data.x <- x
      else {  # x a vector in global
        .in.global(x.name, quiet)  # x.name is expression?
        if (!is.function(x))
          data.x <- data.frame(x, stringsAsFactors=TRUE)  # x is 1 var
        else
          data.x <- data.frame(eval(substitute(data$x)), stringsAsFactors=TRUE)
        names(data.x) <- x.name
      }
    }  # x is in global
  }

  # evaluate by1
  #-------------
  if (!missing(by1)) {

    # get actual variable name before potential call of data$x
    by1.name <- deparse(substitute(by1))
    options(by1name = by1.name)

    # get conditions and check for data existing
    in.global <- .in.global(by1.name, quiet)

    # see if var exists in df, if x not in global Env or function call
    if (!missing(x) && !in.global)
      .xcheck(by1.name, df.name, names(data))

    if (!in.global)
      by1.call <- eval(substitute(data$by1))
    else {  # vars that are function names get assigned to global
      by1.call <- by1
      if (is.function(by1.call)) by1.call <- eval(substitute(data$by1))
    }

    if (!is.factor(by1.call)) by1.call <- factor(by1.call)
  }

  else
    by1.call <- NULL

  # evaluate by2
  #-------------
  if (!missing(by2)) {

    # get actual variable name before potential call of data$x
    by2.name <- deparse(substitute(by2))
    options(by2name = by2.name)

    # get conditions and check for data existing
    in.global <- .in.global(by2.name, quiet)

    # var in data frame? if x not in global Env or function call
    if (!missing(x) && !in.global)
      .xcheck(by2.name, df.name, names(data))

    if (!in.global)
      by2.call <- eval(substitute(data$by2))
    else {  # vars that are function names get assigned to global
      by2.call <- by2
      if (is.function(by2.call)) by2.call <- eval(substitute(data$by2))
    }

    if (!is.factor(by2.call)) by2.call <- factor(by2.call)
  }

  else
   by2.call <- NULL


  # ---------------
  # do the analysis

  if (Trellis && do_plot) {

    .bar.lattice(data.x[,1], by1.call, by2.call, n_row, n_col, aspect, 
           proportion, fill, color, trans, size.pt=NULL,
           xlab, ylab, main, rotate_x, offset, width, height, pdf_file,
           segments_x=NULL, breaks, T.type="hist", quiet)
  }

  else {  # not Trellis

    if (!missing(x)) data <- data.x

    # set up graphics
    manage.gr <- .graphman()  # manage graphics?
    if (manage.gr) {
      i.win <- 0
      for (i in 1:ncol(data)) {
        if (is.numeric(data[,i])  &&  !.is.num.cat(data[,i], n_cat)) 
          i.win <- i.win + 1
      }
      .graphwin(i.win, d.w=width, d.h=height)
      open.win <- 2
    }
    plot.i <- 0  # keep track of generated graphics
    plot.title  <- character(length=0)

    # no suggestions if multiple variables
    if (ncol(data) > 1) {
      sug <- getOption("suggest")
      options(suggest = FALSE)
    }

    for (i in 1:ncol(data)) {  # data only contains data to be analyzed
      nu <- length(unique(na.omit(data[,i])))

      x.name <- names(data)[i]
      options(xname = x.name)

      if (is.numeric(data[,i])) {
        # let 1 variable go through, even if num.cat
        if (ncol(data) == 1  ||  !.is.num.cat(data[,i], n_cat)) {

        if (!is.null(pdf_file)) {
          if (!grepl(".pdf", pdf_file))
            pdf_file <- paste(pdf_file, ".pdf", sep="")
          pdf(file=pdf_file, width=width, height=height, onefile=FALSE)
        }
        else {
          if (df.name != "NULL")  # not dev.new for shiny
              .opendev(pdf_file, width, height)
        }

        txss <- ""
        ssstuff <- .ss.numeric(data[,i], digits_d=digits_d, brief=TRUE)
        txss <- ssstuff$tx

        if (histogram) {

          # nothing returned if quiet=TRUE
          stuff <- .hst.main(data[,i], fill, color, trans, reg,
              rotate_x, rotate_y, offset,
              breaks, bin_start, bin_width,
              bin_end, proportion, values, cumulate, xlab, ylab, main, sub, 
              xlab_adj, ylab_adj, bm.adj, lm.adj, tm.adj, rm.adj,
              add, x1, x2, y1, y2,
              scale_x, scale_y,
              quiet, do_plot, fun_call=fun_call, ...)

          txsug <- stuff$txsug
          if (is.null(txsug)) txsug <- ""
          txdst <- stuff$ttx
          if (is.null(txdst)) txdst <- ""

          txotl <- ""
            txotl <- .bx.stats(data[,i])$txotl
            if (txotl[1] == "") txotl <- "No (Box plot) outliers"

          if (ncol(data) > 1  &&  !quiet) { # for var range, print text output
            class(txss) <- "out"
            class(txdst) <- "out"
            class(txotl) <- "out"
            output <- list(out_ss=txss, out_freq=txdst, out_outliers=txotl)
            class(output) <- "out_all"
            if (!quiet) print(output)
          }
        } # end histogram

        else {  # density
          if (bw.miss) bandwidth <- .band.width(data[,i], ...)  # band width

          clr <- getOption("theme")  # color theme not used except monochrome

          if (!missing(color_rug)  ||  !missing(size_rug)) rug <- TRUE

          if (missing(fill_general)) {
              fill_general <- rgb(80,150,200, alpha=80, maxColorValue=255)
            if (clr == "gray" ||
               (getOption("theme") == "gray"  &&
                getOption("sub_theme") == "black")) {
              fill_general <- rgb(.75,.75,.75, .5)
            }
          }
          else {  # add some transparency to a named color
            if (fill_general %in% colors()) {
              fg.rgb <- col2rgb(fill_general) 
              fill_general <- rgb(fg.rgb[1], fg.rgb[2], fg.rgb[3],
                                  alpha=80, maxColorValue=255)
            }
          }

          if (missing(fill_normal)) {
              fill_normal <- rgb(250,210,230, alpha=80, maxColorValue=255)
            if (clr == "gray" ||
               (getOption("theme") == "gray"  &&
                getOption("sub_theme") == "black")) {
              fill_normal <- "transparent"
            }
          }
          else {  # add some transparency to a named color
            if (fill_normal %in% colors()) { 
              fg.rgb <- col2rgb(fill_normal) 
              fill_normal <- rgb(fg.rgb[1], fg.rgb[2], fg.rgb[3],
                                  alpha=80, maxColorValue=255)
            }
          }

          x.min <- NULL
          x.max <- NULL
          if (!is.null(scale_x)) {
            x.min <- scale_x[1]
            x.max <- scale_x[2]
          }

          stuff <- .dn.main(data[,i], bandwidth, type, show_histogram,
                bin_start, bin_width,
                fill_hist, color_normal, color_general,
                fill_normal, fill_general,
                rotate_x, rotate_y, offset,
                x.pt, xlab, main, sub, y_axis, x.min, x.max,
                rug, color_rug, size_rug, quiet, fncl=fun_call, ...)


          txdst <- ""  # should be named txbw
          txotl <- ""
          txsug <- ""
            txdst <- stuff$tx

            txotl <- .bx.stats(data[,i])$txotl
            if (txotl[1] == "") txotl <- "No (Box plot) outliers"

            txsug <- stuff$txsug

            class(txdst) <- "out"
            class(txotl) <- "out"
            class(txsug) <- "out"
          gl <- .getlabels()
          x.name <- gl$xn; x.lbl <- gl$xl;
          y.name <- gl$yn; y.lbl <- gl$yl
          if (!quiet  &&  ncol(data) > 1) {
            ttlns <- .title2(x.name, y.name, x.lbl, y.lbl, TRUE)
            ttlns <- paste(" ", "\n", ttlns, sep="")
          }
          else
            ttlns <- ""

        }  # end density

        if (!is.null(pdf_file)) {
          dev.off()
          if (!quiet) .showfile(pdf_file, "Histogram")
        }

      }  # end ncol(data) == 1 ... 

      else { 
        if (ncol(data) > 1) {
          plot.i <- plot.i + 1
          plot.title[plot.i] <- paste("Histogram of ", x.name, sep="")
          if (manage.gr) {
            open.win <- open.win + 1
            dev.set(which = open.win)
          }
        }
        if (!quiet) .ncat("Histogram", x.name, nu, n_cat)
      }

      }  # is.numeric(data[,i])
    }  # end for i from 1 to ncol

    if (ncol(data) > 1) {
      options(suggest = sug)
      if (is.null(pdf_file)  &&  plot.i > 0)
        if (is.null(options()$knitr.in.progress))
          .plotList(plot.i, plot.title)
    }

    if (df.name != "NULL")  # not shiny
      dev.set(which=2)  # reset graphics window for standard R functions

    if (ncol(data) == 1) {

      # R Markdown
      txkfl <- ""
      if (!is.null(Rmd)) {
        if (!grepl(".Rmd", Rmd)) Rmd <- paste(Rmd, ".Rmd", sep="")
        txknt <- .dist.Rmd(x.name, df.name, fun_call, digits_d)
        cat(txknt, file=Rmd, sep="\n")
        txkfl <- .showfile2(Rmd, "R Markdown instructions")
      }

      class(txsug) <- "out"
      class(txss) <- "out"
      class(txdst) <- "out"
      class(txotl) <- "out"
      class(txkfl) <- "out"

      if (histogram) {   

        output <- list(type="Histogram",
          call=fun_call, 
          out_suggest=txsug, out_ss=txss, out_outliers=txotl, out_freq=txdst,
          out_file=txkfl,
          bin_width=stuff$bin_width, n_bins=stuff$n.bins,
          breaks=stuff$breaks,
          mids=stuff$mids, counts=stuff$counts, prop=stuff$prop,
          cumulate=stuff$counts_cum, cprop=stuff$prop_cum)

        class(output) <- "out_all"
        if (!quiet) print(output)

        # names and order of components per documentation in Histogram.Rd
        stuff$out_outliers <- txotl  # after to class out for line breaks
        stuff$out_summary <- txss
        stuff$out_freq <- txdst
        names(stuff) <- c("out_suggest", "out_freq", "bin_width", "n_bins",
                "breaks", "mids", "counts", "prop", "cumulate", "cprop",
                "out_outliers", "out_summary"
                )
        stuff <- c(stuff[1], stuff[12], stuff[2], stuff[11], stuff[3], stuff[4],
                   stuff[5], stuff[6], stuff[7], stuff[8], stuff[9], stuff[10])
        return(invisible(stuff))
      }  # end histogram

      else {  # density
          output <- list(type="Density",
#           out_suggest=txsug, out_title=ttlns, out_stats=txdst,
            out_suggest=txsug, out_stats=txdst,
            out_ss=txss, out_outliers=txotl,
            out_file=txkfl,
            bw=stuff$bw, n=stuff$n, n_miss=stuff$n.miss)

          class(output) <- "out_all"

        if (!quiet) print(output)
        return(invisible(output))
      }  # end density

    }  # end ncol(data) == 1

  }  # else not Trellis

}

Try the lessR package in your browser

Any scripts or data that you put into this service are public.

lessR documentation built on Nov. 12, 2023, 1:08 a.m.