R/hist2.R

Defines functions hist2

Documented in hist2

# ==================================================================== #
# TITLE                                                                #
# Tools for Data Analysis at Certe                                     #
#                                                                      #
# AUTHORS                                                              #
# Berends MS (m.berends@certe.nl)                                      #
# Meijer BC (b.meijer@certe.nl)                                        #
# Hassing EEA (e.hassing@certe.nl)                                     #
#                                                                      #
# COPYRIGHT                                                            #
# (c) 2019 Certe Medische diagnostiek & advies - https://www.certe.nl  #
#                                                                      #
# LICENCE                                                              #
# This R package is free software; you can redistribute it and/or      #
# modify it under the terms of the GNU General Public License          #
# version 2.0, as published by the Free Software Foundation.           #
# This R package is distributed in the hope that it will be useful,    #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the         #
# GNU General Public License for more details.                         #
# ==================================================================== #

#' Histogram with skewness and normal dist
#'
#' @param x vector
#' @param bindwidth width of bins, see \code{\link[ggplot2]{geom_histogram}}
#' @param info show suggestions when skewness is < -0.5 or > 0.5
#' @export
hist2 <- function(x, binwidth = 0.3, info = TRUE) {

  if (!is.numeric(x)) {
    stop("x must be numeric")
  }
  if (all(x == 0)) {
    stop("all x are 0")
  }

  x <- x[!is.na(x)]

  n = length(x)
  mean = mean(x)
  sd = sd(x)

  p <- ggplot(data.frame(x = x), aes(x = x,
                                     mean = mean,
                                     sd = sd,
                                     binwidth = binwidth,
                                     n = n)) +
    geom_histogram(binwidth = binwidth,
                   colour = "white", fill = "cornflowerblue", size = 0.1) +
    stat_function(fun = function(x) dnorm(x, mean = mean, sd = sd) * n * binwidth,
                  color = "black", size = 1.25, linetype = 5) +
    labs(title = "Histogram") +
    theme_certe() +
    theme(plot.title = element_text(size = 16, face ="bold"))

  # rechtsscheef
  if (info == TRUE & skewness(x) > 0.5) {
    suppressWarnings(
      p <- p + geom_label(label = paste0("Skewed to the right: \u03b3\u2081(x) = ", (round(skewness(x), 2)),
                                         "\nSuggestions: \u03b3\u2081(log x) = ", (round(skewness(suppressWarnings(log(x)), na.rm = TRUE), 2)),
                                         "\n\u03b3\u2081(1/x) = ", (round(skewness(1/x, na.rm = TRUE), 2)),
                                         "\n\u03b3\u2081(\u221ax) = ", (round(skewness(suppressWarnings(sqrt(x)), na.rm = TRUE), 2))),
                          y = Inf, x = max(x), inherit.aes = FALSE, vjust = 1.5, hjust = 1, family = "Consolas", colour = "gray25", size = 3.5)
    )
  }

  # linksscheef
  if (info == TRUE & skewness(x) < -0.5) {
    suppressWarnings(
      p <- p + geom_label(label = paste0("Skewed to the left: \u03b3\u2081(x) = ", (round(skewness(x), 2)),
                                         "\nSuggestions: \u03b3\u2081(x\u00b2) = ", (round(skewness(x^2, na.rm = TRUE), 2)),
                                         "\n\u03b3\u2081(x\u00b3) = ", (round(skewness(x^3, na.rm = TRUE), 2))),
                          y = Inf, x = min(x), inherit.aes = FALSE, vjust = 1.5, hjust = 0, family = "Consolas", colour = "gray25", size = 3.5)
    )
  }
  p
}

# #' @exportMethod hist.default
# #' @inherit graphics::hist
# #' @export hist.default
# hist.default <- function(x,
#                          breaks = "Sturges",
#                          freq = NULL,
#                          include.lowest = TRUE,
#                          normalcurve = TRUE,
#                          right = TRUE,
#                          density = NULL,
#                          angle = 45,
#                          col = NULL,
#                          border = NULL,
#                          main = paste("Histogram of", xname),
#                          ylim = NULL,
#                          xlab = xname,
#                          ylab = NULL,
#                          axes = TRUE,
#                          plot = TRUE,
#                          labels = FALSE,
#                          tick = TRUE,
#                          warn.unused = TRUE,
#                          ...)  {
# 
#   # https://stackoverflow.com/a/20078645/4575331
#   xname <- paste(deparse(substitute(x), 500), collapse = "\n")
# 
#   suppressWarnings(
#     h <- graphics::hist.default(
#       x = x,
#       breaks = breaks,
#       freq = freq,
#       include.lowest = include.lowest,
#       right = right,
#       density = density,
#       angle = angle,
#       col = col,
#       border = border,
#       main = main,
#       ylim = ylim,
#       xlab = xlab,
#       ylab = ylab,
#       axes = axes,
#       plot = plot,
#       labels = labels,
#       tick = tick,
#       warn.unused = warn.unused,
#       ...
#     )
#   )
# 
#   if (normalcurve == TRUE & plot == TRUE) {
#     x <- x[!is.na(x)]
#     xfit <- seq(min(x), max(x), length = 40)
#     yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
#     yfit <- yfit * diff(h$mids[1:2]) * length(x)
# 
#     lines(xfit, yfit, col = "black", lwd = 2)
#   } else if (plot == FALSE) {
#     h
#   }
# }
msberends/certedata documentation built on Nov. 26, 2019, 5:19 a.m.