R/commonDiscoverDistributions.R

Defines functions .ldAllTextsList .ldIntroText .ldParsSupportMoments .ldFillPlotECDF .ldPlotECDF .ldFillPlotHistogram .ldPlotHistogram .ldFillPlotCMF .ldPlotCMF .ldExplanationCMF .ldFillCMFContainer .ldFillPlotPMF .ldPlotPMF .ldExplanationPMF .ldFillPMFContainer .ldFillPlotQF .ldPlotQF .ldExplanationQF .ldFillQFContainer .ldFillPlotCDF .ldPlotCDF .ldExplanationCDF .ldFillCDFContainer .ldFillPlotPDF .ldPlotPDF .ldExplanationPDF .ldFillPDFContainer .ldFormulaPlot .ldGetPlotContainer .ldShowDistribution .ldFillEstCDFPlot .ldFillPPPlot .ldFillEstPMFPlot .ldFillEstPDFPlot .ldFillQQPlot .ldFitPlots .ldFillFitStatisticsTable .ldFitStatisticsResults .ldFitStatisticsTable .ldStructureResults .ldMLEoptim .ldMLEResults .ldAnyAssessFitRequested .ldMLE .ldOptionsDeterminePlotLimits .ldEstimatesTable .ldGetFitContainer .computeObservedMoments .ldFillObservedMomentsTableMain .ldObservedMomentsTableMain .ldFillSummaryFactorTableMain .ldSummaryFactorTableMain .ldFillSummaryContinuousTableMain .ldSummaryContinuousTableMain .ldDescriptives .ldGetDataContainer .ldCheckInteger .simulateData

#
# Copyright (C) 2013-2020 University of Amsterdam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

### Summary stats for distributions module ----
.simulateData <- function(jaspResults, options, as = "scale", sampleSizeName = "n"){
  if(is.null(jaspResults[['simdata']])){
    args <- c(options[['pars']], sampleSize = options[['sampleSize']])
    names(args)[names(args) == "sampleSize"] <- sampleSizeName

    sample <- do.call(options[['rFun']], args)
    jaspResults[['simdata']] <- createJaspState(sample)
    jaspResults[['simdata']]$dependOn(c("newVariableName", "simulateNow"))

    if(options[["newVariableName"]] != "")
    {
      jaspResults[['sampleColumn']] <- createJaspColumn(options[["newVariableName"]])

      didItWork <- switch(as,
      scale=  jaspResults[['sampleColumn']]$setScale(  sample),
      ordinal=jaspResults[['sampleColumn']]$setOrdinal(sample),
              jaspResults[['sampleColumn']]$setNominal(sample))

      if(!didItWork)
        jaspResults[['sampleColumnError']] <- createJaspHtml(text=gettextf("Could not write to column '%s', probably because it wasn't created by this analysis", options[["newVariableName"]]), elementType="errorMsg", dependencies="newVariableName")
    }
  }

  return()
}

.ldCheckInteger <- function(variable, errors){
  is_integer <- all((variable %% 1) == 0)

  if(isFALSE(errors) && is_integer){
    errors <- FALSE
  } else if(isFALSE(errors) && !is_integer){
    errors <- list(integer = TRUE, message = gettext("The following problem(s) occurred while running the analysis:<ul><li>Variable has to be discrete (i.e., integer)</li></ul>"))
  } else if(!is_integer){
    errors[['integer']] <- TRUE
    errors[['message']] <- paste(errors[['message']], gettext("<ul><li>Variable has to be discrete (i.e., integer)</li></ul>"))
  } else{
    errors <- errors
  }

  return(errors)
}

.ldGetDataContainer <- function(jaspResults, options, errors = FALSE){
  if(!is.null(jaspResults[['dataContainer']])){
    dataContainer <- jaspResults[['dataContainer']]
  } else{
    dataContainer <- createJaspContainer(title = gettextf("Overview - %s", options[['variable']]))
    dataContainer$position <- 6
    dataContainer$dependOn(c("variable", "simulateNow"))

    jaspResults[['dataContainer']] <- dataContainer
  }

  if(!isFALSE(errors) && (!is.null(errors$infinity) || !is.null(errors$observations) || !is.null(errors$integer))){
    dataContainer$setError(errors$message)
  }

  if(!isFALSE(errors) && (!is.null(errors$factorLevels))){
    dataContainer$setError(errors$message)
  }

  return(dataContainer)
}

#### Descriptives ----
.ldDescriptives <- function(jaspResults, variable, options, ready, errors, as = c("continuous", "discrete", "factor")){
  as <- match.arg(as)
  dataContainer <- .ldGetDataContainer(jaspResults, options, errors)

  if(as == "continuous") {
    ready <- ready && (isFALSE(errors) || (is.null(errors$infinity) && is.null(errors$observations)))
    .ldSummaryContinuousTableMain(dataContainer, variable, options, ready)
    .ldObservedMomentsTableMain  (dataContainer, variable, options, ready)
    .ldPlotHistogram             (dataContainer, variable, options, ready)
    .ldPlotECDF                  (dataContainer, variable, options, ready)
  } else if(as == "discrete") {
    ready <- ready && (isFALSE(errors) || (is.null(errors$infinity) && is.null(errors$observations)))
    .ldSummaryContinuousTableMain(dataContainer, variable, options, ready)
    .ldObservedMomentsTableMain  (dataContainer, variable, options, ready)
    .ldPlotHistogram             (dataContainer, variable, options, ready, "discrete")
    .ldPlotECDF                  (dataContainer, variable, options, ready)
  } else {
    ready <- ready && isFALSE(errors)
    .ldSummaryFactorTableMain    (dataContainer, variable, options, ready)
    .ldPlotHistogram             (dataContainer, variable, options, ready, "factor")
  }
}

.ldSummaryContinuousTableMain <- function(dataContainer, variable, options, ready) {
  if(!options$summary) return()
  if(!is.null(dataContainer[['summary']])) return()
  if(!ready) return()

  summaryTable <- createJaspTable(title = gettext("Descriptives"))
  summaryTable$position <- 1
  summaryTable$dependOn(c("summary"))

  summaryTable$addColumnInfo(name = "variable",   title = gettext("Variable"),       type = "string", combine = TRUE)
  summaryTable$addColumnInfo(name = "sampleSize", title = gettext("n"),              type = "integer")
  summaryTable$addColumnInfo(name = "mean",       title = gettext("Mean"),           type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "var",        title = gettext("Variance"),       type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "sd",         title = gettext("Std. deviation"), type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "min",        title = gettext("Minimum"),        type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "quantile25", title = gettextf("25%% Quantile"),   type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "median",     title = gettext("Median"),         type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "quantile75", title = gettextf("75%% Quantile"),   type = "number", format = "sf:4")
  summaryTable$addColumnInfo(name = "max",        title = gettext("Maximum"),        type = "number", format = "sf:4")
  #summaryTable$addColumnInfo(name = "skew",       title = gettext("Skewness"),       type = "number", format = "sf:4")
  #summaryTable$addColumnInfo(name = "kurt",       title = gettext("Kurtosis"),       type = "number", format = "sf:4")

  dataContainer[['summary']] <- summaryTable

  .ldFillSummaryContinuousTableMain(summaryTable, variable, options)

  return()
}

.ldFillSummaryContinuousTableMain <- function(summaryTable, variable, options){

  summaryTable$addRows(list(variable   = options[['variable']],
                            sampleSize = sum(!is.na(variable)),
                            mean       = mean(variable, na.rm = TRUE),
                            var        = var(variable, na.rm = TRUE),
                            sd         = sd(variable, na.rm = TRUE),
                            min        = min(variable, na.rm = TRUE),
                            quantile25 = quantile(variable, 0.25, na.rm = TRUE),
                            median     = median(variable, na.rm = TRUE),
                            quantile75 = quantile(variable, 0.75, na.rm = TRUE),
                            max        = max(variable, na.rm = TRUE))
                       #skew       = .summarySkewness(variable),
                       #kurt       = .summaryKurtosis(variable))
  )

  return()
}

.ldSummaryFactorTableMain <- function(dataContainer, variable, options, ready) {
  if(!options$summary) return()
  if(!is.null(dataContainer[['summary']])) return()
  if(!ready) return()

  summaryTable <- createJaspTable(title = gettext("Descriptives"))
  summaryTable$position <- 1
  summaryTable$dependOn(c("summary"))
  summaryTable$addCitation(.ldAllTextsList()$references$jasp)

  summaryTable$addColumnInfo(name = "level", title = "", type = "string")
  summaryTable$addColumnInfo(name = "freq",  title = gettext("n"), type = "integer")
  summaryTable$addColumnInfo(name = "rel.freq",  title = gettext("Rel. Frequency"), type = "number")

  summaryTable$setExpectedSize(rows = length(levels(variable)) + 1)

  dataContainer[['summary']] <- summaryTable

  .ldFillSummaryFactorTableMain(summaryTable, variable, options)

  return()
}

.ldFillSummaryFactorTableMain <- function(summaryTable, variable, options){
  for(l in levels(variable)){
    summaryTable$addRows(list(
      level = l,
      freq  = sum(variable == l),
      rel.freq = sum(variable == l)/length(variable)
    ))
  }

  summaryTable$addRows(list(
    level = gettext("Total"),
    freq = length(variable),
    rel.freq = "."
  ))
}
### Moments ----

.ldObservedMomentsTableMain <- function(dataContainer, variable, options, ready){
  if(!options$moments) return()
  if(!is.null(dataContainer[['moments']])) return()

  momentsTable <- createJaspTable(title = gettext("Observed Moments"))
  momentsTable$position <- 2
  momentsTable$dependOn(c("moments", "momentsUpTo"))

  momentsTable$addColumnInfo(name = "moment", title = gettext("Moment"), type = "integer")
  momentsTable$addColumnInfo(name = "raw",    title = gettext("Raw"),    type = "number")
  momentsTable$addColumnInfo(name = "central",title = gettext("Central"),type = "number")

  momentsTable$setExpectedSize(rows = options$momentsUpTo)

  dataContainer[['moments']] <- momentsTable

  if(!ready) return()

  .ldFillObservedMomentsTableMain(momentsTable, variable, options)
  return()
}

.ldFillObservedMomentsTableMain <- function(momentsTable, variable, options){
  res <- data.frame(moment = 1:options$momentsUpTo, raw = NA, central = NA)

  res$raw     <- .computeObservedMoments(variable, max.moment = options$momentsUpTo, about.mean = FALSE)
  res$central <- .computeObservedMoments(variable, max.moment = options$momentsUpTo, about.mean = TRUE)

  momentsTable$setData(res)
}
### Helper functions ----
# .summarySkewness <- function(x) {
#
#   # Skewness function as in SPSS (for samlpes spaces):
#   # http://suite101.com/article/skew-and-how-skewness-is-calculated-in-statistical-software-a231005
#   x <- na.omit(x)
#   n <- length(x)
#   m <- mean(x)
#   s <- sd(x)
#   z <- (x - m) / s  # z scores
#   a <- n / ((n - 1) * (n - 2))
#
#   skewness <- sum(z^3) * a
#
#   return(skewness)
# }
#
# .summaryKurtosis <- function(x) {
#
#   # Kurtosis function as in SPSS:
#   # http://www.ats.ucla.edu/stat/mult_pkg/faq/general/kurtosis.htm
#   # http://en.wikipedia.org/wiki/Kurtosis#Estimators_of_population_kurtosis
#
#   x   <- na.omit(x)
#   n   <- length(x)
#   s4  <- sum((x - mean(x))^4)
#   s2  <- sum((x - mean(x))^2)
#   v   <- s2 / (n-1)
#   a   <- (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3))
#   b   <- s4 / (v^2)
#   c   <- (-3 * (n - 1)^2) / ((n - 2) * (n - 3))
#
#   kurtosis <- a * b + c
#
#   return(kurtosis)
# }


.computeObservedMoments <- function(x, max.moment = 2, about.mean = FALSE){
  x <- na.omit(x)

  moments <- numeric(length = max.moment)
  moments[1] <- mean(x)

  if(max.moment < 2)
    return(moments)

  if(about.mean)
    x <- x-moments[1]


  for(i in 2:max.moment){
    moments[i] <- mean(x^i)
  }

  return(moments)
}

.ldGetFitContainer <- function(jaspResults, options, name, title, position, errors = FALSE){
  if(!is.null(jaspResults[[name]])){
    fitContainer <- jaspResults[[name]]
  } else{
    fitContainer <- createJaspContainer(title = gettext(title))
    fitContainer$position <- position

    fitContainer$dependOn(c("variable", "simulateNow", "biasCorrected", options$parValNames, "methodMLE"))

    jaspResults[[name]] <- fitContainer
  }

  if(!isFALSE(errors)){
    fitContainer$setError(gettext(errors$message))
  }

  return(fitContainer)
}

.ldEstimatesTable <- function(container, options, ci.possible, se.possible, method){
  if(!options$outputEstimates) return()
  if(!is.null(container[['estParametersTable']])) return()

  tab <- createJaspTable(title = gettext("Estimated Parameters"))
  tab$dependOn(c("outputEstimates", "outputSE", "ciInterval", "ciIntervalInterval", "parametrization", method, options$parValNames))
  tab$position <- 1
  tab$showSpecifiedColumnsOnly <- TRUE
  tab$setExpectedSize(rows = length(options$pars) - length(options$fix.pars))

  tab$addColumnInfo(name = "parName",  title = gettext("Parameter"), type = "string")
  tab$addColumnInfo(name = "estimate", title = gettext("Estimate"), type = "number")

  #"\u03BC\u0302"
  if(options$outputSE && se.possible){
    tab$addColumnInfo(name = "se", title = gettext("SE"), type = "number")
  } else if(options$outputSE) {
    tab$addFootnote(gettext("Standard errors are unavailable with this method"))
  }

  if(options$ciInterval && ci.possible){
    tab$addColumnInfo(name = "lower", title = gettext("Lower"), type = "number",
                      overtitle = gettextf("%s%% CI", options[['ciIntervalInterval']]*100))
    tab$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number",
                      overtitle = gettextf("%s%% CI", options[['ciIntervalInterval']]*100))
  } else if(options$ciInterval){
    tab$addFootnote(gettext("Confidence intervals are unavailable with this method."))
  }

  if(method == "methodMLE"){
    tab$addCitation(.ldAllTextsList()$references$fitdistrplus)

    if(options$ciInterval || options$outputSE){
      tab$addCitation(.ldAllTextsList()$references$car)
    }

    if(options$ciInterval && !options$outputSE){
      tab$addFootnote(gettext("Confidence intervals were calculated using the delta method."))
    } else if(!options$ciInterval && options$outputSE){
      tab$addFootnote(gettext("Standard errors were calculated using the delta method."))
    } else if(options$ciInterval && options$outputSE){
      tab$addFootnote(gettext("Standard errors and confidence intervals were calculated using the delta method."))
    }
  }


  container[['estParametersTable']] <- tab

  return(tab)
}

.ldOptionsDeterminePlotLimits <- function(options, switch = TRUE) {
  options[['range_x']] <- c(options[['min_x']], options[['max_x']])

  if(switch) {

    if(options[['highlightType']] == "minmax"){
      options[['highlightmin']] <- options[['min']]
      options[['highlightmax']] <- options[['max']]
    } else if(options[['highlightType']] == "lower"){
      options[['highlightmin']] <- options[['range_x']][1]
      options[['highlightmax']] <- options[['lower_max']]
    } else if(options[['highlightType']] == "upper"){
      options[['highlightmin']] <- options[['upper_min']]
      options[['highlightmax']] <- options[['range_x']][2]
    } else{
      options[['highlightmin']] <- options[['highlightmax']] <- NULL
    }

  } else {

    options[['highlightmin']] <- options[['min']]
    options[['highlightmax']] <- options[['max']]

  }

  options
}

### Fit distributions ----
### MLE stuff ----
.ldMLE <- function(jaspResults, variable, options, ready, errors, fillTable, analyticEstimates = NULL, normality=FALSE,...){
  ready <- ready && isFALSE(errors)
  # override MLE option if any assess fit method is requested
  options[["methodMLE"]] <- options[["methodMLE"]] || .ldAnyAssessFitRequested(options)
  # jump out if mle not desired
  if(!options[["methodMLE"]]) return()

  mleContainer <- .ldGetFitContainer(jaspResults, options, "mleContainer", gettext("Maximum likelihood"), 7, errors)

  # parameter estimates
  if(is.null(analyticEstimates)) {
    mleResults   <- .ldMLEResults(mleContainer, variable, options, ready, options$distNameInR)
    mleEstimatesTable  <- .ldEstimatesTable(mleContainer, options, TRUE, TRUE, "methodMLE")
  } else {
    mleResults <- analyticEstimates
    mleEstimatesTable  <- .ldEstimatesTable(mleContainer, options, mleResults$ci.possible, mleResults$se.possible, "methodAnalyticMLE")
  }
  fillTable(mleEstimatesTable, mleResults, options, ready, ...)

  # fit assessment
  mleFitContainer    <- .ldGetFitContainer(mleContainer, options, "mleFitAssessment", gettext("Fit Assessment"), 8)

  # fit statistics
  mleFitStatistics   <- .ldFitStatisticsTable(mleFitContainer, options, "methodMLE")
  mleFitStatisticsResults <- .ldFitStatisticsResults(mleContainer, mleResults$fitdist, variable, options, ready, normality)
  .ldFillFitStatisticsTable(mleFitStatistics, mleFitStatisticsResults, options, ready, normality)
  # fit plots
  .ldFitPlots(mleFitContainer, mleResults$fitdist$estimate, options, variable, ready)
}

.ldAnyAssessFitRequested <- function(options) {
  isTRUE(options[["kolmogorovSmirnov"]]) ||
  isTRUE(options[["cramerVonMisses"]]) ||
  isTRUE(options[["andersonDarling"]]) ||
  isTRUE(options[["lillienfors"]]) ||
  isTRUE(options[["shapiroWilk"]]) ||
  isTRUE(options[["chiSquare"]]) ||
  isTRUE(options[["estPDF"]]) ||
  isTRUE(options[["estPMF"]]) ||
  isTRUE(options[["estCDF"]]) ||
  isTRUE(options[["qqplot"]]) ||
  isTRUE(options[["ppplot"]])
}

.ldMLEResults <- function(mleContainer, variable, options, ready, distName){
  if(!ready) return()
  if(!is.null(mleContainer[['mleResults']])) return(mleContainer[['mleResults']]$object)

  starts <- options$pars
  if(!is.null(options$fix.pars)){
    starts[names(options$fix.pars)] <- NULL
  }

  results <- list()
  results$fitdist <- try(fitdistrplus::fitdist(data = variable, distr = distName, method = "mle",
                                               start = starts, fix.arg = options$fix.pars,
                                               keepdata = FALSE, optim.method = "L-BFGS-B",
                                               lower = options$lowerBound, upper = options$upperBound), silent = TRUE)

  if(isTryError(results)){
    results$fitdist <- try(MASS::fitdistr(x = variable, densfun = options$pdfFun, start = starts,
                                          lower = options$lowerBound, upper = options$upperBound), silent = TRUE)
  }

  if(isTryError(results)){
    args <- list(x = variable, densfun = options[['pdfFun']], start = starts, lower = options[['lowerBound']], upper = options[['upperBound']])
    args <- c(args, options[['fix.pars']])
    results$fitdist <- try(do.call(MASS::fitdistr, args), silent = TRUE)
  }

  if(isTryError(results)) {
    results$fitdist <- try(.ldMLEoptim(x = variable, densFun = options[['pdfFun']], start = starts,
                                       lower = options[['lowerBound']], upper = options[['upperBound']],
                                       fixPars = options[['fix.pars']]), silent = TRUE)
  }

  if(isTryError(results)){
    results$fitdist <- try(fitdistrplus::fitdist(data = variable, distr = distName, method = "mle",
                                                 start = starts, fix.arg = options$fix.pars,
                                                 keepdata = FALSE), silent = TRUE)
  } else{
    results$fitdist$convergence <- 0
  }

  if(isTryError(results)){
    mleContainer$setError(.ldAllTextsList()$feedback$fitdistrError)
    return()
  }

  if(is.null(results$fitdist$vcov)){
    mleContainer$setError(.ldAllTextsList()$feedback$vcovNA)
    #mleContainer[['estParametersTable']]$addFootnote(.ldAllTextsList()$feedback$vcovNA)
    results$fitdist$vcov <- diag(length(results$fitdist$estimate))

    include.se <- FALSE
  } else{
    include.se <- TRUE
  }

  results$structured <- .ldStructureResults(results$fitdist, options, include.se)

  mleContainer[['mleResults']] <- createJaspState(object = results, dependencies = c(options$parValNames, "ciIntervalInterval", "parametrization", "biasCorrected"))

  return(results)
}

.ldMLEoptim <- function(x, densFun, start, lower, upper, fixPars) {

  .ldMinusLogLikelihood <- function(pars, data) {
    args <- c(as.list(pars), as.list(fixPars), list(x = as.vector(data), log = TRUE))
    logliks <- do.call(densFun, args)

    return(-sum(logliks))
  }

  optimResults <- optim(par = start, fn = .ldMinusLogLikelihood, method = "L-BFGS-B",
                        lower = lower, upper = upper, hessian = TRUE, data = x)
  optimResults$vcov <- solve(optimResults[["hessian"]])

  return(optimResults)
}

.ldStructureResults <- function(fit, options, include.se){
  if(is.null(fit)) return()

  res <- sapply(options$transformations,
                function(tr) car::deltaMethod(fit$estimate, tr, fit$vcov, level = options$ciIntervalInterval))
  rownames(res) <- c("estimate", "se", "lower", "upper")
  res <- t(res)
  res <- cbind(par = rownames(res), res)
  res <- as.data.frame(res)

  if(!include.se){
    res$se <- res$lower <- res$upper <- NA
  }
  return(res)
}
#### Fit assessment ----
.ldFitStatisticsTable <- function(fitContainer, options, method){
  if(!is.null(fitContainer[['fitStatisticsTable']])) return()

  allTests <- c("kolmogorovSmirnov", "cramerVonMisses", "andersonDarling", "lillienfors", "shapiroWilk", "chiSquare")
  optionsTests <- allTests %in% names(options)
  whichTests <- unlist(options[allTests[optionsTests]])

  if(is.null(whichTests) || all(!whichTests)) return()

  tab <- createJaspTable(title = gettext("Fit Statistics"))
  tab$position <- 1
  tab$dependOn(c(method, allTests[optionsTests]))
  tab$setExpectedSize(rows = sum(whichTests))


  tab$addColumnInfo(name = "test",      title = gettext("Test"),      type = "string")
  tab$addColumnInfo(name = "statistic", title = gettext("Statistic"), type = "number")
  tab$addColumnInfo(name = "p.value",   title = gettext("p"),         type = "pvalue")

  tab$addCitation(.ldAllTextsList()$references$goftest)

  fitContainer[['fitStatisticsTable']] <- tab

  return(tab)
}

.ldFitStatisticsResults <- function(fitContainer, fit, variable, options, ready, normality=FALSE){
  if(!ready || fitContainer$getError()) return()
  if(is.null(fit)) return()
  if(!is.null(fitContainer[['fitStatisticsResults']])) return(fitContainer[['fitStatisticsResults']]$object)

  allTests <- c("kolmogorovSmirnov", "cramerVonMisses", "andersonDarling", "lillienfors", "shapiroWilk", "chiSquare")
  tests <- allTests[allTests %in% names(options)]

  res <- data.frame(test = tests, statistic = numeric(length = length(tests)), p.value = numeric(length = length(tests)))

  pars <- c(as.list(fit$estimate), options$fix.pars)

  for(test in tests){
    arg <- switch (test,
                   "kolmogorovSmirnov" = c(list(x = variable, y = options$cdfFun), pars),
                   "lillienfors" = list(x = variable),
                   "shapiroWilk" = list(x = variable),
                   "chiSquare" = list(x = as.numeric(table(variable)),
                                      p = do.call(options[['pdfFun']],
                                                  utils::modifyList(pars,
                                                                    list(x = as.numeric(names(table(variable))))
                                                  )
                                      ),
                                      rescale.p = TRUE),
                   c(list(x = variable, null = options$cdfFun, estimated=TRUE), pars)
    )

    fun <- switch (test,
                   "kolmogorovSmirnov" = stats::ks.test,
                   "cramerVonMisses"   = goftest::cvm.test,
                   "andersonDarling"   = goftest::ad.test,
                   "lillienfors"       = nortest::lillie.test,
                   "shapiroWilk"       = nortest::sf.test,
                   "chiSquare"         = stats::chisq.test
    )

    # exact normality tests are full of special cases, here they are intercepted
    if (normality) {
      if (test == "cramerVonMisses" && length(variable) > 7) {
        arg <- list(x = variable)
        fun <- nortest::cvm.test
      } else if (test == "andersonDarling" && length(variable) > 7) {
        arg <- list(x = variable)
        fun <- nortest::ad.test
      } else if (test == "lillienfors" && length(variable) < 5) {
        fun <- function(x) {
          return(list(statistic = NA, p.value = NA))
        }
      } else if (test == "shapiroWilk" && (length(variable) < 5 || length(variable) > 5000)) {
        fun <- stats::shapiro.test
      }
    } else {
      if (test == "shapiroWilk") {
        fun <- stats::shapiro.test
      } else if (test=="lillienfors") {
        fun <- function(x) {
          return(list(statistic = NA, p.value = NA))
        }
      }
    }

    compute <- do.call(fun, arg)
    res[res$test == test, "statistic"] <- as.numeric(compute$statistic)
    res[res$test == test, "p.value"]   <- as.numeric(compute$p.value)
  }

  fitContainer[['fitStatisticsResults']] <- createJaspState(object = res)

  return(res)
}

.ldFillFitStatisticsTable <- function(table, results, options, ready, normality=FALSE){
  if(!ready) return()
  if(is.null(results)) return()
  if(is.null(table)) return()


  allTests <- c("kolmogorovSmirnov", "cramerVonMisses", "andersonDarling", "lillienfors", "shapiroWilk", "chiSquare")
  tests <- allTests[allTests %in% names(options)]
  testNames <- c(gettext("Kolmogorov-Smirnov"),
                 gettext("Cramér-von Mises"),
                 gettext("Anderson-Darling"),
                 gettext("Lillienfors"),
                 gettext("Shapiro-Wilk"),
                 gettext("Chi-square"))[allTests %in% names(options)]

  whichTests <- unlist(options[tests])

  results$test <- testNames
  rownames(results) <- tests
  res <- results[whichTests,]

  table$setData(res)

  if (!normality && (isTRUE(options[["cramerVonMisses"]]) || isTRUE(options[["andersonDarling"]]))) {
    table$addFootnote(gettext("Using Brown (1980) approximation which tends to be innacurate for small sample sizes."),
                      rowNames = c("cramerVonMisses", "andersonDarling"))
    table$addCitation(.ldAllTextsList()$references$brown)
  }

  return()
}
### Fill plots----
.ldFitPlots <- function(fitContainer, estimates, options, variable, ready){
  estimates <- c(estimates, options$fix.pars)
  if(is.null(fitContainer[['estPDF']]) && isTRUE(options$estPDF)){
    pdfplot <- createJaspPlot(title = gettext("Histogram vs. Theoretical PDF"))
    pdfplot$dependOn(c("estPDF"))
    pdfplot$position <- 2
    fitContainer[['estPDF']] <- pdfplot

    if(ready && !fitContainer$getError())
      .ldFillEstPDFPlot(pdfplot, estimates, options, variable)
  }

  if(is.null(fitContainer[['estPMF']]) && isTRUE(options$estPMF)){
    pmfplot <- createJaspPlot(title = gettext("Histogram vs. Theoretical PMF"))
    pmfplot$dependOn(c("estPMF"))
    pmfplot$position <- 2
    fitContainer[['estPMF']] <- pmfplot

    if(ready && !fitContainer$getError())
      .ldFillEstPMFPlot(pmfplot, estimates, options, variable)
  }

  if(is.null(fitContainer[['qqplot']]) && options$qqplot){
    qqplot <- createJaspPlot(title = gettext("Q-Q plot"))
    qqplot$dependOn(c("qqplot", "qqPlotCi", "qqPlotCiLevel"))
    qqplot$position <- 3
    fitContainer[['qqplot']] <- qqplot

    if(ready && !fitContainer$getError())
      .ldFillQQPlot(qqplot, estimates, options, variable, ci = options[["qqPlotCi"]], ciLevel = options[["qqPlotCiLevel"]])
  }

  if(is.null(fitContainer[['estCDF']]) && options$estCDF){
    cdfplot <- createJaspPlot(title = gettext("Empirical vs. Theoretical CDF"))
    cdfplot$dependOn(c("estCDF"))
    cdfplot$position <- 4
    fitContainer[['estCDF']] <- cdfplot

    if(ready && !fitContainer$getError())
      .ldFillEstCDFPlot(cdfplot, estimates, options, variable)
  }

  if(is.null(fitContainer[['ppplot']]) && options$ppplot){
    ppplot <- createJaspPlot(title = gettext("P-P plot"))
    ppplot$dependOn(c("ppplot", "ppPlotCi", "ppPlotCiLevel"))
    ppplot$position <-5
    fitContainer[['ppplot']] <- ppplot

    if(ready && !fitContainer$getError())
      .ldFillPPPlot(ppplot, estimates, options, variable, ci = options[["ppPlotCi"]], ciLevel = options[["ppPlotCiLevel"]])
  }

  return()
}

.ldFillQQPlot <- function(qqplot, estParameters, options, variable, ci = FALSE, ciLevel = 0.95){
  estParameters <- as.list(estParameters)

  sample <- sort(variable)
  n      <- length(sample)
  p      <- stats::ppoints(n)

  args        <- estParameters
  args[["p"]] <- p

  theoretical <- do.call(options[["qFun"]], args)
  lmFit       <- lm(sample~theoretical)
  intercept   <- coefficients(lmFit)[[1]]
  slope       <- coefficients(lmFit)[[2]]

  df <- data.frame(sample = sample, theoretical = theoretical)

  if(isTRUE(ci)) {
    # Fox, J. (2016) Applied Regression Analysis and Generalized Linear Models, Third Edition. Sage.
    # Chapter 3.1.3
    alpha       <- 1-ciLevel
    args        <- estParameters
    args[["x"]] <- theoretical
    pdf         <- do.call(options[["pdfFun"]], args)
    se <- sqrt(p * (1 - p) / n) * slope / pdf

    df[["upper"]] <- theoretical + se * qnorm(alpha/2, lower.tail = FALSE)
    df[["lower"]] <- theoretical + se * qnorm(alpha/2, lower.tail = TRUE)

    ciLayer <-
      ggplot2::geom_ribbon(
        mapping = ggplot2::aes(x = theoretical, ymin = lower, ymax = upper),
        fill = "steelblue", color = "black", alpha = 0.5
      )
  } else {
    ciLayer <- NULL
  }

  yPoints <- as.vector(as.matrix(df))
  yBreaks <- jaspGraphs::getPrettyAxisBreaks(yPoints)
  yLabs   <- jaspGraphs::axesLabeller(yBreaks)
  yRange  <- range(c(yPoints, yBreaks))
  xBreaks <- jaspGraphs::getPrettyAxisBreaks(theoretical)
  xLabs   <- jaspGraphs::axesLabeller(xBreaks)
  xRange  <- range(c(theoretical, xBreaks))

  p <- ggplot2::ggplot(data = df, ggplot2::aes(sample = sample)) +
    ciLayer +
    ggplot2::geom_line(mapping = ggplot2::aes(x = theoretical, y = theoretical), size = 1) +
    ggplot2::stat_qq(distribution = options[['qFun']], dparams = estParameters, shape = 21, fill = "grey", size = 3) +
    ggplot2::scale_y_continuous(name = gettext("Sample"),      breaks = yBreaks, labels = yLabs, limits = yRange) +
    ggplot2::scale_x_continuous(name = gettext("Theoretical"), breaks = xBreaks, labels = xLabs, limits = xRange) +
    jaspGraphs::themeJaspRaw() +
    jaspGraphs::geom_rangeframe()

  qqplot$plotObject <- p

  return()
}

.ldFillEstPDFPlot <- function(pdfplot, estParameters, options, variable){
  p <- ggplot2::ggplot(data = data.frame(variable = variable), ggplot2::aes(x = variable)) +
    ggplot2::geom_histogram(ggplot2::aes(y = ..density..), fill = "grey", col = "black") +
    ggplot2::stat_function(fun = options[['pdfFun']], args = as.list(estParameters), size = 1.5) +
    ggplot2::geom_rug() +
    ggplot2::scale_x_continuous(limits = range(variable),
                                breaks = pretty(range(variable))) +
    ggplot2::ylab(gettext("Density")) + ggplot2::xlab(options[['variable']])

  p <- jaspGraphs::themeJasp(p)

  pdfplot$plotObject <- p

  return()
}

.ldFillEstPMFPlot <- function(pmfplot, estimates, options, variable){
  range <- range(variable)

  mids <- range[1]:range[2]
  counts <- sapply(mids, function(i) sum(variable == i))
  dat  <- data.frame(counts = counts, mids = mids, pmf = do.call(options$pdfFun, c(list(x = mids), estimates)))

  xBreaks <- unique(floor(jaspGraphs::getPrettyAxisBreaks(mids)))
  xLabs   <- jaspGraphs::axesLabeller(xBreaks)
  p <- ggplot2::ggplot(data = dat, ggplot2::aes(x = mids, y = counts/sum(counts))) +
    ggplot2::geom_bar(stat="identity", fill = "grey", colour = "black") +
    jaspGraphs::geom_point(ggplot2::aes(x = mids, y = pmf)) +
    ggplot2::scale_x_continuous(limits = range + c(-0.5, 0.5),
                                expand = c(0.1, 0.1),
                                breaks = xBreaks,
                                labels = xLabs) +
    ggplot2::xlab(options$variable) +
    ggplot2::ylab(gettext("Probability Mass"))

  p <- jaspGraphs::themeJasp(p)

  pmfplot$plotObject <- p

  return()
}

.ldFillPPPlot <- function(ppplot, estParameters, options, variable, ci = FALSE, ciLevel = 0.95){
  estParameters <- as.list(estParameters)

  variable <- sort(variable)
  n        <- length(variable)
  sample   <- stats::ppoints(n)

  args        <- estParameters
  args[["q"]] <- variable

  theoretical <- sort(do.call(options[['cdfFun']], args))

  df <- data.frame(sample = sample, theoretical = theoretical)

  if(isTRUE(ci)) {
    # Stirling, W. D. (1982). Enhancements to aid interpretation of probability plots. Journal of the Royal Statistical Society: Series D (The Statistician), 31(3), 211-220.
    # Quesenberry, C. P., & Hales, C. (1980). Concentration bands for uniformity plots. Journal of Statistical Computation and Simulation, 11(1), 41-53.
    i     <- seq_along(variable)
    alpha <- 1-ciLevel

    df[["lower"]] <- qbeta(  alpha/2, i, n-i+1)
    df[["upper"]] <- qbeta(1-alpha/2, i, n-i+1)

    ciLayer <- ggplot2::geom_ribbon(
      mapping = ggplot2::aes(y = sample, xmin = lower, xmax = upper),
      fill = "steelblue", color = "black", alpha = 0.5
    )
  } else {
    ciLayer <- NULL
  }

  p <- ggplot2::ggplot(data = df) +
    ciLayer +
    jaspGraphs::geom_abline2(slope = 1, intercept = 0, size = 1) +
    jaspGraphs::geom_point(ggplot2::aes(x = theoretical, y = sample)) +
    ggplot2::xlab(gettext("Theoretical")) + ggplot2::ylab(gettext("Sample")) +
    ggplot2::scale_x_continuous(limits = 0:1, expand = ggplot2::expand_scale(mult = 0, add = c(0.05, 0.1))) +
    ggplot2::scale_y_continuous(limits = 0:1, expand = ggplot2::expand_scale(mult = 0, add = c(0.05, 0.1))) +
    jaspGraphs::themeJaspRaw() +
    jaspGraphs::geom_rangeframe()

  ppplot$plotObject <- p

  return()
}

.ldFillEstCDFPlot <- function(cdfplot, estParameters, options, variable){
  p <- ggplot2::ggplot(data = data.frame(variable = variable), ggplot2::aes(x = variable)) +
    ggplot2::stat_ecdf(geom = "step") +
    ggplot2::geom_rug() +
    ggplot2::stat_function(fun = options[['cdfFun']], args = as.list(estParameters), size = 1.5) +
    ggplot2::scale_x_continuous(limits = range(variable), breaks = pretty(range(variable))) +
    ggplot2::scale_y_continuous(limits = 0:1) +
    ggplot2::ylab(substitute(p~(X <= x), list(p = gettext("Probability")))) +
    ggplot2::xlab(options[['variable']])

  p <- jaspGraphs::themeJasp(p)

  cdfplot$plotObject <- p

  return()
}

#### Plots ----
.ldShowDistribution <- function(jaspResults, options, name, parSupportMoments, formulaPDF=NULL, formulaPMF=NULL, formulaCDF=NULL, formulaCMF=NULL, formulaQF=NULL) {
  .ldIntroText(jaspResults, options, name)
  parSupportMoments(jaspResults, options)

  if (!is.null(formulaPDF) && is.null(formulaPMF)) {
    pdfContainer <- .ldGetPlotContainer(jaspResults, options, "plotPDF", gettext("Probability Density Function"), 3)
    .ldFillPDFContainer(pdfContainer, options, formulaPDF)
  } else if (is.null(formulaPDF) && !is.null(formulaPMF)) {
    pmfContainer <- .ldGetPlotContainer(jaspResults, options, "plotPMF", gettext("Probability Mass Function"), 3)
    .ldFillPMFContainer(pmfContainer, options, formulaPMF)
  }

  if (!is.null(formulaCDF) && is.null(formulaCMF)) {
    cdfContainer <- .ldGetPlotContainer(jaspResults, options, "plotCDF", gettext("Cumulative Distribution Function"), 4)
    .ldFillCDFContainer(cdfContainer, options, formulaCDF)
  } else if(is.null(formulaCDF) && !is.null(formulaCMF)) {
    cmfContainer <- .ldGetPlotContainer(jaspResults, options, "plotCMF", gettext("Cumulative Distribution Function"), 4)
    .ldFillCMFContainer(cmfContainer, options, formulaCMF)
  }

  if (!is.null(formulaQF)) {
    qfContainer <- .ldGetPlotContainer(jaspResults, options, "plotQF",   gettext("Quantile Function"), 5)
    .ldFillQFContainer(qfContainer, options, formulaQF)
  }

  return()
}
.ldGetPlotContainer <- function(jaspResults, options, name, title, position){
  if(!is.null(jaspResults[[name]])){
    plotsContainer <- jaspResults[[name]]
  } else{
    plotsContainer <- createJaspContainer(title = gettext(title))
    plotsContainer$position <- position

    if("parametrization" %in% names(options)){
      plotsContainer$dependOn(c(options$parValNames, "parametrization"))
    } else{
      plotsContainer$dependOn(c(options$parValNames))
    }

    jaspResults[[name]] <- plotsContainer
  }

  return(plotsContainer)
}

.ldFormulaPlot <- function(container, options, formulaText = NULL, depend = NULL){
  if(!options$formulas) return()
  if(!is.null(container[['formula']])) return()

  formula <- createJaspHtml(title = gettext("Formula"), elementType = "h1")
  formula$position <- 3
  formula$dependOn(c("formulas", depend))

  if(is.function(formulaText)){
    formula[['text']] <- formulaText(options)
  } else if(is.character(formulaText)){
    formula[['text']] <- formulaText
  }

  container[['formula']] <- formula
}

### Plot distributions ----
### Plot PDF ----
.ldFillPDFContainer <- function(pdfContainer, options, formulaText = NULL, explanationText = NULL){
  if(!options$plotPDF) return()

  .ldExplanationPDF(pdfContainer, options, explanationText)
  .ldPlotPDF(pdfContainer, options)
  .ldFormulaPlot(pdfContainer, options, formulaText, "plotPDF")

  return()
}

.ldExplanationPDF <- function(pdfContainer, options, explanationText = NULL){
  if(!options$explanatoryText) return()
  if(!is.null(pdfContainer[['explanation']])) return()

  explanation <- createJaspHtml()
  explanation$dependOn(c("plotPDF", "explanatoryText"))
  explanation$position <- 1

  if(is.null(explanationText)){
    explanationText <- .ldAllTextsList()$explanations$pdf
  }

  explanation[['text']] <- explanationText
  pdfContainer[['explanation']] <- explanation

}

.ldPlotPDF <- function(pdfContainer, options){
  if(!is.null(pdfContainer[['pdfPlot']])) return()

  pdfPlot <- createJaspPlot(title = gettext("Density Plot"), width = 600, height = 320)
  pdfPlot$position <- 2 # after explanation, before formula
  pdfPlot$dependOn(c('plotPDF', 'min_x', 'max_x', 'highlightType',
                     'highlightDensity', 'highlightProbability',
                     'min', 'max', 'lower_max', 'upper_min'))
  pdfContainer[['pdfPlot']] <- pdfPlot

  .ldFillPlotPDF(pdfPlot, options)

  return()
}

.ldFillPlotPDF <- function(pdfPlot, options){

  # basic density curve
  plot <- ggplot2::ggplot(data = data.frame(x = options[['range_x']]), ggplot2::aes(x = x)) +
    ggplot2::stat_function(fun = options[['pdfFun']], n = 101, args = options[['pars']], size = 1.25)
  # highlight probability
  if(options$highlightProbability){
    # determine plotting region
    args <- options[['pars']]
    argsPDF <- options[['pars']]
    if(options[['highlightType']] == "minmax"){
      args[['q']] <- c(options[['highlightmin']], options[['highlightmax']])
    } else if(options[['highlightType']] == "lower"){
      args[['q']] <- c(-Inf, options[['highlightmax']])
    } else if(options[['highlightType']] == "upper"){
      args[['q']] <- c(options[['highlightmin']], Inf)
    }

    # calculate value under the curve
    cdfValue <- do.call(options[['cdfFun']], args)
    cdfValue <- cdfValue[2] - cdfValue[1]

    # round value under the curve for plotting
    cdfValueRound <- round(cdfValue, 2)
    if(cdfValueRound %in% c(0, 1)){
      cdfValueRound <- round(cdfValue, 3)
    }

    # calculate position of the geom_text
    args[['q']] <- c(options[['highlightmin']], options[['highlightmax']])
    argsPDF[['x']] <- seq(args[['q']][1], args[['q']][2], length.out = 20)
    w <- do.call(options[['pdfFun']], argsPDF)
    argsPDF[['x']] <- argsPDF[['x']][!is.na(w)]
    w <- w[!is.na(w)]
    x <- weighted.mean(argsPDF[['x']], w)
    argsPDF[['x']] <- x
    y <- do.call(options[['pdfFun']], argsPDF)
    y <- y[!is.na(y)]

    argsPDF[['x']] <- seq(options[['range_x']][1], options[['range_x']][2], length.out = 101)
    max_y <- max(do.call(options[['pdfFun']], argsPDF), na.rm = TRUE)

    if(length(y) == 0) {
        y <- 0
    } else if(y < 0.1*max_y) {
        y <- y*5
    } else {
        y <- y/3
    }

    plot <- plot +
      ggplot2::stat_function(fun = options[['pdfFun']], n = 101, args = options[['pars']], geom = "area",
                             xlim = args[['q']], fill = "steelblue") +
      ggplot2::geom_text(data = data.frame(x = x, y = y, label = cdfValueRound),
                         mapping = ggplot2::aes(x = x, y = y, label = label), size = 8, parse = TRUE)
  }

  # highlight density
  if(options$highlightDensity){
    # determine plotting region
    args <- options[['pars']]
    if(options[['highlightType']] == "minmax"){
      args[['x']] <- c(options[['highlightmin']], options[['highlightmax']])
    } else if(options[['highlightType']] == "lower"){
      args[['x']] <- options[['highlightmax']]
    } else if(options[['highlightType']] == "upper"){
      args[['x']] <- options[['highlightmin']]
    }


    pdfValue <- do.call(options[['pdfFun']], args)

    segment_data <- data.frame(x = options[['range_x']][1] + (options[['range_x']][2]-options[['range_x']][1])/15,
                               xend = args[['x']], y = pdfValue)

    # plot density
    plot <- plot +
      ggplot2::geom_segment(data = segment_data,
                            mapping = ggplot2::aes(x = x, xend = xend, y = y, yend = y),
                            linetype = 2) +
      ggplot2::geom_text(data = data.frame(x = options[['range_x']][1], y = pdfValue, label = round(pdfValue, 2)),
                         ggplot2::aes(x = x, y = y, label = label), size = 6) +
      ggplot2::geom_linerange(x = args[['x']], ymin = 0, ymax = pdfValue, linetype = 2) +
      jaspGraphs::geom_point(x = args[['x']], y = pdfValue, size = 5)
  }

  lineData <- ggplot2::layer_data(plot, 1)
  lineData <- lineData[is.finite(lineData$y),]
  plot <- plot + ggplot2::ylab(gettext("Density")) +
    ggplot2::scale_x_continuous(limits = options[['range_x']],  breaks = jaspGraphs::getPrettyAxisBreaks(options[['range_x']])) +
    ggplot2::scale_y_continuous(limits = c(0, max(lineData$y)), breaks = jaspGraphs::getPrettyAxisBreaks(lineData$y), expand = ggplot2::expansion(mult=c(0,0.2)))

  plot <- jaspGraphs::themeJasp(plot)

  pdfPlot[['plotObject']] <- plot
}

### Plot CDF ----
.ldFillCDFContainer <- function(cdfContainer, options, formulaText = NULL, explanationText = NULL){
  if(!options$plotCDF) return()

  .ldExplanationCDF(cdfContainer, options, explanationText)
  .ldPlotCDF(cdfContainer, options)
  .ldFormulaPlot(cdfContainer, options, formulaText, "plotCDF")
}

.ldExplanationCDF <- function(cdfContainer, options, explanationText = NULL){
  if(!options$explanatoryText) return()
  if(!is.null(cdfContainer[['explanation']])) return()

  explanation <- createJaspHtml()
  explanation$dependOn(c("plotCDF", "explanatoryText"))
  explanation$position <- 1

  if(is.null(explanationText)){
    explanationText <- .ldAllTextsList()$explanations$cdf
  }

  explanation[['text']] <- explanationText
  cdfContainer[['explanation']] <- explanation
}

.ldPlotCDF <- function(cdfContainer, options){
  if(!is.null(cdfContainer[['cdfPlot']])) return()

  cdfPlot <- createJaspPlot(title = gettext("Cumulative Probability Plot"), width = 600, height = 320)
  cdfPlot$position <- 2 # after explanation, before formula
  cdfPlot$dependOn(c('plotCDF', 'min_x', 'max_x', 'highlightType',
                     'highlightDensity', 'highlightProbability',
                     'min', 'max', 'lower_max', 'upper_min'))
  cdfContainer[['cdfPlot']] <- cdfPlot

  .ldFillPlotCDF(cdfPlot, options)

  return()
}

.ldFillPlotCDF <- function(cdfPlot, options){

  plot <- ggplot2::ggplot(data = data.frame(x = options[['range_x']]), ggplot2::aes(x = x)) +
    ggplot2::stat_function(fun = options[['cdfFun']], n = 101, args = options[['pars']], size = 1.25)


  args <- options[['pars']]
  if(options[['highlightType']] == "minmax"){
    args[['q']] <- c(options[['highlightmin']], options[['highlightmax']])
  } else if(options[['highlightType']] == "lower"){
    args[['q']] <- options[['highlightmax']]
  } else if(options[['highlightType']] == "upper"){
    args[['q']] <- options[['highlightmin']]
  }

  cdfValue <- do.call(options[['cdfFun']], args)
  point_data <- data.frame(x = args[['q']], y = cdfValue, col = as.factor(seq_along(args[['q']])))

  if(options$highlightProbability){
    # round value for plotting as text
    cdfValueRound <- round(cdfValue, 2)

    segment_data <- data.frame(xoffset = options[['range_x']][1] + (options[['range_x']][2]-options[['range_x']][1])/15,
                               x = options[['range_x']][1], xend = args[['q']], y = cdfValue, label = cdfValueRound)


    plot <- plot +
      ggplot2::geom_segment(data = segment_data,
                            mapping = ggplot2::aes(x = xoffset, xend = xend, y = y, yend = y), linetype = 2) +
      ggplot2::geom_text(data = segment_data, ggplot2::aes(x = x, y = y, label = label), size = 6) +
      ggplot2::geom_linerange(x = args[['q']], ymin = 0, ymax = cdfValue, linetype = 2) +
      jaspGraphs::geom_point(data = point_data, ggplot2::aes(x = x, y = y), size = 5)
  }

  if(options$highlightDensity){
    # determine plotting region
    pdfArgs <- args
    pdfArgs[['x']] <- pdfArgs[['q']]
    pdfArgs[['q']] <- NULL

    pdfValue <- do.call(options[['pdfFun']], pdfArgs)
    intercept <- cdfValue - args[['q']]*pdfValue
    slopeText <-  round(pdfValue, 2)

    line_data <- data.frame(slope = pdfValue, intercept = intercept, col = as.factor(1:length(pdfArgs[['x']])))

    plot <- plot +
      ggplot2::geom_abline(data = line_data, ggplot2::aes(slope = slope, intercept = intercept, col = col), size = 1) +
      jaspGraphs::geom_point (data = point_data, ggplot2::aes(x = x, y = y, col = col), size = 5) +
      jaspGraphs::scale_JASPcolor_discrete(name = gettext("Slope"), labels = as.character(slopeText))
  }


  plot <- plot +
    ggplot2::ylab(substitute(p~(X <= x), list(p = gettext("Probability")))) +
    ggplot2::scale_x_continuous(limits = options[['range_x']],
                                breaks = jaspGraphs::getPrettyAxisBreaks(options[['range_x']])) +
    ggplot2::scale_y_continuous(limits = c(0, 1))

  plot <- jaspGraphs::themeJasp(plot, legend.position = c(0.85, 0.4))

  cdfPlot[['plotObject']] <- plot
}

### Plot QF ----
.ldFillQFContainer <- function(qfContainer, options, formulaText = NULL, explanationText = NULL){
  if(!options$plotQF) return()

  .ldExplanationQF(qfContainer, options, explanationText)
  .ldPlotQF(qfContainer, options)
  .ldFormulaPlot(qfContainer, options, formulaText, "plotQF")
}

.ldExplanationQF <- function(qfContainer, options, explanationText){
  if(!options$explanatoryText) return()
  if(!is.null(qfContainer[['explanation']])) return()

  explanation <- createJaspHtml()
  explanation$dependOn(c("plotQF", "explanatoryText"))
  explanation$position <- 1

  if(is.null(explanationText)){
    explanationText <- .ldAllTextsList()$explanations$qf
  }

  explanation[['text']] <- explanationText
  qfContainer[['explanation']] <- explanation
}

.ldPlotQF <- function(qfContainer, options){
  if(!is.null(qfContainer[['qfPlot']])) return()

  qfPlot <- createJaspPlot(title = gettext("Quantile Plot"), width = 600, height = 320)
  qfPlot$position <- 2 # after explanation, before formula
  qfPlot$dependOn(c('plotQF', 'min_x', 'max_x'))
  qfContainer[['qfPlot']] <- qfPlot

  .ldFillPlotQF(qfPlot, options)

  return()

}

.ldFillPlotQF <- function(qfPlot, options){
  args <- options[['pars']]
  args[['q']] <- options[['range_x']]
  prange <- do.call(options[['cdfFun']], args)
  args[['q']] <- NULL

  plot <- ggplot2::ggplot(data = data.frame(x = prange), ggplot2::aes(x = x)) +
    ggplot2::stat_function(fun = options[['qFun']], n = 151, args = args, size = 1.25)  +
    ggplot2::ylab("x") + ggplot2::xlab(substitute(p~(X <= x), list(p = gettext("Probability")))) +
    ggplot2::scale_x_continuous(limits = 0:1) +
    ggplot2::scale_y_continuous(limits = options[['range_x']],
                                breaks = jaspGraphs::getPrettyAxisBreaks(options[['range_x']]))

  plot <- jaspGraphs::themeJasp(plot)

  qfPlot[['plotObject']] <- plot
}

### Plot PMF ----
.ldFillPMFContainer <- function(pmfContainer, options, formulaText = NULL, explanationText = NULL){
  if(!options$plotPMF) return()

  .ldExplanationPMF(pmfContainer, options, explanationText)
  .ldPlotPMF(pmfContainer, options)
  .ldFormulaPlot(pmfContainer, options, formulaText, "plotPMF")

  return()
}

.ldExplanationPMF <- function(pmfContainer, options, explanationText = NULL){
  if(!options$explanatoryText) return()
  if(!is.null(pmfContainer[['explanation']])) return()

  explanation <- createJaspHtml()
  explanation$dependOn(c("plotPMF", "explanatoryText"))
  explanation$position <- 1

  if(is.null(explanationText)){
    explanationText <- .ldAllTextsList()$explanations$pmf
  }

  explanation[['text']] <- explanationText
  pmfContainer[['explanation']] <- explanation

}

.ldPlotPMF <- function(pmfContainer, options){
  if(!is.null(pmfContainer[['pmfPlot']])) return()

  pmfPlot <- createJaspPlot(title = gettext("Probability Mass Plot"), width = 600, height = 320)
  pmfPlot$position <- 2 # after explanation, before formula
  pmfPlot$dependOn(c('plotPMF',
                     'min_x', 'max_x',
                     'highlightDensity', 'highlightProbability',
                     'min', 'max'))
  pmfContainer[['pmfPlot']] <- pmfPlot

  .ldFillPlotPMF(pmfPlot, options)

  return()
}

.ldFillPlotPMF <- function(pmfPlot, options){
  args <- options[['pars']]
  args[['x']] <- options[['range_x']][1]:options[['range_x']][2]

  # make a room next to the y-axis
  xlim <- options[['range_x']] + c(-0.1, 0.1) * diff(options[['range_x']]) + c(-0.8, 0.8)

  # if(diff(options[['range_x']]) <= 2){
  #   xlim[1] <- floor(xlim[1]) - 1
  #   xlim[2] <- ceiling(xlim[2]) + 1
  # }
  dat <- data.frame(x = args[['x']], y = do.call(options[['pdfFun']], args))

  # basic plot
  plot <- ggplot2::ggplot(data = dat, ggplot2::aes(x = x, y = y)) +
    ggplot2::geom_bar(stat="identity", fill = "grey", colour = "black", width = 0.8)

  # highlight probability
  if(options$highlightProbability){
    # determine plotting region
    args[['x']] <- options[['highlightmin']]:options[['highlightmax']]
    datHigh <- data.frame(x = args[['x']], pmf = do.call(options[['pdfFun']], args))

    # calculate value under the curve
    cdfValue <- sum(datHigh$pmf)

    # round value under the curve for plotting
    cdfValueRound <- round(cdfValue, 2)
    if(cdfValueRound %in% c(0, 1)){
      cdfValueRound <- round(cdfValue, 3)
    }

    # calculate position of the geom_text
    datShown <- datHigh[datHigh$x %in% dat$x, ]
    if(ncol(datShown) > 0) {
      x <- datShown$x[which.max(round(datShown$pmf, 3))]
      y <- datShown$pmf[which.max(round(datShown$pmf, 3))] + 0.1 * max(dat$y)
    } else{ # the entire highlight region is outside of displayed range
      x <- NA
      y <- NA
      cdfValueRound <- NA
    }

    plot <- plot +
      ggplot2::geom_bar(ggplot2::aes(x = x, y = pmf),
                        data = datShown, width = 0.8,
                        fill = "steelblue", stat = "identity") +
      ggplot2::geom_text(data = data.frame(x = x, y = y, label = cdfValueRound),
                         mapping = ggplot2::aes(x = x, y = y, label = label), size = 8, parse = TRUE)
  }

  # highlight density
  if(options$highlightDensity){
    # determine plotting region
    args <- options[['pars']]
    args[['x']] <- c(options[['highlightmin']], options[['highlightmax']])

    segment_data <- subset(dat, x %in% args[['x']])
    segment_data$xend <- segment_data$x
    segment_data$x    <- xlim[1] + 0.05 * diff(options[['range_x']])
    segment_data$xseg <- xlim[1] + 0.1 * diff(options[['range_x']])
    segment_data$label <- round(segment_data$y, 2)

    # make 10% margin to write values along axis
    #xlim <- xlim + c(-0.1, 0) * diff(options[['range_x']])
    # plot density
    plot <- plot +
      ggplot2::geom_bar(ggplot2::aes(x = xend, y = y), stat = "identity",
                        data = segment_data,
                        alpha = 0, colour = "black", size = 1.5, width = 0.8) +
      ggplot2::geom_segment(data = segment_data,
                            mapping = ggplot2::aes(x = xseg, xend = xend, y = y, yend = y),
                            linetype = 2) +
      ggplot2::geom_text(data = segment_data,
                         ggplot2::aes(x = x, y = y, label = label), size = 6)
  }

  breaks <- jaspGraphs::getPrettyAxisBreaks(options[['range_x']])
  # display only pretty integers
  breaks <- breaks[breaks %% 1 == 0]
  plot <- plot +
    ggplot2::ylab(substitute(p~(X == x), list(p = gettext("Probability")))) +
    ggplot2::scale_x_continuous(limits = xlim,
                                breaks = breaks,
                                labels = breaks,
                                expand = c(0, 0))

  plot <- jaspGraphs::themeJasp(plot)

  pmfPlot[['plotObject']] <- plot
}

### Plot CMF ----
.ldFillCMFContainer <- function(cmfContainer, options, formulaText = NULL, explanationText = NULL){
  if(!options$plotCMF) return()

  .ldExplanationCMF(cmfContainer, options, explanationText)
  .ldPlotCMF(cmfContainer, options)
  .ldFormulaPlot(cmfContainer, options, formulaText, "plotCMF")
}

.ldExplanationCMF <- function(cmfContainer, options, explanationText = NULL){
  if(!options$explanatoryText) return()
  if(!is.null(cmfContainer[['explanation']])) return()

  explanation <- createJaspHtml()
  explanation$dependOn(c("plotCMF", "explanatoryText"))
  explanation$position <- 1

  if(is.null(explanationText)){
    explanationText <- .ldAllTextsList()$explanations$cdf
  }

  explanation[['text']] <- explanationText
  cmfContainer[['explanation']] <- explanation
}

.ldPlotCMF <- function(cmfContainer, options){
  if(!is.null(cmfContainer[['cmfPlot']])) return()

  cmfPlot <- createJaspPlot(title = gettext("Cumulative Probability Plot"), width = 600, height = 320)
  cmfPlot$position <- 2 # after explanation, before formula
  cmfPlot$dependOn(c('plotCMF', 'min_x', 'max_x', 'highlightType',
                     'highlightDensity', 'highlightProbability',
                     'min', 'max', 'lower_max', 'upper_min'))
  cmfContainer[['cmfPlot']] <- cmfPlot

  .ldFillPlotCMF(cmfPlot, options)

  return()
}

.ldFillPlotCMF <- function(cmfPlot, options){
  args <- options[['pars']]
  args[['q']] <- options[['range_x']][1]:options[['range_x']][2]

  dat <- data.frame(x = args[['q']], y = do.call(options[['cdfFun']], args))

  # make a room next to the y-axis
  xlim <- options[['range_x']] + c(-0.1, 0.1) * diff(options[['range_x']]) + c(-0.8, 0.8)

  # basic plot
  plot <- ggplot2::ggplot(data = dat, ggplot2::aes(x = x, y = y)) +
    ggplot2::geom_bar(stat="identity", fill = "grey", colour = "black", width = 0.8)


  # determine plotting region
  args <- options[['pars']]
  args[['q']] <- c(options[['highlightmin']], options[['highlightmax']])

  pdfArgs <- args
  pdfArgs[['x']] <- pdfArgs[['q']]
  pdfArgs[['q']] <- NULL

  pdfValue <- do.call(options[['pdfFun']], pdfArgs)
  cmfValue <- do.call(options[['cdfFun']], args)

  pdfValueRound <-  round(pdfValue, 2)
  cmfValueRound <- round(cmfValue, 2)

  # is there anything to highlight on the plot? (i.e., is the highlighted region inside the range of x)
  highlights <- any(args[['q']] %in% dat$x)

  if(options$highlightDensity && highlights){
    # redraw the bars with tip colored highlighting the increment at the selected x
    datDens <- dat
    datDens$col <- "grey"
    if(options$highlightmin %in% datDens$x) {
      datDens[dat$x == options$highlightmin, "y"] <- cmfValue[1] - pdfValue[1]
      datDens <- rbind(datDens, data.frame(x = options$highlightmin, y = pdfValue[1], col = "blue"))
    }
    if(options$highlightmax %in% datDens$x){
      datDens[datDens$x == options$highlightmax, "y"] <- cmfValue[2] - pdfValue[2]
      datDens <- rbind(datDens, data.frame(x = options$highlightmax, y = pdfValue[2], col = "blue"))
    }
    datDens$col <- factor(datDens$col, levels = c("blue","grey"))
    segment_data <- data.frame(xseg = args[['q']],
                               xend = xlim[2] - 0.1*diff(options[['range_x']]),
                               x    = xlim[2] - 0.05*diff(options[['range_x']]),
                               ymin = cmfValue - pdfValue, ymax = cmfValue, ymid = cmfValue - 0.5*pdfValue,
                               labelDensity = pdfValueRound)

    plot <- plot +
      ggplot2::geom_bar(data = datDens, ggplot2::aes(x = x, y = y, fill = col), width = 0.8, stat = "identity", color = "black") +
      ggplot2::geom_segment(data = segment_data, ggplot2::aes(x = xseg, xend = xend, y = ymid, yend = ymid), linetype = 2) +
      ggplot2::geom_text(data = segment_data, ggplot2::aes(x = x, y = ymid, label = labelDensity), size = 6) +
      ggplot2::scale_fill_manual(values = c("steelblue", "grey"), guide = FALSE)
  }

  if(options$highlightProbability && highlights){
    segment_data <- subset(dat, x %in% args[['q']])
    segment_data$xend <- segment_data$x

    segment_data$x    <- xlim[1] + 0.05 * diff(options[['range_x']])
    segment_data$xseg <- xlim[1] + 0.1 * diff(options[['range_x']])
    segment_data$label <- round(segment_data$y, 2)

    plot <- plot +
      ggplot2::geom_bar(ggplot2::aes(x = xend, y = y), stat = "identity",
                        data = segment_data,
                        alpha = 0, colour = "black", size = 1.5, width = 0.8) +
      ggplot2::geom_segment(data = segment_data,
                            mapping = ggplot2::aes(x = xseg, xend = xend, y = y, yend = y),
                            linetype = 2) +
      ggplot2::geom_text(data = segment_data,
                         ggplot2::aes(x = x, y = y, label = label), size = 6)
  }

  breaks <- jaspGraphs::getPrettyAxisBreaks(options[['range_x']])
  # display only pretty integers
  breaks <- breaks[breaks %% 1 == 0]
  plot <- plot +
    ggplot2::ylab(substitute(p~(X <= x), list(p = gettext("Probability")))) +
    ggplot2::scale_x_continuous(limits = xlim,
                                breaks = breaks,
                                labels = breaks,
                                expand = c(0, 0)) +
    ggplot2::scale_y_continuous(limits = c(0, 1))

  plot <- jaspGraphs::themeJasp(plot)

  cmfPlot[['plotObject']] <- plot
}

#### Plot empirical ----
.ldPlotHistogram <- function(dataContainer, variable, options, ready, as = "scale"){
  if(!options$histogram) return()
  if(!is.null(dataContainer[['histogram']])) return()

  title <- switch(as, scale = gettext("Histogram"), gettext("Bar plot"))
  histPlot <- createJaspPlot(title = title, width = 500, height = 320)

  if(as != "scale"){
    histPlot$dependOn(c("histogram"))
  } else{
    histPlot$dependOn(c("histogramBins", "histogram"))
  }
  histPlot$position <- 3

  dataContainer[['histogram']] <- histPlot

  if(!ready) return()

  .ldFillPlotHistogram(histPlot, options, variable, as)

}

.ldFillPlotHistogram <- function(histPlot, options, variable, as = "scale"){
  if(as == "scale"){
    range <- range(variable)
    histData <- hist(variable,
                     breaks = seq(range[1], range[2], length.out = options[['histogramBins']]+1),
                     plot = FALSE)
    dat <- data.frame(counts = histData$counts, density = histData$density, mids = histData$mids)
  } else if(as == "discrete"){
    range <- range(variable)
    mids <- range[1]:range[2]
    counts <- sapply(mids, function(i) sum(variable == i))
    dat  <- data.frame(counts = counts, mids = mids)
  } else if(as == "factor"){
    levs <- levels(variable)
    mids <- seq_along(levs)
    range <- range(mids)
    counts <- sapply(levs, function(i) sum(variable == i))
    dat <- data.frame(counts = counts, mids = mids, labs = levs)
  }

  plot <- ggplot2::ggplot(data = dat, ggplot2::aes(x = mids, y = counts/sum(counts))) +
    ggplot2::geom_bar(stat="identity", fill = "grey", colour = "black") +
    ggplot2::xlab(options$variable) +
    ggplot2::ylab(gettextf("Rel. Freq (%s in bin)", options[['variable']]))

  if(as == "scale"){
    plot <- plot + ggplot2::scale_x_continuous(limits = range,
                                               expand = c(0.05, 0),
                                               breaks = jaspGraphs::getPrettyAxisBreaks(range))
  } else if (as == "discrete"){
    breaks <- pretty(range)
    breaks <- breaks[breaks %% 1 == 0]
    plot <- plot + ggplot2::scale_x_continuous(limits = range + c(-1, 1),
                                               breaks = breaks,
                                               labels = breaks,
                                               expand = c(0, 0))
  } else if (as == "factor"){
    plot <- plot + ggplot2::scale_x_continuous(limits = range + c(-1, 1),
                                               labels = dat$labs,
                                               breaks = dat$mids,
                                               expand = c(0, 0))
  }

  plot <- jaspGraphs::themeJasp(plot)
  histPlot[['plotObject']] <- plot
}

.ldPlotECDF <- function(dataContainer, variable, options, ready){
  if(!options[['ecdf']]) return()
  if(!is.null(dataContainer[['ecdf']])) return()

  ecdfPlot <- createJaspPlot(title = gettext("Empirical Cumulative Distribution"), width = 500, height = 320)

  ecdfPlot$dependOn(c("ecdf"))
  ecdfPlot$position <- 4

  dataContainer[['ecdf']] <- ecdfPlot

  if(!ready) return()

  .ldFillPlotECDF(ecdfPlot, options, variable)
}

.ldFillPlotECDF <- function(plot, options, variable){
  p <- ggplot2::ggplot(data = data.frame(variable = variable), ggplot2::aes(x = variable)) +
    ggplot2::stat_ecdf(geom = "step", size = 1.5) +
    ggplot2::geom_rug() +
    ggplot2::scale_x_continuous(limits = range(variable)*1.1) +
    ggplot2::xlab(options$variable) +
    ggplot2::ylab(substitute(f~(v <= x), list(f = gettext("Freq"), v = decodeColNames(options[['variable']]))))

  p <- jaspGraphs::themeJasp(p)
  plot[['plotObject']] <- p

}

#### Texts ----
.ldParsSupportMoments <- function(pars, support, moments){
  formulas <- createJaspHtml(title = gettext("Parameters, Support, and Moments"))
  formulas$dependOn(c("parsSupportMoments", "parametrization"))
  formulas$position <- 2

  if(all(!is.na(moments))){
    text <- gettextf(
      "<b>Parameters</b>
      %1$s

      <b>Support</b>
      %2$s

      <b>Moments</b>
      E(X) = %3$s
      Var(X) = %4$s", paste(pars, collapse = " \n "), support, moments[[1]], moments[[2]])
  } else{
    text <- gettextf(
      "<b>Parameters</b>
      %1$s

      <b>Support</b>
      %2$s

      <b>Moments</b>
      not available", paste(pars, collapse = " \n "), support)
  }
  formulas$text <- text

  return(formulas)
}

.ldIntroText <- function(jaspResults, options, introText = NULL){
  if(!options$explanatoryText) return()
  if(!is.null(jaspResults[['introText']])) return()

  intro <- createJaspHtml()
  intro$dependOn(c("explanatoryText"))
  intro$position <- 1

  if(is.function(introText)){
    intro[['text']] <- gettext(introText())
  } else if(is.character(introText)){
    intro[['text']] <- gettextf(.ldAllTextsList(distributionName=introText)$explanations$intro,
                                introText, introText, introText, introText, introText)
  }

  jaspResults[['introText']] <- intro

  return()
}

.ldAllTextsList <- function(distributionName="..."){
  list(
    explanations = list(
      pdf = gettext("The probability density function (PDF), usually denoted as f(x), is a function of a random variable X.
    The value of f(x) provides the relative likelihood that a realization of the random variable X yields a value equal to x.
    More formally, the PDF is defined as a derivative of a cumulative distribution function (CDF).

    The density plot displays the probability density function of a random variable.
    The <i>y</i>-axis displays the value of the density function for a particular value of the random variable (displayed on the <i>x</i>-axis)."),

      pmf = gettext("The probability mass function (PMF), usually denoted as f(x), is a function of a random variable X.
    The value of f(x) provides the probability that a realization of the random variable X yields a value equal to x.

    The probability mass plot displays the probability mass function of a random variable.
    The <i>y</i>-axis displays the value of the probability mass function for a particular value of the random variable (displayed on the <i>x</i>-axis)."),

      cdf = gettext("The cumulative distribution function (CDF), usually denoted as F(x), is a function of a random variable X.
    The value of F(x) provides the probability that a realization of the random variable X yields a value that is equal to or smaller than x.

    The cumulative probability plot displays the cumulative distribution function of a random variable.
    The <i>y</i>-axis displays the value of the cumulative distribution function for a particular value of the random variable (displayed on the <i>x</i>-axis)."),

      qf  = gettext("The quantile function, usually denoted as Q(p), is the inverse of the cumulative distribution function.
    The function gives the quantile such that the probability of the random variable being less than or equal to that value equals the given probability p.

    The quantile plot displays the quantile function.
    The <i>y</i>-axis displays the quantile of which the probability that the random variable is less or equal to that value is equal to p (displayed on the <i>x</i>-axis)."),

      intro = gettextf("<h3> Demonstration of the %1$s </h3>
This demonstration is divided into four parts.
The first part displays the %2$s, its probability density function, cumulative distribution function, and quantile function.
The second part allows you to generate data from the %3$s, compute descriptive statistics, and display descriptive plots.
The third part allows you to estimate the parameters of the %4$s.
The fourth part allows you to check the fit of the %5$s to the data.

<b>References</b>

Blitzstein, J. K., & Hwang, J. (2014). <i>Introduction to probability.</i> Chapman and Hall/CRC.

Leemis, L. M., & Pasupathy, R. (2019). The ties that bind. <i>Significance, 16</i>(4), 8–9.

For relationships with other distributions, visit %6$s.

%7$s", distributionName, distributionName, distributionName, distributionName, distributionName,
                       "www.math.wm.edu/~leemis/chart/UDR/UDR.html",
                       "https://en.wikipedia.org/wiki/List_of_probability_distributions")
    ),
    references   = list(
      jasp = "JASP Team (2020). JASP (Version 0.12) [Computer software].",
      goftest = "Julian Faraway, George Marsaglia, John Marsaglia and Adrian Baddeley (2017). goftest: Classical Goodness-of-Fit Tests for Univariate Distributions. R package version 1.1-1. https://CRAN.R-project.org/package=goftest",
      fitdistrplus = "Marie Laure Delignette-Muller, Christophe Dutang (2015). fitdistrplus: An R Package for Fitting Distributions. Journal of Statistical Software, 64(4), 1-34. URL: http://www.jstatsoft.org/v64/i04/.",
      car = "John Fox and Sanford Weisberg (2011). An R Companion to Applied Regression, Second Edition. Thousand Oaks CA: Sage. URL: http://socserv.socsci.mcmaster.ca/jfox/Books/Companion.",
      brown = "Braun, H. (1980) A simple method for testing goodness-of-fit in the presence of nuisance parameters. Journal of the Royal Statistical Society 42, 53–63."
    ),
    feedback = list(
      fitdistrError = gettext("Estimation failed: Optimization did not converge. <ul><li>Try adjusting parameter values, check outliers or feasibility of the distribution fitting the data.</li></ul>"),
      vcovNA = gettext("Estimation failed: Hessian matrix is not numerically computable. <ul><li>Check outliers or feasibility of the distribution fitting the data.</li></ul>")
    )
  )
}
jasp-stats/jaspDistributions documentation built on April 5, 2025, 3:46 p.m.