R/kGOFTest.R

#' kGOFTest
#'
#' Performs a hypothesis test for goodness-of-fit based on the estimated kernel densities.
#'
#' @param data A numeric vector of data.
#' @param rfunc A function to generate data from the real density (see examples).
#' @param dfunc A function to evaluate real density values (see examples).
#' @param perm Boolean indicating weather to obtain the p-value trough the permutation test or just return return the common area between densities.
#' @param B Number of permutations.
#' @param bw The bandwidth used to estimate the kernel densities.
#' @param npoints The number of points used to estimate the kernel densities.
#' @param threads Number of cores to be used for parallel computing.
#' @param param_names A vector of variable names (as character). This parameter can be ignored when threads = 1. When using more then 1 threads, it is needed to export the global parameters name on the rfunc and dfunc functions (see examples).
#'
#' @return A list containing:
#'
#' - density: The estimated density for the data.
#'
#' - dfunc: The theoric density function.
#'
#' - ca: Common area between the kernel and the theoric density.
#'
#' - pvalue: The p-value generated by the permutation test (if perm = TRUE).
#'
#' @export
#'
#' @examples
#'
#' #When using no extra parameters on rfunc and dfunc:
#'
#' data = rnorm(100)
#'
#' rfunc = function(n) {
#'   return(rnorm(n, 0, 1))
#' }
#'
#' dfunc = function(x) {
#'   return(dnorm(x, 0, 1))
#' }
#'
#' kGOFTest(data, rfunc, dfunc)
#'
#' #When using parameters on rfunc and dfunc:
#'
#' data = rnorm(100)
#'
#' param1 = 0
#'
#' param2 = 1
#'
#' var_names = c(param1, param2)
#'
#' rfunc = function(n) {
#'   return(rnorm(n, param1, param2))
#' }
#'
#' dfunc = function(x) {
#'   return(dnorm(x, param1, param2))
#' }
#'
#' kGOFTest(data, rfunc, dfunc, param_names = c('param1', 'param2'))

kGOFTest = function(data, rfunc, dfunc, perm = TRUE, B = 5000, bw = bw.nrd0(data), npoints = 512, threads = detectCores() - 1, param_names = NULL) {

  if(!is.numeric(data)) {

    stop('data must be a numeric vector of data.')

  }

  if(!is.function(rfunc)) {

    stop('rfunc must be a function to generate data.')

  }

  if(!is.function(dfunc)) {

    stop('dfunc must be a function to evaluate real density values.')

  }

  output = list()

  output$density = density(data, bw, n = npoints, na.rm = TRUE, from = min(data) - 3*bw, to = max(data) + 3*bw)

  output$ca = commonAreaReal(output$density, dfunc)

  if(perm) {

    if(threads > 1) {

      if(threads > detectCores()) {

        warning("threads inserted greater than available, parameter threads set to 1.")

        threads = 1

      }

      cl = makeCluster(threads)

      registerDoParallel(cl)

      on.exit(stopCluster(cl))

      Ti = foreach(i = 1:B, .combine = c, .export = c("commonAreaReal", "densitiesEval", param_names), .packages = "MESS") %dopar% {

        data = rfunc(length(data))

        return(commonAreaReal(density(data, bw, n = npoints, na.rm = TRUE, from = min(data) - 3*bw, to = max(data) + 3*bw), dfunc))

      }

    } else {

      Ti = vector("numeric", B)

      for(i in 1:B) {

        data = rfunc(length(data))

        Ti[i] = commonAreaReal(density(data, bw, n = npoints, na.rm = TRUE, from = min(data) - 3*bw, to = max(data) + 3*bw), dfunc)

      }

    }

    output$pvalue = (1/B)*sum(Ti < output$ca)

  }

  output$dfunc = dfunc

  class(output) = 'kGOFTest'

  return(output)
}


#' @export

print.kGOFTest = function(output) {

  cat('\n kGOFTest results: \n\n')

  cat(paste('- Common area between densities:', round(output$ca, 4)))

  if(!is.null(output$pvalue)) {

    cat(paste0('\n\n- p-value for H0 (Observed and theoric densities are equal): ', round(output$pvalue, 4),'\n\n'))

  }
}


#' @export

plot.kGOFTest = function(output) {

  xlabel = paste('Common area =', round(output$ca, 4))

  if(!is.null(output$pvalue)) {

    xlabel = paste(xlabel, '/ p-value =', round(output$pvalue, 4))

  }

  plot(output$density, main = 'Comparison of densities', xlab = xlabel, ylab = "Density")

  plot(output$dfunc, output$density$x[1], output$density$x[length(output$density$x)], add = TRUE, col = 2)

  legend('topright', c('Data', 'Null hypothesis'), col = 1:2, bty = 'n', lwd = 1)

}
vcoscrato/ktest documentation built on May 9, 2019, 10:01 p.m.