R/envelopes.r

Defines functions unscaled_envelope st_envelope qdir_envelope rank_envelope global_envelope_test fwer_envelope central_region plot.combined_global_envelope plot.global_envelope print.combined_global_envelope print.global_envelope combined_CR_or_GET_1step combined_CR_or_GET individual_global_envelope_test individual_central_region make_envelope_object env_hiname env_loname critical get_alternative IsNfuncTooSmall

Documented in central_region global_envelope_test plot.combined_global_envelope plot.global_envelope print.combined_global_envelope print.global_envelope qdir_envelope rank_envelope st_envelope unscaled_envelope

IsNfuncTooSmall <- function(Nfunc, alpha) {
  any(Nfunc*alpha < 1-.Machine$double.eps^0.5)
}

get_alternative <- function(global_envelope) {
  attr(global_envelope, "alternative")
}

# It should be:
# small_significant=TRUE for 'rank', 'erl', 'cont' and 'area' -> ordering decreasing
# small_significant=FALSE for 'qdir', 'st', 'unscaled' -> ordering increasing (decreasing=FALSE)
critical <- function(distance, alpha, Nfunc, small_significant) {
  distancesorted <- sort(distance, decreasing=small_significant)
  distancesorted[floor((1-alpha)*Nfunc)]
}

# Multiple envelopes can be plotted in certain situations.
# These functions return the names of the lower and upper bounds of the envelopes
# that exist in global_envelope objects.
# largest = FALSE: names of all envelopes
# largest = TRUE: the maximal envelope corresponding to smallest alpha
env_loname <- function(alpha, largest=FALSE) {
  if(length(alpha)>1) {
    if(largest) paste0("lo.", 100*(1-min(alpha)))
    else paste0("lo.", 100*(1-alpha))
  }
  else "lo"
}
env_hiname <- function(alpha, largest=FALSE) {
  if(length(alpha)>1) {
    if(largest) paste0("hi.", 100*(1-min(alpha)))
    else paste0("hi.", 100*(1-alpha))
  }
  else "hi"
}

make_envelope_object <- function(type, curve_set, LB, UB, T_0,
                                 picked_attr, isenvelope,
                                 Malpha, alpha, distance) {
  Nfunc <- curve_set_nfunc(curve_set)
  if(curve_set_is1obs(curve_set)) {
    df <- data.frame(curve_set_rdf(curve_set), obs=curve_set_1obs(curve_set),
                     central=T_0, lo=LB, hi=UB)
  }
  else {
    df <- data.frame(curve_set_rdf(curve_set), central=T_0, lo=LB, hi=UB)
  }
  if(isenvelope) {
    res <- spatstat.explore::fv(x=df, argu = picked_attr[['argu']],
                        ylab = picked_attr[['ylab']], valu = "central", fmla = ". ~ r",
                        alim = c(min(curve_set[['r']]), max(curve_set[['r']])),
                        labl = picked_attr[['labl']], desc = picked_attr[['desc']],
                        unitname = NULL, fname = picked_attr[['fname']], yexp = picked_attr[['yexp']])
    attr(res, "shade") <- c("lo", "hi")
    attr(res, "alternative") <- picked_attr[['alternative']]
  }
  else {
    res <- df
    attrnames <- names(picked_attr)
    for(n in attrnames) attr(res, n) <- picked_attr[[n]]
  }
  class(res) <- c("global_envelope", class(res))
  if(curve_set_is2d(curve_set)) class(res) <- c("global_envelope2d", class(res))
  attr(res, "method") <- "Global envelope"
  attr(res, "type") <- type
  attr(res, "alpha") <- alpha
  attr(res, "M") <- distance
  attr(res, "M_alpha") <- Malpha
  res
}

# Functionality for central regions based on a curve set
# @param ... Ignored.
individual_central_region <- function(curve_set, type = "erl", coverage = 0.50,
                                      alternative = c("two.sided", "less", "greater"),
                                      probs = c((1-coverage[1])/2, 1-(1-coverage[1])/2),
                                      quantile.type = 7,
                                      central = "median") {
  isenvelope <- inherits(curve_set, "envelope")
  if(!is.numeric(coverage) || any(coverage <= 0 | coverage > 1)) stop("Unreasonable value of coverage.")
  coverage <- sort(coverage, decreasing = TRUE)
  alpha <- 1 - coverage
  if(!(type %in% c("rank", "erl", "cont", "area", "qdir", "st", "unscaled")))
    stop("No such type for global envelope.")
  alternative <- match.arg(alternative)
  small_significant <- TRUE
  if(type %in% c("qdir", "st", "unscaled")) {
    small_significant <- FALSE
    if(alternative != "two.sided") {
      warning("For qdir, st and unscaled envelopes only the two.sided alternative is valid.")
      alternative <- "two.sided"
    }
  }
  check_probs(probs)
  if(!(central %in% c("mean", "median"))) {
    central <- "median"
    warning("Invalid option fiven for central. Using central = median.")
  }
  if(central == "median" && type %in% c("qdir", "st", "unscaled")) {
    central <- "mean"
    warning("Using the mean as the central function. qdir, st and unscaled envelopes are defined with the mean.")
  }
  picked_attr <- pick_attributes(curve_set, alternative=alternative) # saving for attributes / plotting purposes
  if(isenvelope & length(alpha)>1) {
    # Note: no fv object for multiple coverages
    isenvelope <- FALSE
  }
  curve_set <- as.curve_set(curve_set, allfinite=TRUE)

  # Measures for functional ordering
  measure <- type
  scaling <- ""
  switch(type,
         qdir = {
           measure <- "max"
           scaling <- "qdir"
         },
         st = {
           measure <- "max"
           scaling <- "st"
         },
         unscaled = {
           measure <- "max"
           scaling <- "none"
         })
  distance <- forder(curve_set, measure=measure, scaling=scaling,
                     alternative=alternative, probs=probs, quantile.type=quantile.type)

  all_curves <- data_and_sim_curves(curve_set) # all the functions
  Nfunc <- length(distance) # Number of functions
  nr <- curve_set_narg(curve_set)
  # Define the central curve T_0
  T_0 <- get_T_0(curve_set, central=central)

  # Check reasonability of Nfunc vs alpha
  if(IsNfuncTooSmall(Nfunc, alpha))
    stop("Number of functions s is only ", Nfunc, ", but smallest alpha is ", min(alpha),
         ". So, s*alpha is ", Nfunc*min(alpha), ".", sep="")

  # The critical value
  Malpha <- critical(distance, alpha, Nfunc, small_significant)

  #-- 100(1-alpha)% global envelope
  LBounds <- UBounds <- vector(mode="list", length=length(alpha))
  switch(type,
         rank = {
           for(i in 1:nr) {
             Hod <- sort(all_curves[,i])
             for(ai in seq_along(alpha)) {
               LBounds[[ai]][i]<- Hod[Malpha[ai]]
               UBounds[[ai]][i]<- Hod[Nfunc-Malpha[ai]+1]
             }
           }
         },
         erl =,
         cont =,
         area = {
           for(ai in seq_along(alpha)) {
             j <- distance >= Malpha[ai]
             for(i in 1:nr){
               lu <- range(all_curves[j,i])
               LBounds[[ai]][i]<- lu[1]
               UBounds[[ai]][i]<- lu[2]
             }
           }
         },
         qdir = { # Note: All coverage levels use same probs
           curve_set_res <- residual(curve_set, use_theo=TRUE)
           quant_m <- curve_set_quant(curve_set_res, probs=probs, type=quantile.type)
           for(ai in seq_along(alpha)) {
             LBounds[[ai]] <- T_0 - Malpha[ai]*abs(quant_m[1,])
             UBounds[[ai]] <- T_0 + Malpha[ai]*abs(quant_m[2,])
           }
         },
         st = {
           sdX <- curve_set_sd(curve_set)
           for(ai in seq_along(alpha)) {
             LBounds[[ai]] <- T_0 - Malpha[ai]*sdX
             UBounds[[ai]] <- T_0 + Malpha[ai]*sdX
           }
         },
         unscaled = {
           for(ai in seq_along(alpha)) {
             LBounds[[ai]] <- T_0 - Malpha[ai]
             UBounds[[ai]] <- T_0 + Malpha[ai]
           }
         })

  switch(alternative,
         "two.sided" = {},
         "less" = { for(ai in seq_along(alpha)) UBounds[[ai]] <- Inf },
         "greater" = { for(ai in seq_along(alpha)) LBounds[[ai]] <- -Inf })

  if(length(alpha) > 1) { # Multiple envelopes
    names(LBounds) <- names(UBounds) <- paste0(100*coverage)
    LB <- do.call(cbind, LBounds)
    UB <- do.call(cbind, UBounds)
  }
  else {
    LB <- LBounds[[1]]
    UB <- UBounds[[1]]
  }
  res <- make_envelope_object(type, curve_set, LB, UB, T_0,
                              picked_attr, isenvelope,
                              Malpha, alpha, distance)
  attr(res, "call") <- match.call()
  res
}

# Functionality for global envelope tests based on a curve set (individual central region + p-values)
individual_global_envelope_test <- function(curve_set, type = "erl", alpha = 0.05,
                                            alternative = c("two.sided", "less", "greater"),
                                            ties = "erl",
                                            probs = c(0.025, 0.975), quantile.type = 7,
                                            central = "mean") {
  alternative <- match.arg(alternative)
  tmp <- as.curve_set(curve_set, allfinite=TRUE, verbose=FALSE)
  if(!curve_set_is1obs(tmp))
    stop("The curve_set does not contain one observed function. Testing does not make sense.\n Did you want to construct a central region of your data? See the function central_region.")
  if(!is.numeric(alpha) || any(alpha < 0 | alpha >= 1)) stop("Unreasonable value of alpha.")
  res <- individual_central_region(curve_set, type=type, coverage=1-alpha,
                                   alternative=alternative,
                                   probs=probs, quantile.type=quantile.type,
                                   central=central)
  # The type of the p-value
  possible_ties <- c('midrank', 'random', 'conservative', 'liberal', 'erl')
  if(!(ties %in% possible_ties)) stop("Unreasonable ties argument!")

  # Measures for functional ordering
  distance <- attr(res, "M")

  #-- Calculate the p-values
  switch(type,
         rank = {
           u <- -distance
           #-- p-interval
           p_low <- estimate_p_value(x=u[1], sim_vec=u[-1], ties='liberal')
           p_upp <- estimate_p_value(x=u[1], sim_vec=u[-1], ties='conservative')
           #-- unique p-value
           if(ties == "erl") {
             distance_lexo <- forder(curve_set, measure="erl", alternative=alternative)
             u_lexo <- -distance_lexo
             p <- estimate_p_value(x=u_lexo[1], sim_vec=u_lexo[-1], ties="conservative")
           }
           else p <- estimate_p_value(x=u[1], sim_vec=u[-1], ties=ties)
         },
         erl = {
           u_lexo <- -distance
           p <- estimate_p_value(x=u_lexo[1], sim_vec=u_lexo[-1], ties="conservative")
         },
         cont = {
           u_cont <- -distance
           p <- estimate_p_value(x=u_cont[1], sim_vec=u_cont[-1], ties="conservative")
         },
         area = {
           u_area <- -distance
           p <- estimate_p_value(x=u_area[1], sim_vec=u_area[-1], ties="conservative")
         },
         qdir = {
           p <- estimate_p_value(x=distance[1], sim_vec=distance[-1])
         },
         st = {
           p <- estimate_p_value(x=distance[1], sim_vec=distance[-1])
         },
         unscaled = {
           p <- estimate_p_value(x=distance[1], sim_vec=distance[-1])
         })

  # Change the "method" attribute
  attr(res, "method") <- paste(attr(res, "method"), " test", sep="")
  # Add attributes related to p-values
  attr(res, "p") <- p
  if(type == "rank") {
    attr(res, "p_interval") <- c(p_low, p_upp)
    attr(res, "ties") <- ties
  }
  # Update "call" attribute
  attr(res, "call") <- match.call()
  res
}

# Functionality for combined_central_region and combined_global_envelope_test (two-step procedure)
combined_CR_or_GET <- function(curve_sets, CR_or_GET = c("CR", "GET"), coverage, ...) {
  ntests <- length(curve_sets)
  if(ntests < 1) stop("Only one curve_set, no combining to be done.")
  check_curve_set_dimensions(curve_sets) # Do not catch the curve_set here
  CR_or_GET <- match.arg(CR_or_GET)

  # 1) First stage: Calculate the functional orderings individually for each curve_set
  res_ls <- lapply(curve_sets, FUN = function(x) { individual_central_region(x, ...) })
  type <- attr(res_ls[[1]], "type")

  # 2) Second stage: ERL central region/test
  # Create a curve_set for the ERL test
  k_ls <- lapply(res_ls, FUN = function(x) attr(x, "M"))
  k_mat <- do.call(rbind, k_ls, quote=FALSE)
  Nfunc <- ncol(k_mat)
  # Construct the one-sided ERL central region
  if(type %in% c("qdir", "st", "unscaled")) alt2 <- "greater"
  else alt2 <- "less"
  switch(CR_or_GET,
         CR = {
           curve_set_u <- create_curve_set(list(r=1:ntests, obs=k_mat))
           res_erl <- individual_central_region(curve_set_u, type="erl", coverage=coverage, alternative=alt2)
         },
         GET = {
           curve_set_u <- create_curve_set(list(r=1:ntests, obs=k_mat[,1], sim_m=k_mat[,-1]))
           res_erl <- individual_global_envelope_test(curve_set_u, type="erl", alpha=1-coverage, alternative=alt2)
         }
  )
  res_erl <- envelope_set_labs(res_erl, xlab="Function", ylab="ERL measure")
  attr(res_erl, "labels") <- names(curve_sets)
  coverage <- 1 - attr(res_erl, "alpha") # ordered

  # 3) The 100(1-alpha)% global combined ERL envelope
  distance_lexo_sorted <- sort(attr(res_erl, "M"), decreasing=TRUE)
  Malpha <- distance_lexo_sorted[floor(coverage*Nfunc)]

  LBounds <- UBounds <- vector("list", length(coverage))
  for(ai in seq_along(coverage)) {
    # Indices of the curves from which to calculate the convex hull
    curves_for_envelope_ind <- which(attr(res_erl, "M") >= Malpha[ai])
    # Curves
    curve_sets <- lapply(curve_sets, FUN=as.curve_set)
    all_curves_l <- lapply(curve_sets, function(x) { data_and_sim_curves(x) })
    # Curves from which to calculate the convex hull
    curves_for_envelope_l <- lapply(all_curves_l, function(x) { x[curves_for_envelope_ind,] })
    # Bounding curves
    LBounds[[ai]] <- lapply(curves_for_envelope_l, FUN = function(x) { apply(x, MARGIN=2, FUN=min) })
    UBounds[[ai]] <- lapply(curves_for_envelope_l, FUN = function(x) { apply(x, MARGIN=2, FUN=max) })
  }
  # Update the bounding curves (lo, hi) and Malpha to the first level central regions
  if(length(coverage) == 1) { # Use names 'lo' and 'hi'
    for(i in 1:ntests) {
      if(get_alternative(res_ls[[i]]) != "greater") res_ls[[i]]$lo <- LBounds[[1]][[i]]
      if(get_alternative(res_ls[[i]]) != "less") res_ls[[i]]$hi <- UBounds[[1]][[i]]
      attr(res_ls[[i]], "alpha") <- attr(res_ls[[i]], "M_alpha") <- NULL
      attr(res_ls[[i]], "method") <- paste0("1/", ntests, "th of a combined global envelope test")
    }
  }
  else { # Names according to the coverages, i.e. lo.xx, hi.xx where xx represent the levels
    for(i in 1:ntests) {
      if(get_alternative(res_ls[[i]]) != "greater")
        for(ai in seq_along(coverage))
          res_ls[[i]][paste0("lo.", 100*coverage[ai])] <- LBounds[[ai]][[i]]
      if(get_alternative(res_ls[[i]]) != "less")
        for(ai in seq_along(coverage))
          res_ls[[i]][paste0("hi.", 100*coverage[ai])] <- UBounds[[ai]][[i]]
      res_ls[[i]]$lo <- res_ls[[i]]$hi <- NULL
      attr(res_ls[[i]], "alpha") <- attr(res_ls[[i]], "M_alpha") <- NULL
      attr(res_ls[[i]], "method") <- paste0("1/", ntests, "th of a combined global envelope test")
    }
  }

  if(!is.null(names(curve_sets))) names(res_ls) <- names(curve_sets)

  # Return
  attr(res_ls, "level2_ge") <- res_erl
  attr(res_ls, "level2_curve_set") <- curve_set_u
  switch(CR_or_GET,
         CR = {
           attr(res_ls, "method") <- "Combined global envelope"
         },
         GET = {
           attr(res_ls, "method") <- "Combined global test"
         })
  if(!is.null(attr(res_ls[[1]], "argu")))
    res_ls <- envelope_set_labs(res_ls, xlab=attr(res_ls[[1]], "xlab"),
                                ylab=substitute(italic(T(i)), list(i=attr(res_ls[[1]], "argu"))))
  else
    res_ls <- envelope_set_labs(res_ls, xlab=expression(italic(r)),
                                ylab=expression(italic(T(r))))
  attr(res_ls, "alternative") <- get_alternative(res_ls[[1]])
  attr(res_ls, "type") <- type
  attr(res_ls, "alpha") <- 1-coverage
  attr(res_ls, "M") <- attr(res_erl, "M")
  attr(res_ls, "M_alpha") <- attr(res_erl, "M_alpha")
  attr(res_ls, "p") <- attr(res_erl, "p")
  attr(res_ls, "nstep") <- 2
  class(res_ls) <- c("combined_global_envelope", class(res_ls))
  if(curve_set_is2d(curve_sets[[1]]))
    class(res_ls) <- c("combined_global_envelope2d", class(res_ls))
  res_ls
}

# Functionality for combined_central_region and combined_global_envelope_test (one-step procedure)
combined_CR_or_GET_1step <- function(curve_sets, CR_or_GET = c("CR", "GET"), coverage, ...) {
  curve_set <- combine_curve_sets(curve_sets, equalr=TRUE)
  switch(CR_or_GET,
         CR = {
           res <- individual_central_region(curve_set, coverage=coverage, ...)
         },
         GET = {
           res <- individual_global_envelope_test(curve_set, alpha=1-coverage, ...)
         })
  # Transform the envelope to a combined envelope
  nfuns <- length(curve_sets)
  nr <- curve_set_narg(curve_sets[[1]]) # all curve sets have the same
  # Split the envelopes to the original groups
  res_ls <- split(res, f = rep(1:nfuns, each=nr))

  # Set unreasonable attributes of individuals sets of curves to NULL
  for(i in 1:nfuns)
    attr(res_ls[[i]], "method") <- paste0("1/", nfuns, "th of a combined global envelope test")
  anames <- c("p", "p_interval", "ties", "M", "M_alpha", "alpha")
  anames <- anames[anames %in% names(attributes(res_ls[[1]]))]
  for(name in anames) {
    for(i in 1:nfuns) attr(res_ls[[i]], name) <- NULL
  }

  mostattributes(res_ls) <- attributes(res)
  attr(res_ls, "row.names") <- NULL
  if(!is.null(names(curve_sets))) names(res_ls) <- names(curve_sets)
  switch(CR_or_GET,
         CR = {
           attr(res_ls, "method") <- "Combined global envelope"
         },
         GET = {
           attr(res_ls, "method") <- "Combined global test"
         })
  attr(res_ls, "nstep") <- 1
  class(res_ls) <- c("combined_global_envelope", "list")
  if(curve_set_is2d(curve_sets[[1]]))
    class(res_ls) <- c("combined_global_envelope2d", class(res_ls))
  res_ls
}

#' Print method for the class 'global_envelope'
#'
#' @param x A 'global_envelope' object.
#' @param ... Ignored.
#' @export
print.global_envelope <- function(x, ...) {
  printhelper_ge_base(x)
}

#' Print method for the class 'combined_global_envelope'
#'
#' @param x A 'combined_global_envelope' object
#' @param ... Ignored.
#' @export
print.combined_global_envelope <- function(x, ...) {
  printhelper_ge_combined(x)
}

#' Plot method for the class 'global_envelope'
#'
#' @details If several envelopes have been computed, their are plotted in different
#' grey scales so that the smallest envelope has the darkest color and the widest
#' envelope consist of all grey scales with the lightest color in the outskirts.
#' @param x An 'global_envelope' object
#' @param dotplot Logical. If TRUE, then instead of envelopes a dot plot is done.
#' Suitable for low dimensional test vectors.
#' Default: TRUE if the dimension is less than 10, FALSE otherwise.
#' @param sign.col The color for the observed curve when outside the global envelope
#' (significant regions). Default to "red". Setting the color to \code{NULL} corresponds
#' to no coloring. If the object contains several envelopes, the coloring is done for
#' the widest one.
#' @param labels A character vector of suitable length.
#' If \code{dotplot = TRUE}, then labels for the tests at x-axis.
#' @param digits The number of digits used for printing the p-value or p-interval
#' in the default main.
#' @param ... Ignored.
#'
#' @export
#' @seealso \code{\link{central_region}}, \code{\link{global_envelope_test}}
#' @examples
#' if(require("spatstat.explore", quietly=TRUE)) {
#'   X <- unmark(spruces)
#'   \donttest{nsim <- 1999 # Number of simulations}
#'   \dontshow{nsim <- 19 # Number of simulations}
#'   env <- envelope(X, fun="Kest", nsim=nsim,
#'                   savefuns=TRUE, # save the functions
#'                   correction="translate", # edge correction for K
#'                   simulate=expression(runifpoint(ex=X))) # Simulate CSR
#'   res <- global_envelope_test(env, type="erl")
#'
#'   # Default plot
#'   plot(res)
#'   # Plots can be edited, e.g.
#'   # Remove legend
#'   plot(res) + ggplot2::theme(legend.position="none")
#'   # Change its position
#'   plot(res) + ggplot2::theme(legend.position="right")
#'   # Change the outside color
#'   plot(res, sign.col="#5DC863FF")
#'   plot(res, sign.col=NULL)
#'   # Change default title and x- and y-labels
#'   plot(res) + ggplot2::labs(title="95% global envelope", x="x", y="f(x)")
#'
#'   # Prior to the plot, you can set your preferred ggplot theme by theme_set
#'   old <- ggplot2::theme_set(ggplot2::theme_bw())
#'   plot(res)
#'
#'   # Do other edits, e.g. turn off expansion with the default limits
#'   plot(res) + ggplot2::coord_cartesian(expand=FALSE)
#'
#'   # Go back to the old theme
#'   ggplot2::theme_set(old)
#'
#'   # If you are working with the R package spatstat and its envelope-function,
#'   # you can obtain global envelope plots in the style of spatstat using plot.fv:
#'   plot.fv(res)
#' }
#' @importFrom ggplot2 labs
plot.global_envelope <- function(x, dotplot = length(x$r)<10, sign.col = "red",
                                 labels = NULL, digits = 3, ...) {
  if(!is.null(x[['r']]) && !all(x[['r']][-1] - x[['r']][-length(x[['r']])] > 0))
    warning("r values non-increasing. Plot not valid.")
  if(missing(labels)) labels <- default_labels(x, labels)
  main <- env_main_default(x, digits=digits)
  d <- plotdefaultlabs(x)
  if(dotplot) {
    env_dotplot_ggplot(x, labels=labels, sign.col=sign.col) +
      labs(title=main, x=d$xlab, y=d$ylab)
  } else {
    env_ggplot(x, main=main, xlab=d$xlab, ylab=d$ylab, sign.col=sign.col)
  }
}

#' Plot method for the class 'combined_global_envelope'
#'
#' Plotting method for the class 'combined_global_envelope', i.e. combined envelopes for
#' 1d functions.
#'
#' @description This function provides plots for combined global envelopes.
#' @param x An 'combined_global_envelope' object
#' @inheritParams plot.global_envelope
#' @param labels A character vector of suitable length.
#' If \code{dotplot = TRUE} (for the level 2 test), then labels for the tests at x-axis;
#' only valid/used when all components of \code{x} have the same dimension.
#' Otherwise labels for the separate plots.
#' @param scales See \code{\link[ggplot2]{facet_wrap}}.
#' Use \code{scales = "free"} when the scales of the functions in the global envelope
#' vary. \code{scales = "fixed"} is a good choice, when you want the same y-axis for all components.
#' A sensible default based on r-values exists.
#' @param dotplot Logical. If TRUE, then instead of envelopes a dot plot is done.
#' Suitable for low dimensional test vectors.
#' @param ncol The maximum number of columns for the figures.
#' Default 2 or 3, if the length of x equals 3.
#' (Relates to the number of curve_sets that have been combined.)
#' @param level 1 or 2. In the case of two-step combined tests (with several test functions),
#' two different plots are available:
#' 1 for plotting the combined global envelopes (default and most often wanted) or
#' 2 for plotting the second level test result.
#' @param ... Ignored in most cases. If \code{dotplot = TRUE}, then parameters
#' can be passed to \code{\link{arrow}}, e.g. length = unit(0.25, "cm").
#' @export
#' @seealso \code{\link{central_region}}
plot.combined_global_envelope <- function(x, labels, scales, sign.col = "red",
                                          dotplot = length(x[[1]]$obs) < 5,
                                          ncol = 2 + 1*(length(x)==3),
                                          digits = 3, level = 1, ...) {
  if(!(level %in% c(1,2))) stop("Unreasonable value for level.")

  if(missing(scales)) {
    if(all(sapply(x, FUN=function(y) { all(range(y[['r']]) == range(x[[1]][['r']])) })))
      scales <- "fixed"
    else
      scales <- "free"
  }

  alt <- get_alternative(x[[1]])
  main <- env_main_default(x, digits=digits, alternative=alt)
  d <- plotdefaultlabs(x)

  if(level == 1) {
    if(dotplot) {
      env_combined_dotplot(x, main=main, xlab=d$xlab, ylab=d$ylab,
                           labels=labels, scales=scales,
                           max_ncols_of_plots=ncol, sign.col=sign.col, ...)
    }
    else {
      if(missing(labels)) labels <- default_labels(x, labels)
      env_combined_ggplot(x, main=main, xlab=d$xlab, ylab=d$ylab,
                          labels=labels, scales=scales,
                          max_ncols_of_plots=ncol, sign.col=sign.col)
    }
  }
  else {
    if(attr(x, "nstep") != 2)
      stop("level = 2 plot not available for one-step combined global envelopes.")
    if(missing(labels)) labels <- default_labels(attr(x, "level2_ge"), labels)
    env_dotplot_ggplot(attr(x, "level2_ge"), labels=labels)
  }
}

#' Central region / Global envelope
#'
#' Provides central regions or global envelopes or confidence bands
#'
#'
#' Given a \code{\link{curve_set}} object,
#' or an \code{envelope} object of \pkg{spatstat} or \code{fdata} object of \pkg{fda.usc},
#' the function \code{central_region} constructs a central region, i.e. a global envelope,
#' from the given set of functions (or vectors).
#'
#' Generally an envelope is a band bounded by the vectors (or functions)
#' \eqn{T_{low}}{T_lo} and \eqn{T_{hi}}{T_hi}.
#' A \eqn{100(1-\alpha)}{100(1-alpha)}\% or 100*coverage\% global envelope is a set
#' \eqn{(T_{low}, T_{hi})}{(T_lo, T_hi)} of envelope vectors
#' such that the probability that \eqn{T_i}{T_i} falls outside this envelope
#' in any of the d points of the vector \eqn{T_i}{T_i} is less or equal to \eqn{\alpha}{alpha}.
#' The global envelopes can be constructed based on different measures
#' that order the functions from the most extreme one to the least extreme one.
#' We use such orderings of the functions for which we are able to construct global envelopes
#' with intrinsic graphical interpretation.
#'
#' The type of the global envelope can be chosen with the argument \code{type} and
#' the options are given in the following.
#' Further information about the measures, on which the global envelopes are based,
#' can be found in Myllymäki and Mrkvička (2020, Section 2.).
#' \itemize{
#'  \item \code{'rank'}: The global rank envelope
#' proposed by Myllymäki et al. (2017) based on the extreme rank defined as the minimum of pointwise
#' ranks.
#'  \item \code{'erl'}: The global rank envelope based on the extreme rank
#'  length (Myllymäki et al.,2017, Mrkvička et al., 2018).
#' This envelope is constructed as the convex hull of the functions which have extreme rank
#' length measure that is larger or equal to the critical \eqn{\alpha}{alpha} level of the
#' extreme rank length measure.
#'  \item \code{'cont'}: The global rank envelope based on the continuous rank
#'  (Hahn, 2015; Mrkvička et al., 2019) based on minimum of continuous pointwise ranks.
#'  It is contructed as the convex hull in a similar way as the \code{'erl'} envelope.
#'  \item \code{'area'}: The global rank envelope based on the area rank (Mrkvička et al., 2019)
#'  which is based on area between continuous pointwise ranks and minimum pointwise ranks
#'  for those argument (r) values for which pointwise ranks achieve the minimum
#'  (it is a combination of erl and cont).
#'  It is contructed as the convex hull in a similar way as the \code{'erl'} and \code{'area'} envelopes.
#'  \item \code{'qdir'}: The directional quantile envelope based on
#'  the directional quantile maximum absolute deviation (MAD) test (Myllymäki et al., 2017, 2015),
#' which takes into account the unequal variances of the test function T(r) for
#' different distances r and is also protected against asymmetry of distribution of T(r).
#'  \item \code{'st'}: The studentised envelope based on the studentised MAD
#'  measure (Myllymäki et al., 2017, 2015),
#'  which takes into account the unequal variances of the test function T(r) for different distances r.
#'  \item \code{'unscaled'}: The unscaled envelope (Ripley, 1981),
#' which leads to envelopes with constant width. It corresponds to the classical
#' maximum deviation test without scaling. This test suffers from unequal variance
#' of T(r) over the distances r and from the asymmetry of distribution of T(r).
#' We recommend to use the other alternatives instead. This unscaled global envelope is
#' provided for reference.
#' }
#'
#' The values of the chosen measure M are determined for each curve in the \code{curve_set}, and
#' based on the chosen measure, the central region, i.e. the global envelope, is constructed
#' for the given curves.
#'
#' If a list of (suitable) objects are provided in the argument \code{curve_sets},
#' then by default (\code{nstep = 2}) the two-step combining procedure is used to
#' construct the combined global envelope as described in Myllymäki and Mrkvička (2020, Section 2.2.).
#' If \code{nstep = 1} and the lengths of the multivariate vectors in each component
#' of the list are equal, then the one-step combining procedure is used where the
#' functions are concatenated together into a one long vector (see again Myllymäki and Mrkvička, 2020, Section 2.2.).
#'
#' @references
#' Mrkvička, T., Myllymäki, M., Jilek, M. and Hahn, U. (2020) A one-way ANOVA test for functional data with graphical interpretation. Kybernetika 56(3), 432-458. doi: 10.14736/kyb-2020-3-0432
#'
#' Mrkvička, T., Myllymäki, M., Kuronen, M. and Narisetty, N. N. (2022) New methods for multiple testing in permutation inference for the general linear model. Statistics in Medicine 41(2), 276-297. doi: 10.1002/sim.9236
#'
#' Myllymäki, M., Grabarnik, P., Seijo, H. and Stoyan. D. (2015). Deviation test construction and power comparison for marked spatial point patterns. Spatial Statistics 11, 19-34. doi: 10.1016/j.spasta.2014.11.004
#'
#' Myllymäki, M., Mrkvička, T., Grabarnik, P., Seijo, H. and Hahn, U. (2017). Global envelope tests for spatial point patterns. Journal of the Royal Statistical Society: Series B (Statistical Methodology) 79, 381-404. doi: 10.1111/rssb.12172
#'
#' Myllymäki, M. and Mrkvička, T. (2020). GET: Global envelopes in R. arXiv:1911.06583 [stat.ME]
#'
#' Ripley, B.D. (1981). Spatial statistics. Wiley, New Jersey.
#'
#' @inheritParams forder
#' @param type The type of the global envelope with current options for 'rank', 'erl', 'cont', 'area',
#' 'qdir', 'st' and 'unscaled'. See details.
#' @param coverage A number between 0 and 1. The 100*coverage\% central region will be calculated.
#' A vector of values can also be provided, leading to the corresponding number of central regions.
#' @param central Either "mean" or "median". If the curve sets do not contain the component
#' \code{theo} for the theoretical central function, then the central function (used for plotting only)
#' is calculated either as the mean or median of functions provided in the curve sets.
#' For 'qdir', 'st' and 'unscaled' only the mean is allowed as an option, due to their definition.
#' @param nstep 1 or 2 for how to contruct a combined global envelope if list of curve sets
#' is provided. 2 (default) for a two-step combining procedure, 1 for one-step.
#' @param ... Ignored.
#' @return Either an object of class \code{global_envelope} and or an \code{combined_global_envelope} object.
#' The former class is obtained when a set of curves is provided, while the latter in the case
#' that \code{curve_sets} is a list of objects. The print and plot function are defined for the
#' returned objects (see examples).
#'
#' The \code{global_envelope} object is essentially a data frame containing columns
#' \itemize{
#' \item r = the vector of values of the argument r at which the test was made
#' \item lo = the lower envelope based on the simulated functions;
#' in case of a vector of coverage values, several 'lo' exist with names paste0("lo.", 100*coverage)
#' \item hi = the upper envelope based on the simulated functions;
#' in case of a vector of coverage values, several 'lo' exist with names paste0("hi.", 100*coverage)
#' \item central = If the \code{curve_set} (or \code{envelope} object) contains a theoretical curve,
#'       then this function is used as the central curve and returned in this component.
#'       Otherwise, the central curve is the mean or median (according to the argument \code{central})
#'       of the test functions T_i(r), i=2, ..., s+1. Used for visualization only.
#' }
#' and potentially additionally
#' \itemize{
#' \item obs = the data function, if there is only one data function in the given \code{curve_sets}.
#' Otherwise not existing.
#' }
#' (Most often \code{central_region} is directly applied to functional data where all curves are observed.)
#' Additionally, the returned object has some attributes, where
#' \itemize{
#'   \item M = A vector of the values of the chosen measure for all the function.
#'   If there is only one observed function, then M[1] gives the value of the measure for this.
#'   \item M_alpha = The critical value of M corresponding to the 100(1-alpha)\% global envelope
#'   (see Myllymäki and Mrkvička, 2020, Definition 1.1. IGI).
#' }
#' Further the object has some attributes for printing and plotting purposes, where
#' \code{alternative}, \code{type}, \code{ties}, \code{alpha} correspond to those in the function call
#' and \code{method} gives a name for the method.
#' Attributes of an object \code{res} can be obtained using the function
#' \code{\link[base]{attr}}, e.g. \code{attr(res, "M")} for the values of the ordering measure.
#'
#' If the given set of curves had the class \code{envelope} of \pkg{spatstat}, then the returned
#' \code{global_envelope} object has also the class \code{fv} of spatstat, whereby one can utilize
#' also the plotting functions of \pkg{spatstat}, see example in \code{\link{plot.global_envelope}}.
#' However, the \code{envelope} objects are most often used with \code{\link{global_envelope_test}}
#' and not with \code{central_region}.
#' For an \code{fv} object, also some further attributes exists as required by \code{fv} of \pkg{spatstat}.
#'
#' The \code{combined_global_envelope} is a list of \code{global_envelope} objects, where
#' the components correspond to the components of \code{curve_sets}.
#' The \code{combined_global_envelope} object constructed with \code{nstep = 2} contains,
#' in addition to some conventional ones (\code{method}, \code{alternative}, \code{type}, \code{alpha},
#' \code{M}, \code{M_alpha}, see above), the second level envelope information as the attributes
#' \itemize{
#' \item level2_ge = The second level envelope on which the envelope construction is based
#' \item level2_curve_set = The second level \code{curve_set} from which \code{level2_ge} is constructed
#' }
#'
#' In the case that the given curve sets are two-dimensional, i.e., their arguments values are two-dimensional,
#' then the returned objects have in addition to the class \code{global_envelope} or \code{combined_global_envelope},
#' the class \code{global_envelope2d} or \code{combined_global_envelope2d}, respectively. This class is assigned
#' for plotting purposes: For the 2d envelopes, also the default plots are 2d.
#' Otherwise the 1d and 2d objects are similar.
#' @export
#' @seealso \code{\link{forder}}, \code{\link{global_envelope_test}}
#' @aliases global_envelope
#' @examples
#' ## A central region of a set of functions
#' #----------------------------------------
#' if(requireNamespace("fda", quietly=TRUE)) {
#'   cset <- curve_set(r=as.numeric(row.names(fda::growth$hgtf)),
#'                     obs=fda::growth$hgtf)
#'   plot(cset) + ggplot2::ylab("height")
#'   cr <- central_region(cset, coverage=0.50, type="erl")
#'   plot(cr)
#' }
#'
#' ## Confidence bands for linear or polynomial regression
#' #------------------------------------------------------
#' # Simulate regression data according to the cubic model
#' # f(x) = 0.8x - 1.8x^2 + 1.05x^3 for x in [0,1]
#' par <- c(0,0.8,-1.8,1.05) # Parameters of the true polynomial model
#' res <- 100 # Resolution
#' x <- seq(0, 1, by=1/res); x2=x^2; x3=x^3;
#' f <- par[1] + par[2]*x + par[3]*x^2 + par[4]*x^3 # The true function
#' d <- f + rnorm(length(x), 0, 0.04) # Data
#' # Plot the true function and data
#' plot(f, type="l", ylim=range(d))
#' points(d)
#'
#' # Estimate polynomial regression model
#' reg <- lm(d ~ x + x2 + x3)
#' ftheta <- reg$fitted.values
#' resid0 <- reg$residuals
#' s0 <- sd(resid0)
#'
#' # Bootstrap regression
#' \donttest{B <- 2000 # Number of bootstrap samples}
#' \dontshow{B <- 20 # Number of bootstrap samples}
#' ftheta1 <- array(0, c(B,length(x)))
#' s1 <- array(0,B)
#' for(i in 1:B) {
#'   u <- sample(resid0, size=length(resid0), replace=TRUE)
#'   reg1 <- lm((ftheta+u) ~ x + x2 + x3)
#'   ftheta1[i,] <- reg1$fitted.values
#'   s1[i] <- sd(reg1$residuals)
#' }
#'
#' # Centering and scaling
#' meanftheta <- apply(ftheta1, 2, mean)
#' m <- array(0, c(B,length(x)))
#' for(i in 1:B) { m[i,] <- (ftheta1[i,]-meanftheta)/s1[i] }
#'
#' # Central region computation
#' boot.cset <- curve_set(r=1:length(x), obs=ftheta+s0*t(m))
#' cr <- central_region(boot.cset, coverage=c(0.50, 0.80, 0.95), type="erl")
#'
#' # Plotting the result
#' plot(cr) + ggplot2::labs(x=expression(italic(x)), y=expression(italic(f(x)))) +
#'   ggplot2::geom_point(data=data.frame(id=1:length(d), points=d),
#'                       ggplot2::aes(x=id, y=points)) + # data points
#'   ggplot2::geom_line(data=data.frame(id=1:length(d), points=f),
#'                      ggplot2::aes(x=id, y=points)) # true function
central_region <- function(curve_sets, type = "erl", coverage = 0.50,
                           alternative = c("two.sided", "less", "greater"),
                           probs = c(0.25, 0.75),
                           quantile.type = 7,
                           central = "median", nstep = 2, ...) {
  if(!is_a_single_curveset(curve_sets)) {
    if(length(curve_sets) > 1) { # Combined test
      if(!(nstep %in% c(1,2))) stop("Invalid number of steps (nstep) for combining. Should be 1 or 2.")
      if(nstep == 2) # Two-step combining procedure
        return(combined_CR_or_GET(curve_sets, CR_or_GET="CR", type=type, coverage=coverage,
                                  alternative=alternative,
                                  probs=probs, quantile.type=quantile.type,
                                  central=central, ...))
      else # One-step combining procedure
        return(combined_CR_or_GET_1step(curve_sets, CR_or_GET="CR", type=type, coverage=coverage,
                                        alternative=alternative,
                                        probs=probs, quantile.type=quantile.type,
                                        central=central, ...))
    }
    else if(length(curve_sets) == 1)
      curve_sets <- curve_sets[[1]]
    else
      stop("The given list of curve_sets is empty.")
  }
  # Individual test
  return(individual_central_region(curve_sets, type=type, coverage=coverage,
                                   alternative=alternative,
                                   probs=probs, quantile.type=quantile.type,
                                   central=central, ...))
}

# Internal function for global envelope test with FWER control
fwer_envelope <- function(curve_sets, type = "erl", alpha = 0.05,
                          alternative = c("two.sided", "less", "greater"),
                          ties = "erl", probs = c(0.025, 0.975), quantile.type = 7,
                          central = "mean", nstep = 2, ...) {
  if(!is_a_single_curveset(curve_sets)) {
    if(length(curve_sets) > 1) { # Combined test
      if(!(nstep %in% c(1,2))) stop("Invalid number of steps (nstep) for combining. Should be 1 or 2.")
      if(nstep == 2) # Two-step combining procedure
        return(combined_CR_or_GET(curve_sets, CR_or_GET="GET", type=type, coverage=1-alpha,
                                  alternative=alternative,
                                  probs=probs, quantile.type=quantile.type,
                                  central=central, ...))
      else # One-step combining procedure
        return(combined_CR_or_GET_1step(curve_sets, CR_or_GET="GET", type=type, coverage=1-alpha,
                                        alternative=alternative, ties=ties,
                                        probs=probs, quantile.type=quantile.type,
                                        central=central, ...))
    }
    else if(length(curve_sets) == 1)
      curve_sets <- curve_sets[[1]]
    else
      stop("The given list of curve_sets is empty.")
  }
  return(individual_global_envelope_test(curve_sets, type=type, alpha=alpha,
                                         alternative=alternative, ties=ties,
                                         probs=probs, quantile.type=quantile.type,
                                         central=central, ...))
}

#' Global envelope test
#'
#' Global envelope test, global envelopes and p-values
#'
#'
#' Given a \code{\link{curve_set}} object,
#' or an \code{envelope} object of \pkg{spatstat},
#' which contains both the data curve (or function or vector) \eqn{T_1(r)}{T_1(r)}
#' (in the component \code{obs}) and
#' the simulated curves \eqn{T_2(r),\dots,T_{s+1}(r)}{T_2(r),...,T_(s+1)(r)}
#' (in the component \code{sim_m}),
#' the function \code{global_envelope_test} performs a global envelope test,
#' either under the control of family-wise error rate (FWER) or false discovery rate (FDR),
#' as specified by the argument \code{typeone}.
#' The function \code{global_envelope_test} is the main function for global envelope tests
#' (for simple hypotheses).
#'
#' The case \code{typeone = "fdr"} corresponds to the FDR envelopes proposed by
#' Mrkvička and Myllymäki (2023). See details in \code{\link{fdr_envelope}} and
#' in the vignette \code{vignette("FDRenvelopes")}. Note there also the arguments that are
#' the relevant ones for the FDR envelope specification.
#' The descriptions below concern the FWER envelopes.
#'
#' If \code{typeone = "fwer"}, the functionality of the function is rather similar to the
#' function \code{\link{central_region}}, but in addition to ordering the functions from
#' the most extreme one to the least extreme one using different measures
#' and providing the global envelopes with intrinsic
#' graphical interpretation, p-values are calculated for the test.
#' Thus, while \code{\link{central_region}} can be used to construct global
#' envelopes in a general setting, the function \code{\link{global_envelope_test}}
#' is devoted to testing as its name suggests.
#'
#' Different \code{type} of global envelope tests under the control of FWER can be computed.
#' We use such ordering of the functions for which we are able to construct global
#' envelopes with intrinsic graphical interpretation (IGI, see Myllymäki and Mrkvička, 2023).
#' \itemize{
#'   \item \code{'rank'}: the completely non-parametric rank envelope test (Myllymäki et al., 2017)
#'   based on minimum of pointwise ranks
#'   \item \code{'erl'}: the completely non-parametric rank envelope test based on extreme rank lengths
#'   (Myllymäki et al., 2017; Mrkvička et al., 2018) based on number of minimal pointwise ranks
#'  \item \code{'cont'}: the completely non-parametric rank envelope test based on continuous rank
#'  (Hahn, 2015; Mrkvička et al., 2022) based on minimum of continuous pointwise ranks
#'  \item \code{'area'}: the completely non-parametric rank envelope test based on area rank
#'  (Mrkvička et al., 2022) based on area between continuous pointwise ranks and minimum
#'  pointwise ranks for those argument (r) values for which pointwise ranks achieve the minimum
#'  (it is a combination of erl and cont)
#'   \item "qdir", the directional quantile envelope test, protected against unequal variance and
#'   asymmetry of T(r) for different distances r (Myllymäki et al., 2015, 2017)
#'   \item "st", the studentised envelope test, protected against unequal variance of T(r) for
#'   different distances r (Myllymäki et al., 2015, 2017)
#'   \item "unscaled", the unscaled envelope (providing a baseline) that has a contant width and
#'   that corresponds to the classical maximum deviation test (Ripley, 1981).
#' }
#' The first four types are global rank envelopes.
#' The \code{'rank'} envelope test is a completely non-parametric test,
#' which provides the 100(1-alpha)\% global envelope for the chosen test function
#' T(r) on the chosen interval of distances and associated p-values.
#' The other three are modifications of \code{'rank'} to treat the ties in
#' the extreme rank ordering on which the \code{'rank'} test is based on.
#' The last three envelopes are global scaled maximum absolute difference (MAD)
#' envelope tests. The unscaled envelope test leads to envelopes with constant width over the
#' distances r. Thus, it suffers from unequal variance of T(r) over the distances r and
#' from the asymmetry of distribution of T(r). We recommend to use the other global
#' envelope tests available. The unscaled envelope is provided as a reference.
#'
#' See Myllymäki and Mrkvička (2023, Appendix.), i.e. \code{vignette("GET")}, for more detailed
#' description of the measures and the corresponding envelopes.
#'
#' See \code{vignette("pointpatterns")} for examples of point pattern analyses.
#' See \code{vignette("FDRenvelopes")} for examples of FDR envelopes obtained by
#' \code{typeone = "fdr"}.
#'
#' @section Procedure:
#' 1) First the curves are ranked from the most extreme one to the least extreme one
#' by a measure that is specified by the argument \code{type}. The options are
#' \itemize{
#' \item 'rank': extreme ranks (Myllymäki et al., 2017)
#' \item 'erl': extreme rank lengths (Myllymäki et al., 2017; Mrkvička et al., 2018)
#' \item 'cont': continuous ranks (Hahn, 2015; Mrkvička et al., 2019)
#' \item 'area': area ranks (Mrkvička et al., 2019)
#' \item 'qdir': the directional quantile maximum absolute deviation (MAD) measure (Myllymäki et al., 2015, 2017)
#' \item 'st': the studentized MAD measure (Myllymäki et al., 2015, 2017)
#' \item 'unscaled': the unscaled MAD measure (Ripley, 1981)
#' }
#'
#' 2) Based on the measures used to rank the functions, the 100(1-alpha)\% global envelope is provided.
#' It corresponds to the 100*coverage\% central region.
#'
#' 3) P-values:
#' In the case \code{type="rank"}, based on the extreme ranks \eqn{k_i, i=1, ..., s+1}{k_i, i=1, ..., s+1},
#' the p-interval is calculated. Because the extreme ranks contain ties, there is not just
#' one p-value. The p-interval is given by the most liberal and the most conservative p-value
#' estimate. Also a single p-value is calculated.
#' By default this single p-value is the extreme rank length p-value ("erl") as specified by the argument \code{ties}.
#' If the case of other measures, a (single) p-value based on the given ordering
#' of the functions is calculated and returned in the attribute \code{p}.
#'
#' @section Number of simulations:
#' For the global \code{"rank"} envelope test, Myllymäki et al. (2017) recommended to use
#' at least 2500 simulations for testing at the significance level alpha = 0.05 for single
#' function tests, based on experiments with summary functions for point processes evaluated
#' approximately at 500 argument values.
#' In this case, the width of the p-interval associated with the extreme rank measure tended
#' to be smaller than 0.01.
#' The tests \code{'erl'}, \code{'cont'} and \code{'area'}, similarly as
#' the MAD deviation/envelope tests \code{'qdir'}, \code{'st'} and \code{'unscaled'},
#' allow in principle a lower number of simulations to be used than the test based on
#' extreme ranks (\code{'rank'}), because no ties occur for these measures.
#' If affordable, we recommend in any case some thousands of simulations for all the measures
#' to achieve a good power and repeatability of the test.
#' If the dimension of the test functions is higher, also the number of simulations should
#' preferably be higher.
#'
#' @section Tests based on several functions:
#' If a list of (suitable) objects are provided in the argument \code{curve_sets},
#' then by default (\code{nstep = 2}) the two-step combining procedure is used to
#' perform the combined global test as described in Myllymäki and Mrkvička (2020).
#' If \code{nstep = 1} and the lengths of the multivariate vectors in each component
#' of the list are equal, then the one-step combining procedure is used where the
#' functions are concatenated together into a one long vector.
#'
#' @references
#' Mrkvička, T., Myllymäki, M. and Hahn, U. (2017). Multiple Monte Carlo testing, with applications in spatial point processes. Statistics & Computing 27(5), 1239-1255. doi: 10.1007/s11222-016-9683-9
#'
#' Mrkvička, T., Myllymäki, M., Jilek, M. and Hahn, U. (2020) A one-way ANOVA test for functional data with graphical interpretation. Kybernetika 56(3), 432-458. doi: 10.14736/kyb-2020-3-0432
#'
#' Mrkvička, T., Myllymäki, M., Kuronen, M. and Narisetty, N. N. (2022) New methods for multiple testing in permutation inference for the general linear model. Statistics in Medicine 41(2), 276-297. doi: 10.1002/sim.9236
#'
#' Mrkvička and Myllymäki (2023). False discovery rate envelopes. Statistics and Computing 33, 109. https://doi.org/10.1007/s11222-023-10275-7
#'
#' Myllymäki, M., Grabarnik, P., Seijo, H. and Stoyan. D. (2015). Deviation test construction and power comparison for marked spatial point patterns. Spatial Statistics 11, 19-34. doi: 10.1016/j.spasta.2014.11.004
#'
#' Myllymäki, M., Mrkvička, T., Grabarnik, P., Seijo, H. and Hahn, U. (2017). Global envelope tests for spatial point patterns. Journal of the Royal Statistical Society: Series B (Statistical Methodology) 79, 381–404. doi: 10.1111/rssb.12172
#'
#' Myllymäki, M. and Mrkvička, T. (2023). GET: Global envelopes in R. arXiv:1911.06583 [stat.ME] https://doi.org/10.48550/arXiv.1911.06583
#'
#' Ripley, B.D. (1981). Spatial statistics. Wiley, New Jersey.
#'
#' @inheritParams forder
#' @inheritParams central_region
#' @inheritParams fdr_envelope
#' @param curve_sets A \code{\link{curve_set}} object or a list of \code{\link{curve_set}}
#' objects containing a data function and simulated functions from which the envelope is
#' to be constructed.
#' Also \code{envelope} objects of \pkg{spatstat} are accepted instead of curve_set objects.
#' If an envelope object is given, it must contain the summary
#' functions from simulated patterns which can be achieved by setting
#' \code{savefuns = TRUE} when calling the \code{envelope} function.
#' @param typeone Character string indicating which type I error rate to control,
#' either the family-wise error rate ('fwer') or false discovery rate ('fdr').
#' @param alpha The significance level. The 100(1-alpha)\% global envelope will be calculated
#' under the 'fwer' or 'fdr' control.
#' If a vector of values is provided, the global envelopes are calculated for each value.
#' @param ties The method to obtain a unique p-value when \code{typeone = 'fwer'} and \code{type = 'rank'};
#' otherwise ignored.
#' Possible values are 'midrank', 'random', 'conservative', 'liberal' and 'erl'.
#' For 'conservative' the resulting p-value will be the highest possible.
#' For 'liberal' the p-value will be the lowest possible.
#' For 'random' the rank of the obs within the tied values is uniformly sampled so that the resulting
#' p-value is at most the conservative option and at least the liberal option.
#' For 'midrank' the mid-rank within the tied values is taken.
#' For 'erl' the extreme rank length p-value is calculated.
#' The default is 'erl'.
#' @param ... Additional parameters to be passed to \code{\link{central_region}} for the computation
#' of the 'fwer' envelope.
#' @return Either an object of class "global_envelope" or "combined_global_envelope",
#' similarly as the objects returned by \code{\link{central_region}}.
#' Further, if \code{typeone = "fdr"}, the objects have the further class
#' "fdr_envelope" or "combined_fdr_envelope".
#'
#' The \code{global_envelope} is essentially a data frame containing columns
#' \itemize{
#' \item the values of the argument r at which the test was made, copied from the argument \code{curve_sets} with the corresponding names
#' \item obs = values of the data function, copied from the argument \code{curve_sets}
#' (unlike for central regions, \code{obs} always exists for a global envelope test)
#' \item lo = the lower envelope; in case of a vector of alpha values, several 'lo' exist with names paste0("lo.", 100*(1-alpha))
#' \item hi = the upper envelope; in case of a vector of alpha values, several 'lo' exist with names paste0("hi.", 100*(1-alpha))
#' \item central = a central curve as specified in the argument \code{central}.
#' }
#' Moreover, the returned object has the same attributes as the \code{global_envelope} object returned by
#' \code{\link{central_region}} and in addition
#' \itemize{
#'   \item p = the p-value of the test
#' }
#' and in the case that \code{type = 'rank'} also
#' \itemize{
#'   \item p_interval = The p-value interval \eqn{[p_{liberal}, p_{conservative}]}{[p_liberal, p_conservative]}.
#'   \item ties = As the argument \code{ties}.
#' }
#'
#' The \code{combined_global_envelope} is a list of \code{global_envelope} objects
#' containing the above mentioned columns and which all together form the global envelope.
#' It has the same attributes as described in \code{\link{central_region}}, and in addition also
#' the p-value \code{p}.
#' The 2d classes are attached as described in \code{\link{central_region}}.
#' @export
#' @seealso \code{\link{plot.global_envelope}}, \code{\link{central_region}},
#' \code{\link{GET.composite}}
#' @examples
#' # Goodness-of-fit testing for simple hypothesis
#' if(require("spatstat.explore", quietly=TRUE)) {
#'   # Testing complete spatial randomness (CSR)
#'   #==========================================
#'   X <- unmark(spruces)
#'
#'   \donttest{nsim <- 1999 # Number of simulations}
#'   \dontshow{nsim <- 19 # Number of simulations}
#'
#'   # Illustration of general workflow for simple hypotheses
#'   #=======================================================
#'   # First illustrate the general workflow for the test by this example
#'   # of CSR test for a point pattern X using the empirical L-function.
#'   # Define the argument values at which the functions are evaluated
#'   obs.L <- Lest(X, correction="translate")
#'   r <- obs.L[['r']]
#'   # The test function for the data
#'   obs <- obs.L[['trans']] - r
#'   # Prepare simulations and calculate test functions for them at same r as 'obs'
#'   sim <- matrix(nrow=length(r), ncol=nsim)
#'   for(i in 1:nsim) {
#'     sim.X <- runifpoint(ex=X) # simulation under CSR
#'     sim[, i] <- Lest(sim.X, correction="translate", r=r)[['trans']] - r
#'   }
#'   # Create a curve_set containing argument values, observed and simulated functions
#'   cset <- curve_set(r=r, obs=obs, sim=sim)
#'   # Perform the test
#'   res <- global_envelope_test(cset, type="erl")
#'   plot(res) + ggplot2::ylab(expression(italic(hat(L)(r)-r)))
#'
#'   # Simple hypothesis for a point pattern utilizing the spatstat package
#'   #=====================================================================
#'   # Generate nsim simulations under CSR, calculate L-function for the data and simulations
#'   env <- envelope(X, fun="Lest", nsim=nsim,
#'                   savefuns=TRUE, # save the functions
#'                   correction="translate", # edge correction for L
#'                   transform=expression(.-r), # centering
#'                   simulate=expression(runifpoint(ex=X))) # Simulate CSR
#'   # The rank envelope test (ERL)
#'   res <- global_envelope_test(env, type="erl")
#'   # Plot the result
#'   plot(res)
#'
#'   ## Advanced use:
#'   # Choose the interval of distances [r_min, r_max] (at the same time create a curve_set from 'env')
#'   cset <- crop_curves(env, r_min=1, r_max=7)
#'   # Do the rank envelope test (erl)
#'   res <- global_envelope_test(cset, type="erl")
#'   plot(res) + ggplot2::ylab(expression(italic(L(r)-r)))
#'
#'   # A combined global envelope test
#'   #================================
#'   # As an example test CSR of the saplings point pattern by means of
#'   # L, F, G and J functions.
#'   data(saplings)
#'   X <- as.ppp(saplings, W=square(75))
#'
#'   \donttest{nsim <- 499 # Number of simulations}
#'   \dontshow{nsim <- 19 # Number of simulations}
#'   # Specify distances for different test functions
#'   n <- 500 # the number of r-values
#'   rmin <- 0; rmax <- 20; rstep <- (rmax-rmin)/n
#'   rminJ <- 0; rmaxJ <- 8; rstepJ <- (rmaxJ-rminJ)/n
#'   r <- seq(0, rmax, by=rstep)    # r-distances for Lest
#'   rJ <- seq(0, rmaxJ, by=rstepJ) # r-distances for Fest, Gest, Jest
#'   \dontshow{r <- r[1:50]; rJ <- rJ[1:50]}
#'
#'   # Perform simulations of CSR and calculate the L-functions
#'   env_L <- envelope(X, nsim=nsim,
#'    simulate=expression(runifpoint(ex=X)),
#'    fun="Lest", correction="translate",
#'    transform=expression(.-r), # Take the L(r)-r function instead of L(r)
#'    r=r,                       # Specify the distance vector
#'    savefuns=TRUE,             # Save the estimated functions
#'    savepatterns=TRUE)         # Save the simulated patterns
#'   # Take the simulations from the returned object
#'   simulations <- attr(env_L, "simpatterns")
#'   # Then calculate the other test functions F, G, J for each simulated pattern
#'   env_F <- envelope(X, nsim=nsim, simulate=simulations,
#'                     fun="Fest", correction="Kaplan", r=rJ,
#'                     savefuns=TRUE)
#'   env_G <- envelope(X, nsim=nsim, simulate=simulations,
#'                     fun="Gest", correction="km", r=rJ,
#'                     savefuns=TRUE)
#'   env_J <- envelope(X, nsim=nsim, simulate=simulations,
#'                     fun="Jest", correction="none", r=rJ,
#'                     savefuns=TRUE)
#'
#'   # Crop the curves to the desired r-interval I
#'   curve_set_L <- crop_curves(env_L, r_min=rmin, r_max=rmax)
#'   curve_set_F <- crop_curves(env_F, r_min=rminJ, r_max=rmaxJ)
#'   curve_set_G <- crop_curves(env_G, r_min=rminJ, r_max=rmaxJ)
#'   curve_set_J <- crop_curves(env_J, r_min=rminJ, r_max=rmaxJ)
#'
#'   res <- global_envelope_test(curve_sets=list(L=curve_set_L, F=curve_set_F,
#'                                               G=curve_set_G, J=curve_set_J))
#'   plot(res)
#'   plot(res, labels=c("L(r)-r", "F(r)", "G(r)", "J(r)"))
#' }
#'
#' # A test based on a low dimensional random vector
#' #================================================
#' # Let us generate some example data.
#' X <- matrix(c(-1.6,1.6),1,2) # data pattern X=(X_1,X_2)
#' if(requireNamespace("mvtnorm", quietly=TRUE)) {
#'   Y <- mvtnorm::rmvnorm(200,c(0,0),matrix(c(1,0.5,0.5,1),2,2)) # simulations
#'   plot(Y, xlim=c(min(X[,1],Y[,1]), max(X[,1],Y[,1])), ylim=c(min(X[,2],Y[,2]), max(X[,2],Y[,2])))
#'   points(X, col=2)
#'
#'   # Test the null hypothesis is that X is from the distribution of Y's (or if it is an outlier).
#'
#'   # Case 1. The test vector is (X_1, X_2)
#'   cset1 <- curve_set(r=1:2, obs=as.vector(X), sim=t(Y))
#'   res1 <- global_envelope_test(cset1)
#'   plot(res1)
#'
#'   # Case 2. The test vector is (X_1, X_2, (X_1-mean(Y_1))*(X_2-mean(Y_2))).
#'   t3 <- function(x, y) { (x[,1]-mean(y[,1]))*(x[,2]-mean(y[,2])) }
#'   cset2 <- curve_set(r=1:3, obs=c(X[,1],X[,2],t3(X,Y)), sim=rbind(t(Y), t3(Y,Y)))
#'   res2 <- global_envelope_test(cset2)
#'   plot(res2)
#' }
global_envelope_test <- function(curve_sets, typeone = c("fwer", "fdr"),
                                 alpha = 0.05, alternative = c("two.sided", "less", "greater"),
                                 type = "erl", algorithm = c("IATSE", "ATSE"),
                                 ties = "erl", probs = c(0.025, 0.975), quantile.type = 7,
                                 central = "mean", nstep = 2, ...,
                                 lower = NULL, upper = NULL) {
  typeone <- match.arg(typeone)
  if(typeone == "fwer") {
    if(!missing(algorithm)) warning("The algorithm cannot be specified under the FWER control.")
    if(!(type %in% c("rank", "erl", "cont", "area", "qdir", "st", "unscaled")))
      stop("No such type for global envelope.")
  }
  else {
    if(!missing(type)) warning("The type cannot be specified under the FDR control.")
    algorithm <- match.arg(algorithm)
  }
  if(typeone == "fdr" && central != "mean")
    warning("For the FDR envelope \'central\' is always the mean of the simulated functions. No other options available.")

  switch(typeone,
         fwer = {
           res <- fwer_envelope(curve_sets=curve_sets,
                                type = type, alpha = alpha,
                                alternative = alternative,
                                ties = ties, probs = probs, quantile.type = quantile.type,
                                central = central, nstep = nstep, ...)
         },
         fdr = {
           res <- fdr_envelope(curve_sets=curve_sets, alpha = alpha,
                               alternative = alternative,
                               algorithm = algorithm,
                               lower = lower, upper = upper)
         })
  res
}

#' The rank envelope test
#'
#' The rank envelope test, p-values and global envelopes.
#' The test corresponds to the global envelope test that can be carriet out by
#' \code{\link{global_envelope_test}} by specifying the \code{type} for which the options
#' \code{"rank"}, \code{"erl"}, \code{"cont"} and \code{"area"} are available. The last
#' three are modifications of the first one to treat the ties in the extreme rank ordering
#' used in \code{"rank"}. This function is kept for historical reasons.
#'
#' The \code{"rank"} envelope test is a completely non-parametric test, which provides
#' the 100(1-alpha)\% global envelope for the chosen test function T(r) on
#' the chosen interval of distances and associated p-values.
#' The other three types are solutions to break the ties in the extreme ranks
#' on which the \code{"rank"} envelope test is based on.
#'
#' Note: The method to break ties for the global \code{type = "rank"} envelope
#' (Myllymäki et al., 2017) can be done by the argument \code{ties} with default
#' to \code{ties = "erl"} corresponding to the extreme rank length breaking of ties.
#' In this case the global envelope corresponds to the extreme rank measure.
#' If instead choosing \code{type} to be \code{"erl"}, \code{"cont"} or \code{"area"},
#' then the global envelope corresponds to these measures.
#'
#' @section Number of simulations:
#' The global \code{"erl"}, \code{"cont"}, \code{"area"} envelope tests allow 
#' in principle a lower number of simulations to be used than the global \code{"rank"} test
#' based on extreme ranks.
#' However, if feasible, we recommend some thousands of simulations in any case to achieve
#' a good power and repeatability of the test.
#' For the global \code{"rank"} envelope test, Myllymäki et al. (2017) recommended to use
#' at least 2500 simulations for testing at the significance level alpha = 0.05 for single
#' function tests, experimented with summary functions for point processes.
#'
#' @references
#' Myllymäki, M., Mrkvička, T., Grabarnik, P., Seijo, H. and Hahn, U. (2017). Global envelope tests for spatial point patterns. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 79: 381–404. doi: 10.1111/rssb.12172
#'
#' Mrkvička, T., Myllymäki, M. and Hahn, U. (2017). Multiple Monte Carlo testing, with applications in spatial point processes. Statistics & Computing 27 (5): 1239-1255. doi: 10.1007/s11222-016-9683-9
#'
#' Mrkvička, T., Myllymäki, M., Jilek, M. and Hahn, U. (2020) A one-way ANOVA test for functional data with graphical interpretation. Kybernetika 56 (3), 432-458. doi: 10.14736/kyb-2020-3-0432
#'
#' @param curve_set A \code{\link{curve_set}} object, or an \code{envelope} object of
#' \pkg{spatstat}. If an envelope object is given, it must contain the summary
#' functions from the simulated patterns which can be achieved by setting
#' savefuns = TRUE when calling the function of \pkg{spatstat}.
#' @param type The type of the global envelope with current options for "rank", "erl", "cont" and "area".
#' If "rank", the global rank envelope accompanied by the p-interval is given (Myllymäki et al., 2017).
#' If "erl", the global rank envelope based on extreme rank lengths accompanied by the extreme rank
#' length p-value is given (Myllymäki et al., 2017, Mrkvička et al., 2018). See details and additional
#' sections thereafter.
#' @param ... Additional parameters to be passed to \code{\link{global_envelope_test}}.
#' @return An object of class \code{global_envelope} of \code{combined_global_envelope}
#' which can be printed and plotted directly. See \code{\link{global_envelope_test}} for more details.
#' @export
#' @seealso \code{\link{global_envelope_test}}
#' @examples
#' # See ?global_envelope_test for more examples
#'
#' ## Testing complete spatial randomness (CSR)
#' #-------------------------------------------
#' if(require("spatstat.explore", quietly=TRUE)) {
#'   X <- unmark(spruces)
#'   \donttest{nsim <- 2499 # Number of simulations}
#'   \dontshow{nsim <- 19 # Number of simulations for testing}
#'   # Generate nsim simulations under CSR, calculate centred L-function for the data and simulations
#'   env <- envelope(X, fun="Lest", nsim=nsim, savefuns=TRUE,
#'                   correction="translate", transform=expression(.-r),
#'                   simulate=expression(runifpoint(ex=X)))
#'   # The rank envelope test
#'   res <- rank_envelope(env)
#'   # Plot the result.
#'   plot(res)
#'
#'   ## Advanced use:
#'   # Choose the interval of distances [r_min, r_max] (at the same time create a curve_set from 'env')
#'   curve_set <- crop_curves(env, r_min=1, r_max=7)
#'   # Do the rank envelope test
#'   res <- rank_envelope(curve_set); plot(res)
#' }
rank_envelope <- function(curve_set, type = "rank", ...) {
  if(!(type %in% c("rank", "erl", "cont", "area"))) stop("No such type for the global rank envelope.")
  global_envelope_test(curve_set, typeone="fwer", type=type, ...)
}

#' Global scaled maximum absolute difference (MAD) envelope tests
#'
#' Performs the global scaled MAD envelope tests, either directional quantile or studentised,
#' or the unscaled MAD envelope test. These tests correspond to calling the
#' function \code{\link{global_envelope_test}} with \code{type="qdir"}, \code{type = "st"} and
#' \code{type="unscaled"}, respectively. The functions \code{qdir_envelope}, \code{st_envelope} and
#' \code{unscaled_envelope} have been kept for historical reasons;
#' preferably use \code{\link{global_envelope_test}} with the suitable \code{type} argument.
#'
#' The directional quantile envelope test (Myllymäki et al., 2015, 2017)
#' takes into account the unequal variances of the test function T(r)
#' for different distances r and is also protected against asymmetry of T(r).
#'
#' @references
#' Myllymäki, M., Grabarnik, P., Seijo, H. and Stoyan. D. (2015). Deviation test construction and power comparison for marked spatial point patterns. Spatial Statistics 11: 19-34. doi: 10.1016/j.spasta.2014.11.004
#'
#' Myllymäki, M., Mrkvička, T., Grabarnik, P., Seijo, H. and Hahn, U. (2017). Global envelope tests for spatial point patterns. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 79: 381–404. doi: 10.1111/rssb.12172
#'
#' @inheritParams rank_envelope
#' @return An object of class \code{global_envelope} of \code{combined_global_envelope}
#' which can be printed and plotted directly. See \code{\link{global_envelope_test}} for more details.
#' @export
#' @name qdir_envelope
#' @seealso \code{\link{global_envelope_test}}
#' @examples
#' # See more examples in ?global_envelope_test
#' ## Testing complete spatial randomness (CSR)
#' #-------------------------------------------
#' if(require("spatstat.explore", quietly=TRUE)) {
#'   X <- spruces
#'   \donttest{nsim <- 999 # Number of simulations}
#'   \dontshow{nsim <- 19 # Number of simulations for testing}
#'   ## Test for complete spatial randomness (CSR)
#'   # Generate nsim simulations under CSR, calculate centred L-function for the data and simulations
#'   env <- envelope(X, fun="Lest", nsim=nsim, savefuns=TRUE,
#'                   correction="translate", transform=expression(.-r),
#'                   simulate=expression(runifpoint(ex=X)))
#'   res_qdir <- qdir_envelope(env) # The directional quantile envelope test
#'   plot(res_qdir)
#'
#'   ## Advanced use:
#'   # Create a curve set, choosing the interval of distances [r_min, r_max]
#'   curve_set <- crop_curves(env, r_min=1, r_max=8)
#'   # The directional quantile envelope test
#'   res_qdir <- qdir_envelope(curve_set); plot(res_qdir)
#'   # The studentised envelope test
#'   res_st <- st_envelope(curve_set); plot(res_st)
#'   # The unscaled envelope test
#'   res_unscaled <- unscaled_envelope(curve_set); plot(res_unscaled)
#' }
qdir_envelope <- function(curve_set, ...) {
  args <- list(...)
  if("type" %in% names(args)) warning("type is hardcoded to be qdir here. No other options.")
  global_envelope_test(curve_set, typeone="fwer", type="qdir", ...)
}

#' Studentised envelope test
#'
#' @details The studentised envelope test (Myllymäki et al., 2015, 2017)
#' takes into account the unequal variances of the test function T(r)
#' for different distances r.
#'
#' @export
#' @rdname qdir_envelope
st_envelope <- function(curve_set, ...) {
  args <- list(...)
  if("type" %in% names(args)) warning("type is hardcoded to be st here. No other options.")
  global_envelope_test(curve_set, typeone="fwer", type="st", ...)
}

#' Unscaled envelope test
#'
#' @details The unscaled envelope test (Ripley, 1981) corresponds to the classical maximum
#' deviation test without scaling, and leads to envelopes with constant width over the distances r.
#' Thus, it suffers from unequal variance of T(r) over the distances r and from the asymmetry of
#' distribution of T(r). We recommend to use the other global envelope tests available,
#' see \code{\link{global_envelope_test}} for full list of alternatives.
#'
#' @references
#' Ripley, B.D. (1981). Spatial statistics. Wiley, New Jersey.
#' @export
#' @rdname qdir_envelope
unscaled_envelope <- function(curve_set, ...) {
  args <- list(...)
  if("type" %in% names(args)) warning("type is hardcoded to be unscaled here. No other options.")
  global_envelope_test(curve_set, typeone="fwer", type="unscaled", ...)
}
myllym/GET documentation built on Feb. 4, 2024, 10:44 p.m.