R/score.test.R
In RecordTest: Inference Tools in Time Series Based on Record Statistics

Documented in score.test

#' @title Score Test for the Likelihood of the Record Indicators
#' 
#' @importFrom stats pchisq
#' 
#' @description This function performs score (or Lagrange multiplier) tests
#'   for the likelihood of the record indicators \eqn{I_t} to study the 
#'   hypothesis of the classical record model (i.e., of IID continuous RVs).
#'   
#' @details 
#'   The null hypothesis of the score tests is that in every vector 
#'   (columns of the matrix \code{X}), the probability of record at 
#'   time \eqn{t} is \eqn{1/t} as in the classical record model, and 
#'   the alternative depends on the \code{alternative} and \code{probabilities}
#'   arguments. The probability at time \eqn{t} is any value, but equal in the
#'   \eqn{M} series if \code{probabilities = "equal"}  or different in the 
#'   \eqn{M} series if \code{probabilities = "different"}. The alternative 
#'   hypothesis is more specific in the first case than in the second one.
#'   Furthermore, the \code{"two.sided"} \code{alternative} is tested with 
#'   the usual Lagrange multiplier statistic, while the one-sided 
#'   \code{alternatives} use specific statistics based on scores. 
#'   (See Cebrián, Castillo-Mateo and Asín (2022) for details on these tests.)
#'
#'   If \code{alternative = "two.sided" & probabilities = "equal"}, under the
#'   null, the Lagrange multiplier statistic has an asymptotic \eqn{\chi^2} 
#'   distribution with \eqn{T-1} degrees of freedom. It has been seen that for
#'   the approximation to be adequate \eqn{M} should be greater than \eqn{T}. 
#'   Otherwise, a \code{simulate.p.value} can be computed.
#'   
#'   If \code{alternative = "two.sided" & probabilities = "different"}, the 
#'   asymptotic behaviour of the Lagrange multiplier statistic is not 
#'   fulfilled, but the Monte Carlo approach to simulate the p-value is 
#'   applied.
#'   
#'   If \code{alternative} is one-sided and \code{probabilities = "equal"},
#'   the statistic of the test is
#'   \deqn{\mathcal{T} = \sum_{t=2}^T \frac{(t S_t-M)^2}{M(t-1)} I_{\{S_t > M/t\}}.}
#'   The p-value of this test is estimated with Monte Carlo simulations,
#'   since the compute the exact distribution of \eqn{\mathcal{T}} is very 
#'   expensive.   
#'   
#'   If \code{alternative} is one-sided and \code{probabilities = "different"},
#'   the statistic of the test is
#'   \deqn{\mathcal{S} = \frac{\sum_{t=2}^T t (t S_t - M) / (t - 1)}{\sqrt{M \sum_{t=2}^T t^2 / (t - 1)}},}
#'   which is asymptotically standard normal distributed in \eqn{M}. It is 
#'   equivalent to the statistic of the weighted number of records 
#'   \code{\link{N.test}} with weights \eqn{\omega_t = t^2 / (t-1)}  
#'   \eqn{(t=2,\ldots,T)}.
#' 
#' @param X A numeric vector, matrix (or data frame).
#' @param record A character string indicating the type of record, 
#'   "upper" or "lower".
#' @param alternative A character indicating the alternative hypothesis 
#'   (\code{"two.sided"}, \code{"greater"} or \code{"less"}). 
#'   Different statistics are used in the one-sided and two-sided alternatives
#'   (see Details).
#' @param probabilities A character indicating if the alternative hypothesis 
#'   assume all series with \code{"equal"} or \code{"different"} probabilities
#'   of record.
#' @param simulate.p.value Logical. Indicates whether to compute p-values by
#'   Monte Carlo simulation. 
#' @param B An integer specifying the number of replicates used in the Monte 
#'   Carlo estimation.
#' @return A list of class \code{"htest"}  with the following elements:
#'   \item{statistic}{Value of the  statistic.}
#'   \item{parameter}{Degrees of freedom of the approximate \eqn{\chi^2} 
#'     distribution.}
#'   \item{p.value}{P-value.}
#'   \item{method}{A character string indicating the type of test.}
#'   \item{data.name}{A character string giving the name of the data.}
#'   \item{alternative}{A character string indicating the alternative
#'     hypothesis.}
#'     
#' @author Jorge Castillo-Mateo
#' @seealso \code{\link{lr.test}}, \code{\link{global.test}}
#' @references 
#' Cebrián AC, Castillo-Mateo J, Asín J (2022).
#' “Record Tests to Detect Non Stationarity in the Tails with an Application to Climate Change.”
#' \emph{Stochastic Environmental Research and Risk Assessment}, \strong{36}(2), 313-330. 
#' \doi{10.1007/s00477-021-02122-w}.
#' 
#' @examples
#' set.seed(23)
#' # two-sided and different probabilities of record, always simulated the p-value
#' score.test(ZaragozaSeries, probabilities = "different")
#' # equal probabilities
#' score.test(ZaragozaSeries, probabilities = "equal")
#' # equal probabilities with simulated p-value
#' score.test(ZaragozaSeries, probabilities = "equal", simulate.p.value = TRUE)
#' 
#' # one-sided and different probabilities of record
#' score.test(ZaragozaSeries, alternative = "greater", probabilities = "different")
#' # different probabilities with simulated p-value
#' score.test(ZaragozaSeries, alternative = "greater", probabilities = "different", 
#'   simulate.p.value = TRUE)
#' # equal probabilities, always simulated the p-value
#' score.test(ZaragozaSeries, alternative = "greater", probabilities = "equal")
#' @export score.test
score.test <- function(X, 
                       record = c("upper", "lower"), 
                       alternative = c("two.sided", "greater", "less"),
                       probabilities = c("different", "equal"), 
                       simulate.p.value = FALSE,
                       B = 1000) {
  
  record <- match.arg(record)
  alternative <- match.arg(alternative)
  probabilities <- match.arg(probabilities)
  METHOD <- paste("Score test for", record, "record indicators")
  DNAME <- deparse(substitute(X))
  Trows <- NROW(X)
  Mcols <- NCOL(X)
  if (Trows == 1) { stop("'NROW(X)' should be greater than 1") }
  t <- 2:Trows
  
  if (alternative == "two.sided") {
    switch(probabilities,
           "equal" = {
             # lagrange multiplier function
             LM.fun <- function(S) { 
               return(sum((t * S - Mcols)^2 / (Mcols * (t - 1)))) 
             }
             ###################################
             # score statistic
             LM0 <- LM.fun(rowSums(.I.record(X, record = record, Trows = Trows))[-1])
             ###################################
             # p-value Monte-Carlo or X-square
             if (simulate.p.value) {
               METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
               SB <- matrix(stats::rbinom(n = (Trows - 1) * B, size = Mcols, prob = 1 / t), ncol = B)
               LMB <- apply(SB, 2, LM.fun)
               pv <- sum(LMB >= LM0) / B
             } else {
               pv <- stats::pchisq(q = LM0, df = Trows - 1, lower.tail = FALSE)
             }
             ###################################
             
             names(LM0) <- "X-squared"
             names(Trows) <- "df"
             
             structure(list(statistic = LM0, parameter = Trows - 1, p.value = pv, 
                            method = METHOD, data.name = DNAME, 
                            alternative = paste(alternative, "with", probabilities, "probabilities")),
                       class = "htest")
           },
           "different" = {
             # lagrange multiplier function
             LM.fun <- function(S){ # argument S is I
               S <- sweep((sweep(S, MARGIN = 1, t, `*`) - 1)^2, MARGIN = 1, t - 1, `/`)
               return(sum(S))
             }
             ###################################
             # score statistic
             LM0 <- LM.fun(.I.record(X, record = record, Trows = Trows)[-1, , drop = FALSE])
             ###################################
             # p-value Monte-Carlo
             METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
             IB <- array(stats::rbinom(n = (Trows - 1) * Mcols * B, size = 1, prob = 1 / t), dim = c(Trows - 1, Mcols, B))
             LMB <- apply(IB, 3, LM.fun)
             pv <- sum(LMB >= LM0) / B
             ###################################
             
             names(LM0) <- "LM"
             
             structure(list(statistic = LM0, p.value = pv,
                            method = METHOD, data.name = DNAME, 
                            alternative = paste(alternative, "with", probabilities, "probabilities")),
                       class = "htest")
           })
  } else { # alternative %in% c("greater", "less")
    switch(probabilities,
           "equal" = {
             # lagrange multiplier function
             LM.fun <- function(S) { 
               return(sum((t * S - Mcols)^2 / (t - 1) * (S > Mcols / t)) / Mcols)
             }
             ###################################
             # score statistic
             LM0 <- LM.fun(rowSums(.I.record(X, record = record, Trows = Trows)[-1, , drop = FALSE]))
             ###################################
             # p-value Monte-Carlo
             METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
             SB <- matrix(stats::rbinom(n = (Trows - 1) * B, size = Mcols, prob = 1 / t), ncol = B)
             LMB <- apply(SB, 2, LM.fun)
             pv <- switch (alternative,
                           "greater" = {sum(LMB >= LM0) / B},
                           "less"    = {sum(LMB <= LM0) / B}
             )
             ###################################
             
             names(LM0) <- "LM"
             
             structure(list(statistic = LM0, p.value = pv, 
                            method = METHOD, data.name = DNAME, 
                            alternative = paste(alternative, "with", probabilities, "probabilities")),
                       class = "htest")
           },
           "different" = {
             # lagrange multiplier function
             LM.fun <- function(S){
               return(sum(t * (t * S - Mcols) / (t - 1)))
             }
             ###################################
             # score statistic (need to divide a constant)
             LM0 <- LM.fun(rowSums(.I.record(X, record = record, Trows = Trows)[-1, , drop = FALSE])) 
             ###################################
             # p-value Monte-Carlo or Normal
             if (simulate.p.value) {
               METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
               SB <- matrix(stats::rbinom(n = (Trows - 1) * B, size = Mcols, prob = 1 / t), ncol = B)
               LMB <- apply(SB, 2, LM.fun)
               pv <- switch (alternative,
                             "greater" = {sum(LMB >= LM0) / B},
                             "less"    = {sum(LMB <= LM0) / B}
               )
               LM0 <- LM0 / sqrt(Mcols * sum(t^2 / (t - 1)))
             } else {
               LM0 <- LM0 / sqrt(Mcols * sum(t^2 / (t - 1)))
               pv <- stats::pnorm(LM0, lower.tail = (alternative == 'less'))
             }
             ###################################
             
             names(LM0) <- "Z"
             
             structure(list(statistic = LM0, p.value = pv,
                            method = METHOD, data.name = DNAME, 
                            alternative = paste(alternative, "with", probabilities, "probabilities")),
                       class = "htest")
           })
  }
}