#' @title Score Test for the Likelihood of the Record Indicators
#'
#' @importFrom stats pchisq
#'
#' @description This function performs score (or Lagrange multiplier) tests
#' for the likelihood of the record indicators \eqn{I_t} to study the
#' hypothesis of the classical record model (i.e., of IID continuous RVs).
#'
#' @details
#' The null hypothesis of the score tests is that in every vector
#' (columns of the matrix \code{X}), the probability of record at
#' time \eqn{t} is \eqn{1/t} as in the classical record model, and
#' the alternative depends on the \code{alternative} and \code{probabilities}
#' arguments. The probability at time \eqn{t} is any value, but equal in the
#' \eqn{M} series if \code{probabilities = "equal"} or different in the
#' \eqn{M} series if \code{probabilities = "different"}. The alternative
#' hypothesis is more specific in the first case than in the second one.
#' Furthermore, the \code{"two.sided"} \code{alternative} is tested with
#' the usual Lagrange multiplier statistic, while the one-sided
#' \code{alternatives} use specific statistics based on scores.
#' (See Cebrián, Castillo-Mateo and Asín (2022) for details on these tests.)
#'
#' If \code{alternative = "two.sided" & probabilities = "equal"}, under the
#' null, the Lagrange multiplier statistic has an asymptotic \eqn{\chi^2}
#' distribution with \eqn{T-1} degrees of freedom. It has been seen that for
#' the approximation to be adequate \eqn{M} should be greater than \eqn{T}.
#' Otherwise, a \code{simulate.p.value} can be computed.
#'
#' If \code{alternative = "two.sided" & probabilities = "different"}, the
#' asymptotic behaviour of the Lagrange multiplier statistic is not
#' fulfilled, but the Monte Carlo approach to simulate the p-value is
#' applied.
#'
#' If \code{alternative} is one-sided and \code{probabilities = "equal"},
#' the statistic of the test is
#' \deqn{\mathcal{T} = \sum_{t=2}^T \frac{(t S_t-M)^2}{M(t-1)} I_{\{S_t > M/t\}}.}
#' The p-value of this test is estimated with Monte Carlo simulations,
#' since the compute the exact distribution of \eqn{\mathcal{T}} is very
#' expensive.
#'
#' If \code{alternative} is one-sided and \code{probabilities = "different"},
#' the statistic of the test is
#' \deqn{\mathcal{S} = \frac{\sum_{t=2}^T t (t S_t - M) / (t - 1)}{\sqrt{M \sum_{t=2}^T t^2 / (t - 1)}},}
#' which is asymptotically standard normal distributed in \eqn{M}. It is
#' equivalent to the statistic of the weighted number of records
#' \code{\link{N.test}} with weights \eqn{\omega_t = t^2 / (t-1)}
#' \eqn{(t=2,\ldots,T)}.
#'
#' @param X A numeric vector, matrix (or data frame).
#' @param record A character string indicating the type of record,
#' "upper" or "lower".
#' @param alternative A character indicating the alternative hypothesis
#' (\code{"two.sided"}, \code{"greater"} or \code{"less"}).
#' Different statistics are used in the one-sided and two-sided alternatives
#' (see Details).
#' @param probabilities A character indicating if the alternative hypothesis
#' assume all series with \code{"equal"} or \code{"different"} probabilities
#' of record.
#' @param simulate.p.value Logical. Indicates whether to compute p-values by
#' Monte Carlo simulation.
#' @param B An integer specifying the number of replicates used in the Monte
#' Carlo estimation.
#' @return A list of class \code{"htest"} with the following elements:
#' \item{statistic}{Value of the statistic.}
#' \item{parameter}{Degrees of freedom of the approximate \eqn{\chi^2}
#' distribution.}
#' \item{p.value}{P-value.}
#' \item{method}{A character string indicating the type of test.}
#' \item{data.name}{A character string giving the name of the data.}
#' \item{alternative}{A character string indicating the alternative
#' hypothesis.}
#'
#' @author Jorge Castillo-Mateo
#' @seealso \code{\link{lr.test}}, \code{\link{global.test}}
#' @references
#' Cebrián AC, Castillo-Mateo J, Asín J (2022).
#' “Record Tests to Detect Non Stationarity in the Tails with an Application to Climate Change.”
#' \emph{Stochastic Environmental Research and Risk Assessment}, \strong{36}(2), 313-330.
#' \doi{10.1007/s00477-021-02122-w}.
#'
#' @examples
#' set.seed(23)
#' # two-sided and different probabilities of record, always simulated the p-value
#' score.test(ZaragozaSeries, probabilities = "different")
#' # equal probabilities
#' score.test(ZaragozaSeries, probabilities = "equal")
#' # equal probabilities with simulated p-value
#' score.test(ZaragozaSeries, probabilities = "equal", simulate.p.value = TRUE)
#'
#' # one-sided and different probabilities of record
#' score.test(ZaragozaSeries, alternative = "greater", probabilities = "different")
#' # different probabilities with simulated p-value
#' score.test(ZaragozaSeries, alternative = "greater", probabilities = "different",
#' simulate.p.value = TRUE)
#' # equal probabilities, always simulated the p-value
#' score.test(ZaragozaSeries, alternative = "greater", probabilities = "equal")
#' @export score.test
score.test <- function(X,
record = c("upper", "lower"),
alternative = c("two.sided", "greater", "less"),
probabilities = c("different", "equal"),
simulate.p.value = FALSE,
B = 1000) {
record <- match.arg(record)
alternative <- match.arg(alternative)
probabilities <- match.arg(probabilities)
METHOD <- paste("Score test for", record, "record indicators")
DNAME <- deparse(substitute(X))
Trows <- NROW(X)
Mcols <- NCOL(X)
if (Trows == 1) { stop("'NROW(X)' should be greater than 1") }
t <- 2:Trows
if (alternative == "two.sided") {
switch(probabilities,
"equal" = {
# lagrange multiplier function
LM.fun <- function(S) {
return(sum((t * S - Mcols)^2 / (Mcols * (t - 1))))
}
###################################
# score statistic
LM0 <- LM.fun(rowSums(.I.record(X, record = record, Trows = Trows))[-1])
###################################
# p-value Monte-Carlo or X-square
if (simulate.p.value) {
METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
SB <- matrix(stats::rbinom(n = (Trows - 1) * B, size = Mcols, prob = 1 / t), ncol = B)
LMB <- apply(SB, 2, LM.fun)
pv <- sum(LMB >= LM0) / B
} else {
pv <- stats::pchisq(q = LM0, df = Trows - 1, lower.tail = FALSE)
}
###################################
names(LM0) <- "X-squared"
names(Trows) <- "df"
structure(list(statistic = LM0, parameter = Trows - 1, p.value = pv,
method = METHOD, data.name = DNAME,
alternative = paste(alternative, "with", probabilities, "probabilities")),
class = "htest")
},
"different" = {
# lagrange multiplier function
LM.fun <- function(S){ # argument S is I
S <- sweep((sweep(S, MARGIN = 1, t, `*`) - 1)^2, MARGIN = 1, t - 1, `/`)
return(sum(S))
}
###################################
# score statistic
LM0 <- LM.fun(.I.record(X, record = record, Trows = Trows)[-1, , drop = FALSE])
###################################
# p-value Monte-Carlo
METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
IB <- array(stats::rbinom(n = (Trows - 1) * Mcols * B, size = 1, prob = 1 / t), dim = c(Trows - 1, Mcols, B))
LMB <- apply(IB, 3, LM.fun)
pv <- sum(LMB >= LM0) / B
###################################
names(LM0) <- "LM"
structure(list(statistic = LM0, p.value = pv,
method = METHOD, data.name = DNAME,
alternative = paste(alternative, "with", probabilities, "probabilities")),
class = "htest")
})
} else { # alternative %in% c("greater", "less")
switch(probabilities,
"equal" = {
# lagrange multiplier function
LM.fun <- function(S) {
return(sum((t * S - Mcols)^2 / (t - 1) * (S > Mcols / t)) / Mcols)
}
###################################
# score statistic
LM0 <- LM.fun(rowSums(.I.record(X, record = record, Trows = Trows)[-1, , drop = FALSE]))
###################################
# p-value Monte-Carlo
METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
SB <- matrix(stats::rbinom(n = (Trows - 1) * B, size = Mcols, prob = 1 / t), ncol = B)
LMB <- apply(SB, 2, LM.fun)
pv <- switch (alternative,
"greater" = {sum(LMB >= LM0) / B},
"less" = {sum(LMB <= LM0) / B}
)
###################################
names(LM0) <- "LM"
structure(list(statistic = LM0, p.value = pv,
method = METHOD, data.name = DNAME,
alternative = paste(alternative, "with", probabilities, "probabilities")),
class = "htest")
},
"different" = {
# lagrange multiplier function
LM.fun <- function(S){
return(sum(t * (t * S - Mcols) / (t - 1)))
}
###################################
# score statistic (need to divide a constant)
LM0 <- LM.fun(rowSums(.I.record(X, record = record, Trows = Trows)[-1, , drop = FALSE]))
###################################
# p-value Monte-Carlo or Normal
if (simulate.p.value) {
METHOD <- paste(METHOD, "with simulated p-value (based on", B, "replicates)")
SB <- matrix(stats::rbinom(n = (Trows - 1) * B, size = Mcols, prob = 1 / t), ncol = B)
LMB <- apply(SB, 2, LM.fun)
pv <- switch (alternative,
"greater" = {sum(LMB >= LM0) / B},
"less" = {sum(LMB <= LM0) / B}
)
LM0 <- LM0 / sqrt(Mcols * sum(t^2 / (t - 1)))
} else {
LM0 <- LM0 / sqrt(Mcols * sum(t^2 / (t - 1)))
pv <- stats::pnorm(LM0, lower.tail = (alternative == 'less'))
}
###################################
names(LM0) <- "Z"
structure(list(statistic = LM0, p.value = pv,
method = METHOD, data.name = DNAME,
alternative = paste(alternative, "with", probabilities, "probabilities")),
class = "htest")
})
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.