Nothing
# Regress the given data (true or permuted) against the full model and
# get an effect of interest at all tau values in a matrix.
# @param Y True or permuted values of Y.
# @param df data
# @param nameinteresting Names of the interesting coefficients
#' @importFrom stats coef
genCoefmeans.rq <- function(Y, df, taus, formula.full, nameinteresting, ...) {
anyNAY <- anyNA(Y)
effect.a <- sapply(seq_along(taus), function(i) {
if(is.vector(Y)) {
df$response__ <- Y
} else {
df$response__ <- Y[,i]
}
if(anyNAY) {
df <- df[!is.na(df$response__),]
}
M.full <- quantreg::rq(formula.full, data=df, tau=taus[i], model=FALSE, ...)
allcoef <- coef(M.full)
unlist(allcoef[nameinteresting])
})
if(length(nameinteresting)==1) {
name <- names(effect.a)[1]
dim(effect.a) <- c(length(effect.a), 1)
dimnames(effect.a) = list(NULL, name)
effect.a
} else t(effect.a)
}
#' Global quantile regression
#'
#' Global tests of significance for the effect of covariates in quantile regression
#'
#'
#' The possible permutation strategies include
#' "Freedman-Lane" (FL), "Freedman-Lane+remove zeros" (FL+), "within nuisance" (WN),
#' "remove location" (RL), "remove location scale" (RLS), "remove quantile" (RQ),
#' which correspond to those in Mrkvička et al. (Section 4.1-4.6 and Table 1).
#'
#' @inheritParams graph.flm
#' @param data data.frame where to look for variables.
#' @param taus The quantiles to be used.
#' @param permutationstrategy The permutation strategy to be used. See details.
#' @param rq.args Additional arguments passed to \code{rq}.
#' @param contrasts Passed directly to \code{rq}.
#' @return A \code{global_envelope} or \code{combined_global_envelope} object,
#' which can be printed and plotted directly.
#' @export
#' @references
#' Mrkvička, T., Konstantinou, K., Kuronen, M. and Myllymäki, M. (2023) Global quantile regression. arXiv:2309.04746 [stat.ME]. https://doi.org/10.48550/arXiv.2309.04746
#'
#' Myllymäki, M and Mrkvička, T. (2023). GET: Global envelopes in R. arXiv:1911.06583 [stat.ME]. https://doi.org/10.48550/arXiv.1911.06583
#'
#' Freedman, D., & Lane, D. (1983) A nonstochastic interpretation of reported significance levels. Journal of Business & Economic Statistics, 1(4), 292-298. doi:10.2307/1391660
#' @importFrom parallel mclapply
#' @importFrom parallel parLapply
#' @importFrom parallel clusterEvalQ
#' @importFrom stats as.formula model.matrix update.formula
#' @examples
#' if(require("quantreg", quietly=TRUE)) {
#' data("stackloss")
#' res <- global_rq(nsim = 19, # Increase nsim for serious analysis!
#' formula.full = stack.loss ~ Air.Flow + Water.Temp + Acid.Conc.,
#' formula.reduced = stack.loss ~ Water.Temp,
#' taus = seq(0.1, 0.9, length=10), permutationstrategy = "remove quantile",
#' data = stackloss, GET.args = list(typeone = "fwer"))
#' plot(res)
#' }
#'
global_rq <- function(nsim, formula.full, formula.reduced, taus,
data, contrasts = NULL,
permutationstrategy = c("Freedman-Lane", "Freedman-Lane+remove zeros",
"within nuisance",
"remove location", "remove location scale",
"remove quantile"),
savefuns = FALSE, rq.args = NULL, lm.args = NULL, GET.args = NULL,
mc.cores = 1L, mc.args = NULL, cl = NULL) {
if(!requireNamespace("quantreg", quietly = TRUE)) {
stop("Package 'quantreg' is required, but not installed.")
}
if(missing(permutationstrategy)) stop("Permutation strategy has to be specified.")
permutationstrategy = match.arg(permutationstrategy)
check_isnested(formula.full, formula.reduced)
if(nsim < 1) stop("Not a reasonable value of nsim.")
genCoef <- genCoefmeans.rq
ylab <- expression(italic(hat(beta)[i](tau)))
nameinteresting <- setdiff(
colnames(model.matrix(formula.full, data=data, contrasts.arg=contrasts)),
colnames(model.matrix(formula.reduced, data=data, contrasts.arg=contrasts)))
# Fit the full model to the data and obtain the coefficients
# genCoef will use response__ as the response variable
response_variable <- formula.full[[2]]
formula.full <- update.formula(formula.full, response__ ~ .)
Y <- eval(response_variable, data)
# TODO: contrasts for variables not in reduced model will cause warning, but maybe are needed if there are interactions
# Maybe make another contrasts where the extra variables are not included
if(startsWith(permutationstrategy, "Freedman-Lane") || permutationstrategy=="remove quantile") {
#-- Freedman-Lane procedure
# Fit the reduced model at each argument value to get the fitted values and residuals
fit <- do.call(quantreg::rq, c(list(formula.reduced, data=data, tau=taus, model=FALSE, contrasts=contrasts), rq.args))
fitted.m <- fit$fitted.values
res.m <- fit$residuals
if(length(taus)==1) {
fitted.m <- matrix(fitted.m, ncol=1)
res.m <- matrix(res.m, ncol=1)
}
if(permutationstrategy=="remove quantile") {
# Note that formula.full is actually formula.interesting
Y <- res.m
fitted.m <- 0
formula.full <- update.formula(formula.full, as.formula(paste("~.-(", as.character(formula.reduced)[3], ")")))
} else if(permutationstrategy=="Freedman-Lane+remove zeros") {
# Find the indices of the zero residuals that will be removed
# Since there can be arbitrarily many zeros, take a random sample
names.reduced <- if(length(taus)==1) {names(coef(fit))} else {rownames(coef(fit))}
zerostoremove <- length(names.reduced) - 1
if(zerostoremove >= 1) {
zerores <- abs(res.m) < 1e-10
stopifnot(all(colSums(zerores) >= zerostoremove))
# Set residuals to be removed to NA, any rows with NA are removed in gencoef
for(i in seq_along(taus)) {
ind <- sample(which(zerores[,i]), zerostoremove, replace=FALSE)
res.m[ind, i] <- NA
}
}
}
fit <- NULL
} else if(permutationstrategy=="simple") {
fitted.m <- 0
res.m <- matrix(Y, ncol=1)
} else if(permutationstrategy=="within nuisance") {
Nuisance_name = as.character(formula.reduced[[3]])
if(length(Nuisance_name) != 1) stop("formula_reduced may only contain 1 variable if 'within nuisance' permutation strategy is used.")
Nuisance.levels <- unique(data[[Nuisance_name]])
## TODO: Maybe check that the nuisance has a reasonable amount of data on each level?
# permutation strategy ( Works only for one categorical covariate )
Yperm <- function() {
result <- Y_temp <- Y
for( i in Nuisance.levels){
Index_i = which(data[[Nuisance_name]]==i)
permutation <- Index_i[sample.int(length(Index_i))]
result[Index_i] = Y_temp[permutation]
}
result
}
} else if(startsWith(permutationstrategy, "remove location")) {
# These are implemented by computing the corresponding residuals and
# then performing the permutation as simple permutation for the residuals and
# the formula where nuisance are removed.
#simulated data
fit <- do.call(lm, c(list(formula.reduced, data = data, contrasts=contrasts),lm.args)) # Fit the reduced model
res.m <- fit$residuals #residuals
#NP + permutation
if(permutationstrategy == "remove location scale") {
data.temp = data
data.temp$abs_residuals = abs(res.m)
formula.res <- update.formula(formula.reduced, abs_residuals ~ .)
fit.abs<- do.call(lm, c(list( formula.res,data = data.temp, contrasts=contrasts),lm.args))
fitted.m = fit.abs$fitted.values
res.m <- res.m / fitted.m
}
formula.full <- update.formula(formula.full, as.formula(paste("~.-(", as.character(formula.reduced)[3], ")")))
Y <- res.m
res.m <- matrix(res.m, ncol=1)
fitted.m <- 0
}
obs <- do.call(genCoef, c(list(Y, data, taus, formula.full, nameinteresting, contrasts=contrasts), rq.args))
if(permutationstrategy != "within nuisance") {
# Simulations by permuting the residuals + calculate the coefficients
Yperm <- function() { # Permutation
permutation <- sample(1:nrow(res.m), size=nrow(res.m), replace=FALSE)
# Permute the residuals (rows in res.m) and create new 'y'
fitted.m + res.m[permutation, ]
}
}
loopfun2 <- function(i) {
# Regress the permuted data against the full model and get a new effect of interest
do.call(genCoef, c(list(Yperm(), data, taus, formula.full, nameinteresting, contrasts=contrasts), rq.args))
}
if(is.null(cl)) sim <- do.call(parallel::mclapply, c(list(X=1:nsim, FUN=loopfun2, mc.cores=mc.cores), mc.args))
else sim <- parLapply(cl, 1:nsim, loopfun2)
dn <- dimnames(sim[[1]])
# sim <- simplify2array(sim, except=NULL) # except is only available starting from 4.2.0
sim <- simplify2array(sim)
if(is.null(dimnames(sim))) {
dim(sim) <- c(1,1,length(sim))
dimnames(sim) <- c(dn, list(NULL))
}
complabels <- colnames(obs)
csets <- NULL
for(i in 1:ncol(obs)) {
simi <- array(sim[,i,], dim=dim(sim)[c(1,3)]) # Extract the slice even if some dimensions are 1
csets[[complabels[i]]] <- create_curve_set(list(r = taus,
obs = obs[,i],
sim_m = simi))
}
res <- do.call(global_envelope_test, c(list(curve_sets=csets,
alternative="two.sided",
nstep=1), # nstep=1 concerns only 'fwer'
GET.args))
attr(res, "method") <- "Global quantile regression"
attr(res, "permutationstrategy") <- permutationstrategy
attr(res, "labels") <- complabels
# Re-define the default ylab
res <- envelope_set_labs(res, ylab=ylab)
attr(res, "call") <- match.call()
if(savefuns) attr(res, "simfuns") <- csets
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.