Nothing
#' @title Direction Dependence Analysis: Residual Distributions
#' @description \code{dda.resdist} evaluates patterns of asymmetry of error
#' distributions of causally competing models (\code{y ~ x}
#' vs. \code{x ~ y}).
#' @name dda.resdist
#'
#' @param formula Symbolic formula of the target model to be tested or a \code{lm} object.
#' @param pred Variable name of the predictor which serves as the outcome in the alternative model.
#' @param data An optional data frame containing the variables in the
#' model (by default variables are taken from the environment
#' which \code{dda.resdist} is called from).
#' @param B Number of bootstrap samples.
#' @param boot.type A vector of character strings representing the type of
#' bootstrap confidence intervals required. Must be one of
#' the two values \code{c("perc", "bca")};
#' \code{boot.type = "perc"} is the default.
#' @param prob.trans A logical value indicating whether a probability integral
#' transformation should be performed prior computation of
#' skewness and kurtosis difference tests.
#' @param conf.level Confidence level for bootstrap confidence intervals.
#' @param ... Additional arguments to be passed to the function.
#'
#' @returns An object of class \code{ddaresdist} containing the results of
#' direction dependence tests of error distributions.
#'
#' @examples
#' set.seed(123)
#' n <- 500
#' x <- rchisq(n, df = 4) - 4
#' e <- rchisq(n, df = 3) - 3
#' y <- 0.5 * x + e
#' d <- data.frame(x, y)
#'
#' result <- dda.resdist(y ~ x, pred = "x", data = d,
#' B = 50, conf.level = 0.90, prob.trans = TRUE)
#'
#' @references Wiedermann, W., & von Eye, A. (2025). Direction Dependence Analysis: Foundations and Statistical Methods. Cambridge, UK: Cambridge University Press.
#' @export
#' @rdname dda.resdist
dda.resdist <- function(formula,
pred = NULL,
data = list(),
B = 200,
boot.type = "perc",
prob.trans = FALSE,
conf.level = 0.95
){
### --- helper function for bootstrap CIs
mysd <- function(x){sqrt(sum((x-mean(x))^2)/length(x))}
cor.ij <- function(x,y, i=1, j=1){
n <- length(x)
mx <- mean(x)
my <- mean(y)
Cov <- sum((x - mx)^i * (y - my)^j)/n
Cov/(mysd(x)^i * mysd(y)^j)
}
prob.int <- function(x, y) {
fval <- ecdf(y)(y)
xval <- y
quant_y <- approxfun(fval, xval)
cum_x <- ecdf(x)
transform <- function(x) quant_y(cum_x(x))
x <- transform(x)
x[is.na(x)] <- min(y)
list(x = x, y = y)
}
boot.diff <- function(dat, g, prob.trans){
dat <- dat[g, ]
x <- dat[, 1] # "purified" alternative error
y <- dat[, 2] # "purified" target error
x <- as.vector(scale(x))
y <- as.vector(scale(y))
if( isTRUE(prob.trans) ){
xboot <- dat[, 3] # tentative (cov-adjusted) predictor
yboot <- dat[, 4] # tentative (cov-adjusted) outcome
trans.boot <- prob.int(xboot, yboot)
xboot.trans <- trans.boot$x
yboot.trans <- trans.boot$y
tar.boot.trans <- lm(yboot.trans ~ xboot.trans)
alt.boot.trans <- lm(xboot.trans ~ yboot.trans)
xtrans <- as.vector(scale(resid(alt.boot.trans)))
ytrans <- as.vector(scale(resid(tar.boot.trans)))
skew.diff <- (moments::skewness(xtrans)^2) - (moments::skewness(ytrans)^2)
kurt.diff <- (moments::kurtosis(xtrans)-3)^2 - (moments::kurtosis(ytrans)-3)^2
} else{
skew.diff <- (moments::skewness(x)^2) - (moments::skewness(y)^2)
kurt.diff <- (moments::kurtosis(x)-3)^2 - (moments::kurtosis(y)-3)^2
}
#cor12.diff <- (cor.ij(x, y, i = 2, j = 1)^2) - (cor.ij(x, y, i = 1, j = 2)^2)
cor12.diff <- (cor.ij(y, x, i = 2, j = 1)^2) - (cor.ij(y, x, i = 1, j = 2)^2) # rescaled to ensure > 0 under target model
#cor13.diff <- ((cor.ij(x, y, i = 3, j = 1)^2) - (cor.ij(x, y, i = 1, j = 3)^2)) * sign(moments::kurtosis(x)-3)
cor13.diff <- ((cor.ij(y, x, i = 3, j = 1)^2) - (cor.ij(y, x, i = 1, j = 3)^2)) * sign(moments::kurtosis(y)-3) # rescaled to ensure > 0 under target model
xx <- sign(moments::skewness(x)) * x
yy <- sign(moments::skewness(y)) * y
RHS3 <- cor(xx, yy) * mean( (yy^2 * xx) - (yy * xx^2) ) # rescaled to ensure > 0 under target model
#Rtanh <- cor(x, y) * mean(x * tanh(y) - tanh(x) * y)
RHS4 <- cor(x, y) * mean( (y^3 * x) - (y * x^3) ) * sign(moments::kurtosis(y)-3)
C1 <- mean(y^3 * x) - 3*cor(x,y)*var(y) # with x = alternative error and y = target error
C2 <- mean(y * x^3) - 3*cor(x,y)*var(x)
RCC <- (C1 + C2) * (C1 - C2) # scaled to ensure that > 0 under target model
result <- c(skew.diff, kurt.diff, cor12.diff, cor13.diff, RHS3, RCC, RHS4)
names(result) <- c("skew.diff", "kurt.diff", "cor21.diff", "cor13.diff", "RHS3", "RCC", "RHS4")
return(result)
}
skew.diff.test <- function(x, y){
agostino.zvalue <- function(x){
n <- length(x)
s3 <- (sum((x - mean(x))^3)/n)/(sum((x - mean(x))^2)/n)^(3/2)
y <- s3 * sqrt((n + 1) * (n + 3)/(6 * (n - 2)))
b2 <- 3 * (n * n + 27 * n - 70) * (n + 1) * (n + 3)/((n - 2) * (n + 5) * (n + 7) * (n + 9))
w <- -1 + sqrt(2 * (b2 - 1))
d <- 1/sqrt(log(sqrt(w)))
a <- sqrt(2/(w - 1))
z <- d * log(y/a + sqrt((y/a)^2 + 1))
return(z)
}
zval <- (agostino.zvalue(x) - agostino.zvalue(y))/sqrt(2 - 2*cor(x,y)^3)
pval <- (1 - pnorm(abs(zval))) * 2 # two-sided pvalue
return(list(z.value = zval, p.value = pval))
}
kurt.diff.test <- function(x, y){
anscombe.zvalue <- function(x){
n <- length(x)
b <- n * sum((x - mean(x))^4)/(sum((x - mean(x))^2)^2)
eb2 <- 3 * (n - 1)/(n + 1)
vb2 <- 24 * n * (n - 2) * (n - 3)/((n + 1)^2 * (n + 3) * (n + 5))
m3 <- (6 * (n^2 - 5 * n + 2)/((n + 7) * (n + 9))) * sqrt((6 * (n + 3) * (n + 5))/(n * (n - 2) * (n - 3)))
a <- 6 + (8/m3) * (2/m3 + sqrt(1 + 4/m3^2))
xx <- (b - eb2)/sqrt(vb2)
# modification when cube rooted value is negative: use sign(x) * abs(x)^(1/3) instead of (-x)^(1/3)
cr <- sign((1 - 2/a)/(1 + xx * sqrt(2/(a - 4)))) * abs((1 - 2/a)/(1 + xx * sqrt(2/(a - 4))))^(1/3)
z <- (1 - 2/(9 * a) - cr ) / sqrt(2/(9 * a))
#z <- (1 - 2/(9 * a) - ((1 - 2/a)/(1 + xx * sqrt(2/(a - 4))))^(1/3) ) / sqrt(2/(9 * a))
return(z)
}
zval <- (anscombe.zvalue(x) - anscombe.zvalue(y))/sqrt(2 - 2*cor(x,y)^4)
pval <- (1 - pnorm(abs(zval))) * 2 # two-sided pvalue
return(list(z.value = zval, p.value = pval))
}
myanscombe.test <- function(x, alternative = c("two.sided", "less", "greater")){
# modified anscombe.test() function to handle cube roots of negative numbers
n <- length(x)
DNAME <- deparse(substitute(x))
x <- sort(x[complete.cases(x)])
n <- length(x)
s <- match.arg(alternative)
alter <- switch(s, two.sided = 0, less = 1, greater = 2)
b <- n * sum((x - mean(x))^4)/(sum((x - mean(x))^2)^2)
eb2 <- 3 * (n - 1)/(n + 1)
vb2 <- 24 * n * (n - 2) * (n - 3)/((n + 1)^2 * (n + 3) * (n + 5))
m3 <- (6 * (n^2 - 5 * n + 2)/((n + 7) * (n + 9))) * sqrt((6 * (n + 3) * (n + 5))/(n * (n - 2) * (n - 3)))
a <- 6 + (8/m3) * (2/m3 + sqrt(1 + 4/m3^2))
xx <- (b - eb2)/sqrt(vb2)
# modification when cube rooted value is negative: use sign(x) * abs(x)^(1/3) instead of (-x)^(1/3)
cr <- sign((1 - 2/a)/(1 + xx * sqrt(2/(a - 4)))) * abs((1 - 2/a)/(1 + xx * sqrt(2/(a - 4))))^(1/3)
z <- (1 - 2/(9 * a) - cr ) / sqrt(2/(9 * a))
pval <- pnorm(z, lower.tail = FALSE)
if (alter == 0) {
pval <- 2 * pval
if (pval > 1)
pval <- 2 - pval
alt <- "kurtosis is not equal to 3"
}
else if (alter == 1) {
alt <- "kurtosis is greater than 3"
}
else {
pval <- 1 - pval
alt <- "kurtosis is lower than 3"
}
RVAL <- list(statistic = c(kurt = b, z = z), p.value = pval,
alternative = alt, method = "Anscombe-Glynn kurtosis test",
data.name = DNAME)
class(RVAL) <- "htest"
return(RVAL)
}
if(is.null(pred)) stop( "Tentative predictor is missing." )
if(B < 0) stop( "Number of resamples 'B' must be positive." )
if(conf.level < 0 || conf.level > 1) stop("'conf.level' must be between 0 and 1")
if( !boot.type %in% c("bca", "perc") ) stop( "Unknown argument in boot.type." )
### --- prepare outcome, predictor, and model matrix for covariates
if (!inherits(formula, "formula") ) {
X <- if (is.matrix(formula$x) ) formula$x
else model.matrix(terms(formula), model.frame(formula) )
y <- if (is.vector(formula$y) ) formula$y
else model.response(model.frame(formula))
delete.pred <- which( colnames(X) == pred ) # get position of tentative predictor
if ( length(delete.pred) == 0 ) stop( "Specified predictor not found in the target model." )
x <- X[, delete.pred] # tentative predictor
X <- X[ , -delete.pred] # model matrix with covariates
if ( !is.matrix(X) ) X <- as.matrix(X)
}
else {
mf <- model.frame(formula, data = data)
y <- model.response(mf) # tentative outcome
X <- model.matrix(formula, data = data)
delete.pred <- which( colnames(X) == pred ) # get position of tentative predictor
if ( length(delete.pred) == 0 ) stop( "Specified predictor not found in the target model." )
x <- X[, delete.pred] # tentative predictor
X <- X[ , -delete.pred] # model matrix with covariates
if (!is.matrix(X)) X <- as.matrix(X)
}
ry <- lm.fit(X, y)$residuals
rx <- lm.fit(X, x)$residuals
### --- estimate competing models
ry <- as.vector(scale(ry))
rx <- as.vector(scale(rx))
tar <- lm(ry ~ rx)
alt <- lm(rx ~ ry)
dat <- data.frame(alternative = as.vector(scale(resid(alt))),
target = as.vector(scale(resid(tar))),
pred.adj = rx,
out.adj = ry
) # note: residuals are standardized prior DDA
if( isTRUE(prob.trans) ){
trans <- prob.int(rx, ry)
rx.trans <- trans$x
ry.trans <- trans$y
tar.trans <- lm(ry.trans ~ rx.trans)
alt.trans <- lm(rx.trans ~ ry.trans)
dat$alternative.trans <- as.vector(scale(resid(alt.trans)))
dat$target.trans <- as.vector(scale(resid(tar.trans)))
}
### --- run separate normality tests
if( isTRUE(prob.trans) ){
agostino.out <- apply(dat[c("alternative.trans", "target.trans")], 2, moments::agostino.test)
names(agostino.out) <- c("alternative", "target")
agostino.out <- lapply(agostino.out, unclass)
anscombe.out <- apply(dat[c("alternative.trans", "target.trans")], 2, myanscombe.test)
names(anscombe.out) <- c("alternative", "target")
anscombe.out <- lapply(anscombe.out, unclass)
} else {
agostino.out <- apply(dat[c("alternative", "target")], 2, moments::agostino.test)
agostino.out <- lapply(agostino.out, unclass)
anscombe.out <- apply(dat[c("alternative", "target")], 2, myanscombe.test)
anscombe.out <- lapply(anscombe.out, unclass)
}
agostino.out$alternative[3:5] <- NULL
agostino.out$target[3:5] <- NULL
anscombe.out$alternative[3:5] <- NULL
anscombe.out$target[3:5] <- NULL
output <- list(agostino.out, anscombe.out)
names(output) <- c("agostino", "anscombe")
output$anscombe$target$statistic[1] <- output$anscombe$target$statistic[1] - 3 # change kurtosis to ex-kurtosis
output$anscombe$alternative$statistic[1] <- output$anscombe$alternative$statistic[1] - 3 # change kurtosis to ex-kurtosis
### --- run asymptotic difference tests
if(isTRUE(prob.trans)){
output <- c(output,
list(skewdiff = unlist(skew.diff.test(dat$alternative.trans, dat$target.trans))),
list(kurtdiff = unlist(kurt.diff.test(dat$alternative.trans, dat$target.trans)))
)
output$skewdiff <- c( moments::skewness(dat$alternative.trans)^2 - moments::skewness(dat$target.trans)^2, output$skewdiff) # add point estimtes of skew and kurt differences to vector
output$kurtdiff <- c((moments::kurtosis(dat$alternative.trans)-3)^2 - (moments::kurtosis(dat$target.trans)-3)^2, output$kurtdiff)
} else {
output <- c(output,
list(skewdiff = unlist(skew.diff.test(dat$alternative, dat$target))),
list(kurtdiff = unlist(kurt.diff.test(dat$alternative, dat$target)))
)
output$skewdiff <- c( moments::skewness(dat$alternative)^2 - moments::skewness(dat$target)^2, output$skewdiff) # add point estimtes of skew and kurt differences to vector
output$kurtdiff <- c((moments::kurtosis(dat$alternative)-3)^2 - (moments::kurtosis(dat$target)-3)^2, output$kurtdiff)
}
### --- run bootstrap confidence intervals
if(B > 0){
suppressWarnings(boot.res <- boot::boot(dat, boot.diff, R = B, prob.trans = prob.trans))
if( boot.type == "bca" && any( is.na( boot::empinf( boot.res ) ) ) ) stop("Acceleration constant cannot be calculated. Increase the number of resamples or use boot.type = 'perc'")
suppressWarnings(boot.out <- lapply(as.list(1:7), function(i, boot.res) boot::boot.ci(boot.res, conf=conf.level, type=boot.type, t0=boot.res$t0[i], t=boot.res$t[,i]), boot.res=boot.res))
names(boot.out) <- c("skew.diff", "kurt.diff", "cor12.diff", "cor13.diff", "RHS3", "RCC", "RHS4")
ci.skewdiff <- unclass(boot.out$skew.diff)[[4]][4:5] ; names(ci.skewdiff) <- c("lower", "upper")
ci.kurtdiff <- unclass(boot.out$kurt.diff)[[4]][4:5] ; names(ci.kurtdiff) <- c("lower", "upper")
ci.cor12diff <- unclass(boot.out$cor12.diff)[[4]][4:5] ; names(ci.cor12diff) <- c("lower", "upper")
ci.cor13diff <- unclass(boot.out$cor13.diff)[[4]][4:5] ; names(ci.cor13diff) <- c("lower", "upper")
ci.RHS3 <- unclass(boot.out$RHS3)[[4]][4:5] ; names(ci.RHS3) <- c("lower", "upper")
ci.RCC <- unclass(boot.out$RCC)[[4]][4:5] ; names(ci.RCC) <- c("lower", "upper")
ci.RHS4 <- unclass(boot.out$RHS4)[[4]][4:5] ; names(ci.RHS4) <- c("lower", "upper")
output$skewdiff <- c(output$skewdiff, ci.skewdiff)
output$kurtdiff <- c(output$kurtdiff, ci.kurtdiff)
output <- c(output,
list(cor12diff = c(boot.res$t0[3], ci.cor12diff)),
list(cor13diff = c(boot.res$t0[4], ci.cor13diff)),
list(RHS3 = c(boot.res$t0[5], ci.RHS3)),
list(RCC = c(boot.res$t0[6], ci.RCC)),
list(RHS4 = c(boot.res$t0[7], ci.RHS4)),
list(boot.args = c(boot.type, conf.level, B)),
list(boot.warning = FALSE)
)
if(sign(output$anscombe$alternative$statistic[1]) != sign(output$anscombe$target$statistic[1])) { output$boot.warning <- TRUE }
}
response.name <- all.vars(formula(formula))[1] # get name of response variable
output <- c(output, list(var.names = c(response.name, pred), probtrans = prob.trans))
class(output) <- "dda.resdist"
return(output)
}
#' @name print.dda.resdist
#' @title Print Method for \code{dda.resdist} Objects
#'
#' @description \code{print} returns DDA test statistics associated with \code{dda.resdist} objects.
#'
#' @param x An object of class \code{dda.resdist} when using \code{print}.
#' @param ... Additional arguments to be passed to the method.
#'
#' @examples
#' print(result)
#'
#' @export
#' @rdname dda.resdist
#' @method print dda.resdist
print.dda.resdist <- function(x, ...){
object <- x
varnames <- object$var.names
cat("\n")
cat("DIRECTION DEPENDENCE ANALYSIS: Residual Distributions", "\n", "\n")
cat("Skewness and kurtosis tests:", "\n")
sigtests <- rbind( c(object[[1]]$target$statistic, object[[1]]$target$p.value, object[[1]]$alternative$statistic, object[[1]]$alternative$p.value),
c(object[[2]]$target$statistic, object[[2]]$target$p.value, object[[2]]$alternative$statistic, object[[2]]$alternative$p.value)
)
sigtests <- round(sigtests, 4)
rownames(sigtests) <- c("Skewness", "Kurtosis")
colnames(sigtests) <- c("target", "z-value", "Pr(>|z|)", "alternative", "z-value", "Pr(>|z|)")
print.default(format( sigtests, digits = max(3L, getOption("digits") - 3L), scientific = NA, scipen = 999), print.gap = 2L, quote = FALSE)
if(is.null(object$boot.args)){
cat("\n")
cat("Skewness and kurtosis difference tests:", "\n")
citests <- rbind(object$skewdiff, object$kurtdiff)
citests <- round(citests, 4)
rownames(citests) <- c("Skewness", "Kurtosis")
colnames(citests) <- c("diff", "z-value", "Pr(>|z|)")
print.default(format( citests, digits = max(3L, getOption("digits") - 3L)), print.gap = 2L, quote = FALSE)
cat("\n")
}
if(!is.null(object$boot.args)){
ci.level <- as.numeric(object$boot.args[2]) * 100
cat("\n")
# Print skewness and kurtosis results
if(object$boot.args[1] == "bca") cat("Skewness and kurtosis difference tests and ", ci.level, "% ", "BCa bootstrap CIs:", "\n", "\n", sep = "")
if(object$boot.args[1] == "perc") cat("Skewness and kurtosis difference tests and ", ci.level, "% ", "Percentile bootstrap CIs:", "\n", "\n", sep = "")
citests <- rbind(object$skewdiff, object$kurtdiff)
citests <- round(citests, 4)
rownames(citests) <- c("Skewness", "Kurtosis")
colnames(citests) <- c("diff", "z-value", "Pr(>|z|)", "lower", "upper")
print.default(format( citests, digits = max(3L, getOption("digits") - 3L)), print.gap = 2L, quote = FALSE)
cat("\n")
# Print co-skewness and co-kurtosis results
if(object$boot.args[1] == "bca") cat(ci.level, "% ", "BCa bootstrap CIs for joint higher moment differences:", "\n", sep = "")
if(object$boot.args[1] == "perc") cat(ci.level, "% ", "Percentile bootstrap CIs for joint higher moment differences:", "\n", sep = "")
jointtests <- rbind(object$cor12diff, object$RHS3, object$cor13diff, object$RHS4, object$RCC)
jointtests <- round(jointtests, 4)
rownames(jointtests) <- c("Co-Skewness", "Hyvarinen-Smith (Co-Skewness)", "Co-Kurtosis", "Hyvarinen-Smith (Co-Kurtosis)", "Chen-Chan (Co-Kurtosis)")
colnames(jointtests) <- c("estimate", "lower", "upper")
print.default(format( jointtests, digits = max(3L, getOption("digits") - 3L)), print.gap = 2L, quote = FALSE)
# hoctests <- rbind(object$cor12diff, object$cor13diff)
# hoctests <- round(hoctests, 4)
# rownames(hoctests) <- c("Cor^2[2,1] - Cor^2[1,2]", "Cor^2[3,1] - Cor^2[1,3]" )
# colnames(hoctests) <- c("estimate", "lower", "upper")
# print.default(format( hoctests, digits = max(3L, getOption("digits") - 3L)), print.gap = 2L, quote = FALSE)
# cat("\n")
# # Print LR approximative measures
# if(object$boot.args[1] == "bca") cat(ci.level, "% ", "BCa bootstrap CIs for Likelihood Ratio approximations:", "\n", sep = "")
# if(object$boot.args[1] == "perc") cat(ci.level, "% ", "Percentile bootstrap CIs for Likelihood Ratio approximations:", "\n", sep = "")
# LRtests <- rbind(object$RHS3, object$RHS4, object$RCC )
# LRtests <- round(LRtests, 4)
# rownames(LRtests) <- c("Hyvarinen-Smith (co-skewness)", "Hyvarinen-Smith (co-kurtosis)", "Chen-Chan (co-kurtosis)")
# colnames(LRtests) <- c("estimate", "lower", "upper")
# print.default(format( LRtests, digits = max(3L, getOption("digits") - 3L)), print.gap = 2L, quote = FALSE)
cat("\n")
cat(paste("Number of resamples:", object$boot.args[3]))
cat("\n")
}
cat("---")
cat("\n")
cat(paste("Note: Target is", varnames[2], "->", varnames[1], sep = " "))
cat("\n")
cat(paste(" Alternative is", varnames[1], "->", varnames[2], sep = " "))
cat("\n")
if(isFALSE(object$probtrans)){
cat(paste(" Difference statistics > 0 suggest the model", varnames[2], "->", varnames[1], sep = " "))
} else {
cat(paste(" Under prob.trans = TRUE, skewness and kurtosis differences < 0 and", "\n", " co-skewness and co-kurtosis differences > 0 suggest", varnames[2], "->", varnames[1], sep = " "))
}
cat("\n")
if(object$boot.warning) { cat("Warning: Excess-kurtosis values of residuals have unequal signs", "\n", " Also compute Co-Kurtosis and Hyvarinen-Smith Co-Kurtosis for", varnames[1], "->", varnames[2], "\n") }
cat("\n")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.