Nothing
##' Tests whether the conditional distribution of Y given X^S is
##' invariant across time, by allowing for arbitrary non-linear
##' additive dependence models.
##'
##' The function can be applied to models of the form\cr Y_i =
##' f(X_i^S) + N_i\cr with iid noise N_i and f is from a specific
##' function class, which the regression procedure given by the
##' parameter \code{regression.fun} should be able to approximate.\cr
##'
##' For both models the hypothesis test specified by the \code{test}
##' parameter specifies the hypothesis test used to test whether the
##' set S leads to an invariant model. For futher details see the
##' references.
##' @title Non-linear Invariant Causal Prediction for an individual
##' set S
##' @param X matrix of predictor variables. Each column corresponds to
##' one predictor variable.
##' @param Y vector of target variable, with length(Y)=nrow(X).
##' @param S vector containing the indicies of predictors to be tested
##' @param test string specifying the hypothesis test used to test for
##' invariance of a parent set S (i.e. the null hypothesis
##' H0_S). The following tests are available: "block.mean",
##' "block.variance", "block.decoupled", "smooth.mean",
##' "smooth.variance", "smooth.decoupled" and "hsic".
##' @param par.test parameters specifying hypothesis test. The
##' following parameters are available: \code{grid},
##' \code{complements}, \code{link}, \code{alpha} and \code{B}. The
##' parameter \code{grid} is an increasing vector of gridpoints used
##' to construct enviornments for change point based tests. If the
##' parameter \code{complements} is 'TRUE' each environment is
##' compared against its complement if it is 'FALSE' all
##' environments are compared pairwise. The parameter \code{link}
##' specifies how to compare the pairwise test statistics, generally
##' this is either max or sum. The parameter \code{alpha} is a
##' numeric value in (0,1) indicting the significance level of the
##' hypothesis test. The parameter \code{B} is an integer and
##' specifies the number of Monte-Carlo samples used in the
##' approximation of the null distribution.
##' @param regression.fun regression function used to fit the function
##' f. This should be a function which takes the argument (X,Y) and
##' outputs the predicted values f(Y).
##'
##' @return list containing the following elements
##'
##' \item{test.stat}{value of the test statistic.}
##'
##' \item{crit.value}{critical value computed using a Monte-Carlo
##' simulation of the null distribution.}
##'
##' \item{p.value}{p-value.}
##'
##' @export
##'
##' @import stats utils
##'
##' @author Niklas Pfister and Jonas Peters
##'
##' @references
##' Pfister, N., P. Bühlmann and J. Peters (2017).
##' Invariant Causal Prediction for Sequential Data. ArXiv e-prints (1706.08058).
##'
##' Peters, J., P. Bühlmann, and N. Meinshausen (2016).
##' Causal inference using invariant prediction: identification and confidence intervals.
##' Journal of the Royal Statistical Society, Series B (with discussion) 78 (5), 947–1012.
##'
##' @seealso To estimate the set of causal parents use the function
##' \code{\link{seqICPnl}}. For linear models use the corresponding
##' functions \code{\link{seqICP}} and \code{\link{seqICP.s}}.
##'
##' @examples
##' set.seed(1)
##'
##' # environment 1
##' na <- 130
##' X1a <- rnorm(na,0,0.1)
##' Ya <- 5*X1a+rnorm(na,0,0.5)
##' X2a <- Ya+rnorm(na,0,0.1)
##'
##' # environment 2
##' nb <- 70
##' X1b <- rnorm(nb,-1,1)
##' Yb <- X1b^2+rnorm(nb,0,0.5)
##' X2b <- rnorm(nb,0,0.1)
##'
##' # combine environments
##' X1 <- c(X1a,X1b)
##' X2 <- c(X2a,X2b)
##' Y <- c(Ya,Yb)
##' Xmatrix <- cbind(X1, X2)
##'
##' # use GAM as regression function
##' GAM <- function(X,Y){
##' d <- ncol(X)
##' if(d>1){
##' formula <- "Y~1"
##' names <- c("Y")
##' for(i in 1:(d-1)){
##' formula <- paste(formula,"+s(X",toString(i),")",sep="")
##' names <- c(names,paste("X",toString(i),sep=""))
##' }
##' data <- data.frame(cbind(Y,X[,-1,drop=FALSE]))
##' colnames(data) <- names
##' fit <- fitted.values(mgcv::gam(as.formula(formula),data=data))
##' } else{
##' fit <- rep(mean(Y),nrow(X))
##' }
##' return(fit)
##' }
##'
##' # apply seqICPnl.s to all possible sets using the regression
##' # function GAM - only the true parent set S=1 is
##' # invariant in this example
##' seqICPnl.s(Xmatrix, Y, S=numeric(), par.test=list(grid=c(0,50,100,150,200)), regression.fun=GAM)
##' seqICPnl.s(Xmatrix, Y, S=1, par.test=list(grid=c(0,50,100,150,200)), regression.fun=GAM)
##' seqICPnl.s(Xmatrix, Y, S=2, par.test=list(grid=c(0,50,100,150,200)), regression.fun=GAM)
##' seqICPnl.s(Xmatrix, Y, S=c(1,2), par.test=list(grid=c(0,50,100,150,200)), regression.fun=GAM)
seqICPnl.s <- function(X, Y, S,
test="block.variance",
par.test=list(grid=c(0,round(nrow(X)/2),nrow(X)),
complements=FALSE,
link=sum,
alpha=0.05,
B=100),
regression.fun=function(X,Y) fitted.values(lm.fit(X,Y)))
{
# add defaults if missing in parameter lists
if(!exists("grid",par.test)){
par.test$grid <- seq(0,nrow(X),length.out=3)
}
if(!exists("complements",par.test)){
par.test$complements <- FALSE
}
if(!exists("link",par.test)){
par.test$link <- sum
}
if(!exists("alpha",par.test)){
par.test$alpha <- 0.05
}
if(!exists("B",par.test)){
par.test$B <- 100
}
# read out parameters from par.test
grid <- par.test$grid
complements <- par.test$complements
link <- par.test$link
alpha <- par.test$alpha
B <- par.test$B
# number of observations
n <- nrow(X)
# convert vectors to matrices
Y <- as.matrix(Y)
###
# Adjust X, Y based on model
###
# compute new design matrix
X <- cbind(matrix(1,n,1),X[,S,drop=FALSE])
# number of variables
d <- ncol(X)
###
# Compute block environments if necessary
###
if(test=="block.mean"||test=="block.variance"||test=="block.decoupled"){
# Comparisons against the complement or general comparisons
if(complements){
# Compare environments with complement environments
# number of grid points
L <- length(grid)
# check whether grid was specified
if(L<=1){
stop("not enough grid points given, specify correct grid")
}
# if first grid point is 1 change it to 0
if(grid[1]==1){
grid[1] <- 0
}
# total number of environments
L.tot <- choose(L,2)
# compute all combinations of elements on the grid
t <- combn(L,2,simplify=TRUE)
# initialize variables
Z <- vector("list",2*L.tot)
env <- vector("list",2*L.tot)
fullenv <- L-1
for(i in 1:L.tot){
cp1 <- grid[t[1,i]]
cp2 <- grid[t[2,i]]
env[[i]] <- (cp1+1):cp2
if(cp1==0 & cp2!=n){
env[[i+L.tot]] <- (cp2+1):n
}
else if(cp2==n & cp1!=0){
env[[i+L.tot]] <- 1:cp1
}
else if(cp1==0 & cp2==n){
env[[i+L.tot]] <- 1:n
}
else{
env[[i+L.tot]] <- c(1:cp1,(cp2+1):n)
}
}
# write grid to CP
CP <- grid
# compute relevant comparisons
pairwise <- cbind((1:L.tot)[-fullenv],(1:L.tot)[-fullenv]+L.tot)
# set L.tot to the full number of environments
L.tot <- 2*L.tot
}
else{
# Compare environements against all non-intersecting environments
# number of grid points
L <- length(grid)
# check whether grid was specified
if(L<=1){
stop("not enough grid points given, specify correct grid")
}
# if first grid point is 1 change it to 0
if(grid[1]==1){
grid[1] <- 0
}
# total number of environments
L.tot <- choose(L,2)
# compute all combinations of elements on the grid
t <- combn(L,2,simplify=TRUE)
# initialize variables
Z <- vector("list",L.tot)
env <- vector("list",L.tot)
for(i in 1:L.tot){
cp1 <- grid[t[1,i]]
cp2 <- grid[t[2,i]]
env[[i]] <- (cp1+1):cp2
}
# write grid to CP
CP <- grid
# compute relevant comparisons
index <- t[1,]>=t[2,1]
pairwise <- rbind(rep(1,sum(index)),(1:L.tot)[index])
for(i in 2:L.tot){
index <- t[1,]>=t[2,i]
pairwise <- cbind(pairwise,rbind(rep(i,sum(index)),(1:L.tot)[index]))
}
pairwise <- t(cbind(pairwise,rbind(pairwise[2,],pairwise[1,])))
}
}
###
# Define function to compute test statistic (depends on test)
###
if(test=="hsic"){
test.stat.fun <- function(R.scaled){
test.stat <- dHSIC::dhsic(1:n,R.scaled)$dHSIC
return(test.stat)
}
# size of output
num <- 1
}
else if(test=="block.mean"){
test.stat.fun <- function(R.scaled){
mean.env <- vector("numeric",L.tot)
for(i in 1:L.tot){
mean.env[i] <- mean(R.scaled[env[[i]],,drop=FALSE])
}
# go over all pairs of non-intersecting environments
statfun <- function(i,j){
T <- abs(mean.env[i]-mean.env[j])
return(T)
}
T.pairs <- mapply(statfun,pairwise[,1],pairwise[,2])
# combine the pairwise test statistics with the link function
test.stat <- link(T.pairs)
return(test.stat)
}
# size of output
num <- 1
}
else if(test=="block.variance"){
test.stat.fun <- function(R.scaled){
var.env <- vector("numeric",L.tot)
for(i in 1:L.tot){
var.env[i] <- var(R.scaled[env[[i]],,drop=FALSE])
}
# go over all pairs of non-intersecting environments
statfun <- function(i,j){
T <- abs(var.env[i]/var.env[j]-1)
return(T)
}
T.pairs <- mapply(statfun,pairwise[,1],pairwise[,2])
# combine the pairwise test statistics with the link function
test.stat <- link(T.pairs)
return(test.stat)
}
# size of output
num <- 1
}
else if(test=="block.decoupled"){
test.stat.fun <- function(R.scaled){
var.env <- vector("numeric",L.tot)
mean.env <- vector("numeric",L.tot)
for(i in 1:L.tot){
var.env[i] <- var(R.scaled[env[[i]],,drop=FALSE])
mean.env[i] <- mean(R.scaled[env[[i]],,drop=FALSE])
}
# go over all pairs of non-intersecting environments
statfun <- function(i,j){
T1 <- abs(mean.env[i]-mean.env[j])
T2 <- abs(var.env[i]/var.env[j]-1)
T <- c(T1,T2)
return(T)
}
T.pairs <- mapply(statfun,pairwise[,1],pairwise[,2])
# combine the pairwise test statistics with the link function
test.stat <- rbind(link(T.pairs[1,]),link(T.pairs[2,]))
return(test.stat)
}
# size of output
num <- 2
}
else if(test=="smooth.variance"){
test.stat.fun <- function(R.scaled){
time <- 1:n
fit <- mgcv::gam(R.scaled^2~s(time))
y <- fitted.values(fit)
test.stat <- sum(y^2)
return(test.stat)
}
# size of output
num <- 1
}
else if(test=="smooth.mean"){
test.stat.fun <- function(R.scaled){
time <- 1:n
fit <- mgcv::gam(R.scaled~s(time))
y <- fitted.values(fit)
test.stat <- sum(y^2)
return(test.stat)
}
# size of output
num <- 1
}
else if(test=="smooth.decoupled"){
test.stat.fun <- function(R.scaled){
time <- 1:n
fit.m <- mgcv::gam(R.scaled~s(time))
fit.v <- mgcv::gam(R.scaled^2~s(time))
y.m <- fitted.values(fit.m)
y.v <- fitted.values(fit.v)
test.stat <- c(sum(y.m^2),sum(y.v^2))
return(test.stat)
}
# size of output
num <- 2
}
else{
stop("given test unknown")
}
###
# Compute test statistic
###
Ypred <- regression.fun(X,Y)
R <- Y-Ypred
R.scaled <- R/sqrt(sum(R^2))
test.stat <- test.stat.fun(R.scaled)
## ###
## # Diagnostic plots
## ###
## if(verbose){
## ### plot the cross correlation in the after ar fit
## fit <- lm(Y~-1+X)
## res <- residuals(fit)
## par(mfrow=c(1,2))
## ccf(Y[,1],res)
## acf(res)
## par(mfrow=c(1,1))
## #readline(prompt="Press [enter] to continue")
## ### plot the scaled residuals versus time
## time <- 1:n
## Rs <- list(R.scaled,p)
## #save(Rs,file="Rs.Rda")
## plot(R.scaled^2)
## #lines(fitted.values(smooth.spline(time,R.scaled^2)),col="red")
## #lines(lokerns(time,R.scaled^2,hetero=TRUE)$y,col="red")
## lines(fitted.values(gam(R.scaled^2~s(time))),col="red")
## }
###
# Use permutation of the scaled residuals to approximate null distribution
###
boot.fun <- function(i){
return(test.stat.fun(matrix(R.scaled[sample(1:n)],n,1)))
}
test.stat.boot <- matrix(sapply(1:B,boot.fun),num,B)
crit.value <- apply(test.stat.boot,1,function(x) quantile(x,1-alpha))
p.value <- (rowSums(test.stat.boot>=matrix(rep(test.stat,B),num,B))+1)/(B+1)
# perform Bonferroni adjustment incase method uses multiple testing
min.ind <- which.min(p.value)
test.stat <- test.stat[min.ind]
crit.value <- crit.value[min.ind]
p.value <- min(num*p.value[min.ind],1)
return(list(test.stat=as.numeric(test.stat),
crit.value=crit.value,
p.value=as.numeric(p.value)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.