tests/test_gSquareBin.R
In pcalg: Methods for Graphical Models and Causal Inference

library(pcalg)

cat("doExtras:", (doExtras <- pcalg:::doExtras()), "\n")

##################################################
## Simple 2 Variables  x1 -> x2
##################################################
set.seed(123)

## only 2 Variables
b0 <- 0
b1 <- 1
n <- 1000
x1 <- rbinom(n, 1, p = 1/2) ## == sample(c(0,1), n, replace=TRUE)
x2 <- rbinom(n, 1, p <- plogis(b0 + b1* x1))

res1 <- fisher.test(x1,x2)$p.value         # 6.14 e-21
res2 <- gSquareBin(1,2,NULL, cbind(x1,x2)) # 3.74 e-21

(eq1 <- all.equal(c(res1,     res2),
                  c(6.142e-21, 3.7401e-21), tol = 1e-5))
ok1 <- isTRUE(eq1)

##################################################
## chain of 3 variables: x1 -> x2 -> x3
##################################################
b0 <- 0
b1 <- 1
b2 <- 1
n <- 10000

set.seed(12)
x1 <- rbinom(n, 1, p=1/2)
x2 <- rbinom(n, 1, p2 <- plogis(b0 + b1* x1))
x3 <- rbinom(n, 1, p3 <- plogis(b0 + b2* x2))
##
dat <- cbind(x1,x2,x3)
##
system.time(
pv.2 <- c(gSquareBin(3,1, 2, dat),
          gSquareBin(1,3, 2, dat),
          gSquareBin(1,2, 3, dat),
          gSquareBin(3,2, 1, dat))
)

(eq2 <- c(all.equal(pv.2, c(rep(0.89393365, 2), 0, 0), tol=1e-8),
          all.equal(pv.2[1], pv.2[2], tol=1e-6)))

ok2 <- is.logical(eq2) && all(eq2)

ok3 <- ok4 <- ok5 <- ok6 <- TRUE
if (doExtras) {
    suppressWarnings(RNGversion("3.5.0"))
##################################################
## collider: x1 -> x3 <- x2
##################################################
b0 <- 0
b1 <- 1.3
b2 <- 1.7
n <- 10000
set.seed(13)
x1 <- rbinom(n, 1, p=1/2)
x2 <- rbinom(n, 1, p=1/2)
x3 <- rbinom(n, 1, p3 <- plogis(b0 + b1*x1 + b2*x2))
##
dat <- cbind(x1,x2,x3)
##
system.time(
pv3 <- c(gSquareBin(1,2, 3,  dat),
         gSquareBin(1,2,NULL,dat),
         gSquareBin(2,1, 3,  dat),
         gSquareBin(1,3, 2,  dat))
)
(eq3 <- all.equal(pv3, c(0, 0.9054922, 0,0), tol=1e-7))
ok3 <- isTRUE(eq3)


##################################################
## add noise variables to collider model
##################################################
x4 <- sample(c(0,1),n, replace=TRUE)
x5 <- sample(c(0,1),n, replace=TRUE)
x6 <- sample(c(0,1),n, replace=TRUE)

dat <- cbind(x1,x2,x3,x4,x5,x6)

system.time(
pv4 <- c(gSquareBin(1,2, c(3,4,5,6), dat),
         gSquareBin(1,3, c(2,4,5,6), dat),
         gSquareBin(2,3, c(1,4,5,6), dat),
         gSquareBin(2,4, c(1,3,5,6), dat),
         gSquareBin(4,2, c(1,3,5,6), dat), # same
         gSquareBin(3,4, c(1,2,5,6), dat),
         ##
         gSquareBin(4,2, c(3,5,6),   dat))
)

(eq4 <- c(all.equal(pv4, c(0, 0, 0, 0.0269101, 0.0269101, 0.7260011, 0.1121826),
                   tol = 2e-7),
          all.equal(pv4[4], pv4[5], tol=1e-6)))

ok4 <- is.logical(eq4) && all(eq4)


##################################################
## rectangle model
##################################################
b0 <- 0
b1 <- 1
b2 <- 1
b3 <- 0.5
b4 <- 0.5
n <- 10000
set.seed(31)
x1 <- rbinom(n, size=1, prob = 1/2)# = sample(c(0,1), n, replace=TRUE)
x2 <- rbinom(n, size=1, prob = (p2 <- plogis(b0 + b1* x1)))
x3 <- rbinom(n, size=1, prob = (p3 <- plogis(b0 + b2* x1)))
x4 <- rbinom(n, size=1, prob = (p4 <- plogis(b0 + b3* x2 + b4* x3)))
##
dat <- cbind(x1,x2,x3,x4)

system.time(
pv5 <- c(gSquareBin(1,4,c(2,3),dat),
         gSquareBin(1,4,  2,   dat),
         gSquareBin(1,4,  3,   dat),
         gSquareBin(1,4,NULL,  dat),
         gSquareBin(2,3,  4,   dat),
         gSquareBin(2,3,  1,   dat),
         gSquareBin(2,3,c(1,4),dat))
)

(eq5 <- all.equal(pv5, c(0.38484, 0.13601, 0.10974, 0, 0.00023, 0.56828, 0.43436), 4e-4))
ok5 <- isTRUE(eq5)

##################################################
## rectangle model -- extended
##################################################
x5 <- rbinom(n, size=1, prob = (p5 <- plogis(b0 + x2)))
x6 <- rbinom(n, size=1, prob = (p6 <- plogis(b0 + x3/2 - x4)))
x7 <- rbinom(n, size=1, prob = (p7 <- plogis(b0 + x5 - x6)))
x8 <- rbinom(n, size=1, prob = (p8 <- plogis(b0 - x3 + x6/2)))
x9 <- rbinom(n, size=1, prob = (p9 <- plogis(b0 + x2 - x7)))
##
dat <- cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9)

## do many tests:
p <- ncol(dat) # = 9
## all (x,y) pairs:
xy <- combn(p,2)
"%w/o%" <- function(x, y) x[!x %in% y] #--  x without y
ip <- 1:p
str(rest <- apply(xy, 2, function(e) ip %w/o% e)) # 7 x 36
set.seed(101)
in2 <- seq_len(ncol(xy))
args <- c(lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=rest[,i])),
          lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=sample(rest[,i], size= p-2-1))),
          lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=sample(rest[,i], size= sample(0:(p-2), 1)))),
          lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=sample(rest[,i], size= sample(2:(p-2), 1)))))

ia <- TRUE

system.time(
pv6 <- vapply(args, function(L) do.call(gSquareBin, c(L, list(dm=dat))), 1)
)# if(doExtras) about one minute

if(FALSE)
    dput(signif(zapsmall(pv6), 4))
pv6.ok <- c(0, 0, 0.6427, 0.4745, 0.9698, 0.4586, 0.5739, 0.6701, 0.1889,
            0, 0, 0.6476, 0.00601, 0.3586, 0, 0, 0.08916, 0, 0.7642, 0, 0.04021,
            0.4694, 0, 0.8799, 0.3966, 0.358, 0.01514, 0, 0.3615, 0.3244,
            0, 0, 0.6647, 0.6839, 0, 0.4136, 0, 0, 0.7023, 0.303, 0.9189,
            0.5501, 0.7896, 0.8239, 0.04095, 0, 0, 0.5531, 0.6374, 0.755,
            0, 0, 0.629, 0, 0.8377, 0, 0.5749, 0.6346, 0, 0.9513, 0.3138,
            0.6293, 0.000153, 0, 0.7855, 0.1503, 0, 0, 0.2033, 0.9071, 0,
            0.2587, 0, 0, 1.08e-05, 0.0002067, 0.03795, 0.5501, 3.5e-06,
            0.001827, 0.5683, 0, 0, 0.005468, 0.4085, 0.1476, 0, 0, 0.6822,
            0, 0.7642, 0, 0.7184, 0.4596, 0, 0.8799, 0.02051, 0.03413, 0.01689,
            0, 0.2786, 0.07084, 0, 0, 5.17e-05, 0.4915, 0, 0.5029, 0, 0,
            0.1891, 0.09992, 0.9698, 0.3937, 5.94e-05, 0.5473, 0.3894, 0,
            0, 0.2236, 2.3e-06, 0.784, 0, 0, 0.3504, 0, 0.7642, 0, 0.3572,
            0.4259, 0, 0.8139, 0.3326, 0.01518, 0.01514, 0, 0.3541, 0.021,
            0, 0, 0.6647, 0.6167, 0, 0.4136)

(eq6 <- all.equal(pv6, pv6.ok[ia], tol= 1e-4))
ok6 <- isTRUE(eq6)

}
if (!all(ok1,ok2,ok3,ok4,ok5,ok6))
    stop("Test gSquareBin wrong: Some dependence was not estimated correctly!")