# R/pcalg.R In pcalg: Methods for Graphical Models and Causal Inference

#### Main functions pc(), fci(), fciPlus(), ...
####                ----  -----
### Auxiliaries       -->  ./Aaux.R
### Classes & Methods -->  ./AllClasses.R

trueCov <- function(dag, back.compatible = FALSE)
{
  ## Purpose: covariance matrix implied by a weighted, topologically
  ## ordered DAG.  With weight matrix W (W[i,j] = weight of edge j -> i,
  ## see wgtMatrix()), the linear SEM is X = W X + e, e ~ N(0, I), so
  ## Cov(X) = S S'  where  S = (I - W)^{-1}.
  ## - back.compatible: use the old weight-matrix extraction wgtMatrix.0()
  W <- if (back.compatible) wgtMatrix.0(dag) else wgtMatrix(dag)
  nVar <- length(dag@nodes)
  S <- solve(diag(nVar) - W)
  tcrossprod(S)
}

## buggy randomDAG:
## randomDAG <- function (n, prob, lB = 0.1, uB = 1, V = as.character(1:n))
## {
##     stopifnot(n >= 2, is.numeric(prob), length(prob) == 1,
## 	      0 <= prob, prob <= 1,
## 	      is.numeric(lB), is.numeric(uB), lB <= uB)
##     edL <- vector("list", n)
##     nmbEdges <- 0L
##     for (i in seq_len(n - 2)) {
##         listSize <- rbinom(1, n - i, prob)
##         nmbEdges <- nmbEdges + listSize
##         edgeList <- sample(seq(i + 1, n), size = listSize)
##         weightList <- runif(length(edgeList), min = lB, max = uB)
##         edL[[i]] <- list(edges = edgeList, weights = weightList)
##     }
##     if (nmbEdges > 0) {
## 	edL[[n-1]] <-
## 	    if (rbinom(1, 1, prob) == 1)
## 		list(edges = n,
## 		     weights = runif(1, min = lB, max = uB))
## 	    else
## 		list(edges = integer(0), weights = numeric(0))
## 	edL[[n]] <- list(edges = integer(0), weights = numeric(0))
## 	names(edL) <- V
## 	new("graphNEL", nodes = V, edgeL = edL, edgemode = "directed")
##     }
##     else
## 	new("graphNEL", nodes = V, edgemode = "directed")
## }

randomDAG <- function (n, prob, lB = 0.1, uB = 1, V = as.character(1:n))
{
  ## Purpose: generate a random DAG on n nodes, topologically ordered
  ## 1..n: each edge i -> j (i < j) is present independently with
  ## probability 'prob'; present edges get weights drawn from Unif(lB, uB).
  ## Value: a directed "graphNEL" object with node labels V.
  stopifnot(n >= 2, is.numeric(prob), length(prob) == 1,
            0 <= prob, prob <= 1,
            is.numeric(lB), is.numeric(uB), lB <= uB)
  edgeL <- vector("list", n)
  totEdges <- 0L
  for (i in seq_len(n - 2)) {
    ## number of children of node i among nodes i+1 .. n
    nKids <- rbinom(1, n - i, prob)
    totEdges <- totEdges + nKids
    kids <- sample(seq(i + 1, n), size = nKids)
    edgeL[[i]] <- list(edges = kids,
                       weights = runif(length(kids), min = lB, max = uB))
  }
  ## node n-1 handled separately, because sample(n, 1) would be
  ## interpreted as sample(1:n, 1) rather than "the single value n"
  if (rbinom(1, 1, prob) > 0) {
    totEdges <- totEdges + 1
    edgeL[[n - 1]] <- list(edges = n,
                           weights = runif(1, min = lB, max = uB))
  } else {
    edgeL[[n - 1]] <- list(edges = integer(0), weights = numeric(0))
  }
  if (totEdges > 0) {
    edgeL[[n]] <- list(edges = integer(0), weights = numeric(0))
    names(edgeL) <- V
    new("graphNEL", nodes = V, edgeL = edgeL, edgemode = "directed")
  } else
    ## no edges at all: empty directed graph
    new("graphNEL", nodes = V, edgemode = "directed")
}

## A version of this is also in	/u/maechler/R/MM/Pkg-ex/graph/weightmatrix.R
## another on in  Matrix/R/sparseMatrix.R  function graph.wgtMatrix() :
## No longer in use __apart__ for  rmvDAG(..., back.compatible=TRUE)
wgtMatrix.0 <- function(g, transpose = TRUE)
{
## Purpose: work around "graph" package's  as(g, "matrix") bug
## ----------------------------------------------------------------------
## ACHTUNG: mat_[i,j]==1 iff j->i,
## whereas with as(g,"matrix") mat_[i,j]==1 iff i->j
## ----------------------------------------------------------------------
## Arguments: g: an object inheriting from (S4) class "graph"
## Value: p x p weight matrix; with transpose = TRUE (default),
## entry [i,j] carries the weight of the edge j -> i
## ----------------------------------------------------------------------
## Author: Martin Maechler, based on Seth Falcon's code;  Date: 12 May 2006

## MM: another buglet for the case of  "no edges":
if(numEdges(g) == 0) {
p <- length(nd <- nodes(g))
return( matrix(0, p,p, dimnames = list(nd, nd)) )
}
## Usual case, when there are edges:
w <- unlist(edgeData(g, attr = "weight"))
## we need the *transposed* matrix typically:
tm <- if(transpose) t(as(g, "matrix")) else as(g, "matrix")
## now is a 0/1 - matrix (instead of 0/wgts) with the 'graph' bug
## NOTE(review): the replacement below assumes that the order of
## unlist(edgeData(..)) matches the column-major order of the nonzero
## entries of 'tm' -- presumably true for the 'graph' versions this
## works around, but worth confirming, especially for transpose = TRUE
if(any(w != 1)) ## fix it
tm[tm != 0] <- w
## tm[i,j] is the weight of edge j -> i (default) resp. i -> j
tm
}

wgtMatrix <- function(g, transpose = TRUE) {
  ## Purpose: weight/adjacency matrix of graph 'g' via as(g, "matrix")
  ## (now reliable in the 'graph' package, we hope).  By default the
  ## result is transposed, so entry [i,j] refers to the edge j -> i --
  ## the convention used throughout this file.
  m <- as(g, "matrix")
  if (transpose) t(m) else m
}

rmvDAG <-
    function(n, dag,
             errDist = c("normal", "cauchy", "t4", "mix", "mixt3", "mixN100"),
             mix = 0.1, errMat = NULL, back.compatible = FALSE,
             use.node.names = !back.compatible)
{
  ## Purpose: Generate n samples from the linear SEM implied by a weighted
  ## DAG:  X_j = sum_{i<j} w_ji X_i + e_j,  with errors e_j drawn from the
  ## chosen distribution.  Columns of the result are the nodes, which must
  ## already be in topological order (checked below).
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - n       : number of samples (rows of the result)
  ## - dag     : graph object containing the DAG and edge weights
  ## - errDist : error distribution; the "mix*" variants replace a fraction
  ##             'mix' of the N(0,1) errors by heavy-tailed/outlier draws
  ## - mix     : fraction of outlier errors for the "mix*" distributions
  ## - errMat  : optional user-supplied (n x p) error matrix, overrides
  ##             'errDist'
  ## - back.compatible: use the old weight-matrix extraction wgtMatrix.0()
  ## - use.node.names : label result columns with the DAG's node names
  ## Value: (n x p) numeric matrix of samples
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 26 Jan 2006;  Martin Maechler

  ## check input &  initialize variables
  stopifnot(is(dag, "graph"),
            (p <- length(nodes(dag))) >= 2)

  ## weightMatrix[i,j] = weight of edge j -> i (see wgtMatrix())
  weightMatrix <- if(back.compatible) wgtMatrix.0(dag) else wgtMatrix(dag)

  ## check topological order: all edges must point from a lower to a
  ## higher index, and there must be no self-loops
  nonZeros <- which(weightMatrix != 0, arr.ind = TRUE)
  if (nrow(nonZeros) > 0) {
    if (any(nonZeros[,1] - nonZeros[,2] < 0) || any(diag(weightMatrix) != 0))
      stop("Input DAG must be topologically ordered!")
  }

  errDist <- match.arg(errDist)
  if(grepl("^mix", errDist))
    ## mix the outlier draws 'outs' uniformly at random into an N(0,1) matrix
    eMat <- function(outs) { # (n,p)
      X <- c(rnorm(n*p - length(outs)), outs)
      matrix(sample(X), nrow = n)
    }
  if(is.null(errMat)) {
    ## generate errors e_i
    errMat <-
      switch(errDist,
             "normal" = matrix(rnorm  (n*p),  nrow = n),
             "cauchy" = matrix(rcauchy(n*p),  nrow = n),
             "t4" =     matrix(rt(n*p, df = 4), nrow = n),
             "mix"    = eMat(rcauchy(round(mix*n*p))),
             "mixt3"  = eMat(     rt(round(mix*n*p), df = 3)),
             "mixN100"= eMat(  rnorm(round(mix*n*p), sd = 10)))
  }
  else { ## check & use 'errMat' argument:
    stopifnot(!is.null(dim.eM <- dim(errMat)),
              dim.eM == c(n,p), is.numeric(errMat))
  }
  if(use.node.names)
    colnames(errMat) <- nodes(dag) # == colnames(weightMatrix)

  ## compute X recursively: X[,j] depends only on earlier columns.
  ## BUG FIX: this used to test 'sum(weightMatrix) > 0', which wrongly
  ## skips the propagation step whenever the (possibly negative) weights
  ## happen to sum to <= 0; test for the presence of any edge instead.
  if (any(weightMatrix != 0)) {
    X <- errMat
    for (j in 2:p) { ## uses X[*, 1:(j-1)] -- "recursively" !
      ij <- 1:(j-1)
      X[,j] <- X[,j] + X[, ij, drop = FALSE] %*% weightMatrix[j, ij]
    }
    X
  }
  else ## no edges: the samples are just the errors
    errMat
}

pcSelect <- function(y, dm, alpha, corMethod = "standard",
verbose = FALSE, directed = FALSE)
{
## Purpose: Find columns in dm, that have nonzero parcor with y given
## any other set of columns in dm
## ----------------------------------------------------------------------
## Arguments:
## - y: Response Vector (length(y)=nrow(dm))
## - dm: Data matrix (rows: samples, cols: nodes)
## - alpha: Significance level of individual partial correlation tests
## - corMethod: "standard" or "Qn" for standard or robust correlation
##              estimation
## - verbose: 0-no output, 1-small output, 2-details
## - directed: NOTE(review) -- accepted but never used in this body;
##   apparently kept only for interface compatibility with callers
##   (e.g. pcSelect.presel)
## ----------------------------------------------------------------------
## Value: List
## - G: boolean vector with connected nodes
## - zMin: Minimal z values
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 27.4.07

stopifnot((n <- nrow(dm)) >= 1,
(p <- ncol(dm)) >= 1)
vNms <- colnames(dm)
## cl <- match.call()

## zMin[1] is a dummy slot for the response; zMin[i+1] tracks the
## smallest |z| statistic seen so far for column i of dm
zMin <- c(0,rep.int(Inf,p))
## correlation matrix of (response, predictors): response is column 1
C <- mcor(cbind(y,dm), method = corMethod)
## two-sided normal quantile: |z| <= cutoff  <=>  judged independent
cutoff <- qnorm(1 - alpha/2)
n.edgetests <- numeric(1)# final length = max { ord}
## G := complete graph :
## G[1] = FALSE marks the response itself; G[i+1] says "column i is
## still connected to the response"
G <- c(FALSE,rep.int(TRUE,p))
seq_p <- seq_len(p+1L) # = 1:(p+1)

done <- FALSE
ord <- 0
## PC-style edge elimination: grow the conditioning-set size 'ord'
## until no connected node has enough neighbours for a set of that size
while (!done && any(G)) {
n.edgetests[ord+1] <- 0
done <- TRUE
ind <- which(G)
remEdges <- length(ind)
if(verbose >= 1)
cat("Order=",ord,"; remaining edges:",remEdges,"\n", sep = '')
for (i in 1:remEdges) {
if(verbose && (verbose >= 2 || i%%100 == 0)) cat("|i=",i,"|iMax=",remEdges,"\n")
## index 1 of C is the response (this overwrites the 'y' argument,
## which is no longer needed after building C above)
y <- 1
x <- ind[i]

if (G[x]) {
nbrsBool <- G
nbrsBool[x] <- FALSE
nbrs <- seq_p[nbrsBool]
## neighbors of y without itself and x
length_nbrs <- length(nbrs)

if (length_nbrs >= ord) {
if (length_nbrs > ord) done <- FALSE
S <- seq_len(ord)

## now includes special cases (ord == 0) or (length_nbrs == 1):
repeat {
n.edgetests[ord+1] <- n.edgetests[ord+1]+1
z <- zStat(x,y, nbrs[S], C,n)
if(abs(z) < zMin[x]) zMin[x] <- abs(z)
if (verbose >= 2)
cat(paste("x:",vNms[x-1],"y:",(ytmp <- round((p+1)/2)),"S:"),
c(ytmp,vNms)[nbrs[S]], paste("z:",z,"\n"))
if (abs(z) <= cutoff) {
## test not rejected: x conditionally independent of response
G[x] <- FALSE
break
}
else {
## try the next conditioning set of the same size
nextSet <- getNextSet(length_nbrs, ord, S)
if(nextSet$wasLast)
break
S <- nextSet$nextSet
}
} ## {repeat}
}
} ## end if( G )
} ## end for(i ..)
ord <- ord+1
} ## end while

## return
## drop the leading dummy entry (the response) from both vectors
list(G = setNames(G[-1L], vNms),
zMin = zMin[-1])
}## pcSelect

zStat <- function(x, y, S, C, n)
{
  ## Purpose: Fisher z-transform test statistic for the partial
  ## correlation of variables x and y given the set S.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - x,y,S: are x and y conditionally independent given S?
  ## - C: correlation matrix among the nodes
  ## - n: number of samples used to estimate C
  ## Value: the z statistic; NA (from r = +/- 1, i.e. +/- Inf) maps to 0
  ## ----------------------------------------------------------------------
  ## Author: Martin Maechler, 22 May 2006; Markus Kalisch
  rho <- pcorOrder(x, y, S, C)
  stat <- sqrt(n - length(S) - 3) * 0.5 * log.q1pm(rho)
  if (is.na(stat)) 0 else stat
}

condIndFisherZ <- function(x, y, S, C, n, cutoff,
                           verbose = isTRUE(getOption("verbose.pcalg.condIFz")))
{
  ## Purpose: Return boolean result on conditional independence of x and
  ## y given S, using the Fisher z-transform of the partial correlation.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - x,y,S: are x and y conditionally independent given S?
  ## - C: correlation matrix among nodes
  ## - n: number of samples used to estimate C
  ## - cutoff: critical value for the individual partial correlation test
  ## - verbose: currently unused in this body; kept for interface
  ##   compatibility
  ## Value: TRUE iff x and y are judged conditionally independent given S
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 26 Jan 2006, 17:32

  r <- pcorOrder(x, y, S, C)

  ## Fisher z statistic.  IDIOM FIX: this was stored in a variable named
  ## 'T', which shadows R's TRUE alias -- renamed to 'z'.
  z <- sqrt(n - length(S) - 3) * 0.5 * log.q1pm(r)

  ## MM: z is only NA when 'r' already is (r = +-1  <==>  z = +-Inf);
  ## treat NA as independent -- conservative: the edge gets deleted
  is.na(z) || abs(z) <= cutoff
}

pcorOrder <- function(i, j, k, C, cut.at = 0.9999999) {
  ## Purpose: partial correlation of variables i and j given the set k,
  ## computed from the correlation matrix C.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - i,j,k: partial correlation of i and j given k
  ## - C: correlation matrix among nodes
  ## - cut.at: result is clamped to [-cut.at, cut.at]
  ## Value: the (clamped) partial correlation; NA maps to 0
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 26 Jan 2006; Martin Maechler
  ord <- length(k)
  if (ord == 0L) {
    ## marginal correlation
    rho <- C[i, j]
  } else if (ord == 1L) {
    ## first-order partial correlation, closed form
    rho <- (C[i, j] - C[i, k] * C[j, k]) /
      sqrt((1 - C[j, k]^2) * (1 - C[i, k]^2))
  } else {
    ## higher order: via the (pseudo)inverse of the submatrix
    Prec <- pseudoinverse(C[c(i, j, k), c(i, j, k)])
    rho <- -Prec[1, 2] / sqrt(Prec[1, 1] * Prec[2, 2])
  }
  if (is.na(rho)) 0 else max(-cut.at, min(cut.at, rho))
}

compareGraphs <- function(gl,gt) {
## Purpose: Return TPR, FPR and TDR of comparison of two undirected graphs
## ----------------------------------------------------------------------
## Arguments:
## - gl: Estimated graph (may be directed, but the direction will
##       be dropped internally)
## - gt: True graph (may be directed, but the direction will
##       be dropped internally)
## ----------------------------------------------------------------------
## Value: named vector c(tpr, fpr, tdr).  All edge/gap counts below are
## divided by 2 because the adjacency matrices of the undirected graphs
## are symmetric (each edge appears twice).
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 26 Jan 2006, 17:35

## When 'graph' returns the 'weight matrix' again:
##   ml <- as(ugraph(gl), "matrix")
##   mt <- as(ugraph(gt), "matrix")
##   p <- dim(ml)[1]
## ugraph() drops edge directions; wgtMatrix() gives the weight matrix
ml <- wgtMatrix(ugraph(gl))
mt <- wgtMatrix(ugraph(gt))
p <- dim(ml)[2]

## binarize both matrices (weights -> 0/1 adjacency)
mt[mt != 0] <- rep(1,sum(mt != 0))
ml[ml != 0] <- rep(1,sum(ml != 0)) ## inserted to fix bug

## FPR :=  #{misplaced edges} / #{true gaps}
## diffm > 0 marks entries estimated as edges that are gaps in truth
diffm <- ml-mt
## off-diagonal zero pairs of mt; the diagonal contributes p zeros,
## removed before halving
nmbTrueGaps <- (sum(mt == 0)-p)/2
##  print(list(p=p,sum=sum(mt==0),mt=mt,ml=ml,nmbTrueGaps=nmbTrueGaps,diffm=diffm))
## convention: with no true gaps, FPR is reported as 1
fpr <- if (nmbTrueGaps == 0) 1 else (sum(diffm > 0)/2)/nmbTrueGaps

## TPR := #{correctly found edges} / #{true edges}
## diffm2 > 0 marks true edges missed by the estimate
diffm2 <- mt-ml
nmbTrueEdges <- (sum(mt == 1)/2)
tpr <- if (nmbTrueEdges == 0) 0 else 1 - (sum(diffm2 > 0)/2)/nmbTrueEdges

## TDR := #{correctly found edges} / #{found edges}
trueEstEdges <- (nmbTrueEdges-sum(diffm2 > 0)/2) ## #{true edges} - #{not detected}
tdr <-
if (sum(ml == 1) == 0) { ## no edges detected
if (trueEstEdges == 0) 1 ## no edges in true graph
else 0
} else trueEstEdges/(sum(ml == 1)/2)

## return named vector:
c(tpr = tpr, fpr = fpr, tdr = tdr)
}

getNextSet <- function(n, k, set) {
  ## Purpose: Generate the successor of 'set' in the lexicographic list
  ## of all size-k subsets of 1:n, and report whether 'set' was already
  ## the last one.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - n,k: subsets of size k out of the numbers 1:n
  ## - set: the previous subset in the list (increasing)
  ## Value: list(nextSet, wasLast); when wasLast is TRUE, nextSet is the
  ## input 'set' unchanged
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 26 Jan 2006, 17:37

  ## the largest (last) subset is (n-k+1):n; count how many trailing
  ## entries of 'set' already sit at their maximal value
  maxSet <- seq.int(n - k + 1, n)
  nAtMax <- sum((maxSet - set) == 0)
  chInd <- k - nAtMax                 # position to increment
  if (chInd == 0)
    return(list(nextSet = set, wasLast = TRUE))
  newVal <- set[chInd] + 1
  set[chInd] <- newVal
  if (chInd < k)
    ## reset everything after the incremented position to the smallest
    ## strictly increasing run
    set[(chInd + 1):k] <- newVal + seq_len(nAtMax)
  list(nextSet = set, wasLast = FALSE)
}

mcor <- function(dm, method = c("standard", "Qn", "QnStable",
"ogkScaleTau2", "ogkQn", "shrink"))
{
## Purpose: Compute correlation matrix (perhaps elementwise)
## ----------------------------------------------------------------------
## Arguments:
## - dm: Data matrix; rows: samples, cols: variables
## - method: "Qn" or "standard" (default) envokes robust (based on Qn
##           scale estimator) or standard correlation estimator, respectively.
## NOTE(review): "Qn"/"QnStable" rely on Qn(), and "ogk*" on covOGK(),
## scaleTau2(), s_Qn(), hard.rejection() -- presumably from the
## 'robustbase' package; none of these are defined in this file.
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 27 Jan 2006

p <- ncol(dm)
method <- match.arg(method)
switch(method,
"Qn" = {
## pairwise robust correlation via the scale identity
## cor(x,y) = (Qn(x+y)^2 - Qn(x-y)^2) / (4 Qn(x) Qn(y)),
## clamped elementwise to [-1, 1]
res <- diag(nrow = p)
Qn. <- apply(dm, 2L, Qn)
for (i in seq_len(p-1)) {
for (j in i:p) {
qnSum  <- Qn(dm[,i] + dm[,j])
qnDiff <- Qn(dm[,i] - dm[,j])
res[j,i] <- res[i,j] <-
max(-1,
min(1, (qnSum^2 - qnDiff^2) / (4*Qn.[i]*Qn.[j])))
}
}
res
},
"QnStable" = {
## as "Qn", but columns are first standardized by their Qn scale and
## the denominator qnSum^2 + qnDiff^2 is used (numerically stabler)
res <- diag(nrow = p)
Qn. <- apply(dm, 2L, Qn) # Qn(.) for all columns
## xQn := each column divided by its Qn(.)
xQn <- dm / rep(Qn., each=nrow(dm))
for (i in seq_len(p-1)) {
xQn.i <- xQn[,i]
for (j in i:p) {
qnSum  <- Qn(xQn.i + xQn[,j])
qnDiff <- Qn(xQn.i - xQn[,j])
res[j,i] <- res[i,j] <-
max(-1,
min(1, (qnSum^2 - qnDiff^2) / (qnSum^2 + qnDiff^2)))
}
}
res
},
"ogkScaleTau2" = {
## orthogonalized Gnanadesikan-Kettenring covariance, scaleTau2 scale
cov2cor(covOGK(dm, n.iter = 2, sigmamu = scaleTau2,
weight.fn = hard.rejection)$cov)
},
"ogkQn" = {
## orthogonalized Gnanadesikan-Kettenring covariance, Qn scale
cov2cor(covOGK(dm, n.iter = 2, sigmamu = s_Qn,
weight.fn = hard.rejection)$cov)
},
"standard" = cor(dm),
"shrink" = {
## shrink only the correlations involving column 1 toward zero by the
## factor g -- NOTE(review): column 1 is presumably the response (as
## in pcSelect(), which binds y as the first column); confirm before
## using this method elsewhere
CM <- cor(dm)
n <- nrow(dm)
p <- ncol(dm)
S1 <- sum(CM[1,-1]^2)
S2 <- sum((1-CM[1,-1]^2)^2)

g <- S1 / (S1 + S2/n) # is in [0,1]
scor3 <- CM
for(i in 2:p) {
scor3[1,i] <- g*CM[1,i]
scor3[i,1] <- g*CM[i,1]
}
scor3
}
)# {switch}
}

pcSelect.presel <- function(y, dm, alpha, alphapre, corMethod = "standard",
                            verbose = 0, directed = FALSE)
{
  ## Purpose: Two-stage variable selection: first run pcSelect() at the
  ## (typically larger) level 'alphapre' to preselect columns of dm, then
  ## rerun pcSelect() at level 'alpha' on the preselected columns only.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - y: Response Vector (length(y)=nrow(dm))
  ## - dm: Data matrix (rows: samples, cols: nodes)
  ## - alpha   : Significance level of individual partial correlation tests
  ## - alphapre: Significance level in preselective use of pcSelect
  ## - corMethod: "standard" or "Qn" for standard or robust correlation
  ##              estimation
  ## Value: list(pcs, Xnew, zMin): final selection indicator over all
  ## columns of dm, the preselected data matrix, and the minimal |z|
  ## values (updated on the preselected columns by the second stage)
  ## ----------------------------------------------------------------------
  ## Author: Philipp Ruetimann, Date: 5 Mar 2008

  ## stage 1: preselection at level alphapre
  tmp <- pcSelect(y, dm, alpha = alphapre,
                  corMethod = corMethod, verbose = verbose, directed = directed)
  pcs <- tmp$G
  zmi <- tmp$zMin
  ppcs <- which(pcs == 1L)           # indices of preselected columns
  Xnew <- dm[, ppcs, drop = FALSE]

  ## stage 2: rerun on the preselected columns at level alpha
  tmp2 <- pcSelect(y, dm = Xnew, alpha = alpha,
                   corMethod = corMethod, verbose = verbose, directed = directed)
  zmi[ppcs] <- tmp2$zMin
  ## vectorized update of the selection indicator (resolves the old
  ## "MM FIXME -- do without for()": equivalent to walking the TRUE
  ## positions of pcs in order and overwriting them with tmp2$G)
  pcs[ppcs] <- tmp2$G

  list(pcs = pcs, Xnew = Xnew, zMin = zmi)
}

corGraph <- function(dm, alpha = 0.05, Cmethod = "pearson")
{
  ## Purpose: Compute a correlation graph: nodes are the columns of dm,
  ## and an (undirected) edge connects two columns whose correlation test
  ## is significant at level alpha.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - dm: Data matrix with rows as samples and cols as variables
  ## - alpha: Significance level for correlation test
  ## - Cmethod: a character string indicating which correlation coefficient
  ##          is to be  used for the test.  One of '"pearson"',
  ##          '"kendall"', or '"spearman"', can be abbreviated.
  ## Value: "graphNEL" object with node names taken from colnames(dm)
  ## (or "1".."p" when dm has no column names)
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 30 Jan 2006; Martin Maechler (preserve cns)

  stopifnot(is.numeric(p <- ncol(dm)), p >= 2)
  nodeNames <- colnames(dm)
  if (is.null(nodeNames)) nodeNames <- as.character(1:p)
  adj <- matrix(0, p, p)
  ## test every unordered pair once (upper triangle)
  for (i in seq_len(p - 1)) {
    for (j in (i + 1):p) {
      pval <- cor.test(dm[, i], dm[, j], alternative = "two.sided",
                       method = Cmethod)$p.value
      adj[i, j] <- pval < alpha
    }
  }
  ## symmetrize and label
  adj <- adj + t(adj)
  dimnames(adj) <- list(nodeNames, nodeNames)
  as(adj, "graphNEL")
}

##################################################
## CPDAG
##################################################

##################################################
## dag2cpdag
##################################################

dag2cpdag <- function(g)
{
## Purpose: Compute the (unique) completed partially directed graph (CPDAG)
## that corresponds to the input DAG; result is a graph object
## In the current implementation, this function is just a wrapper
## function for 'dag2essgraph'
## ----------------------------------------------------------------------
## Arguments:
## - dag: input DAG (graph object)
## Value: whatever dag2essgraph(g) returns (a graph object)
## NOTE(review): dag2essgraph() is not defined in this file
## ----------------------------------------------------------------------
## Author: Alain Hauser, Date: 14 Mar 2015
dag2essgraph(g)
}

#dag2cpdag <- function(g)
#{
#  ## Purpose: Compute the (unique) completed partially directed graph (CPDAG)
#  ## that corresponds to the input DAG; result is a graph object
#  ## ----------------------------------------------------------------------
#  ## Arguments:
#  ## - dag: input DAG (graph object)
#  ## ----------------------------------------------------------------------
#  ## Author: Diego Colombo, Date: 10 Jun 2013, 11:06
#
#    amat <- as(g, "matrix")
#    amat[amat != 0] <- 1
#    skel.amat <- amat + t(amat)
#    skel.amat[skel.amat == 2] <- 1
#    cpdag <- skel.amat
#
#    ## search the v-structures in the DAG
#    ind <- which((amat == 1 & t(amat) == 0), arr.ind = TRUE)
#    tripleMatrix <- matrix(,0,3)
#    ## Go through all edges
#    for (i in seq_len(nrow(ind))) { ## MM(FIXME): growth of tripleMatrix
#        x <- ind[i,1]
#        y <- ind[i,2]
#        indY <- setdiff(which((amat[,y] == 1 & amat[y,] == 0), arr.ind = TRUE),x) ## x-> y <- z
#	if(length(newZ <- indY[amat[x,indY] == 0])) ## deparse.l.=0: no colnames
#	  tripleMatrix <- rbind(tripleMatrix, cbind(x, y, newZ, deparse.level=0),
#				deparse.level=0)
#    }
#    if ((m <- nrow(tripleMatrix)) > 0) {
#        deleteDupl <- logical(m)# all FALSE
#        for (i in seq_len(m))
#            if (tripleMatrix[i,1] > tripleMatrix[i,3])
#                deleteDupl[i] <- TRUE
#        if(any(deleteDupl))
#          tripleMatrix <- tripleMatrix[!deleteDupl,, drop=FALSE]
#
#        ## orient the v-structures in the CPDAG
#        for (i in seq_len(nrow(tripleMatrix))) {
#                x <- tripleMatrix[i,1]
#                y <- tripleMatrix[i,2]
#                z <- tripleMatrix[i,3]
#                cpdag[x,y] <- cpdag[z,y] <- 1
#                cpdag[y,x] <- cpdag[y,z] <- 0
#        }
#    }
#
#    ## orient the edges with the 3 orientation rules
#    repeat {
#        old_cpdag <- cpdag
#        ## Rule 1
#        ind <- which((cpdag == 1 & t(cpdag) == 0), arr.ind = TRUE)
#        for (i in seq_len(nrow(ind))) {
#            a <- ind[i, 1]
#            b <- ind[i, 2]
#            isC <- ((cpdag[b, ] == 1 & cpdag[, b] == 1) &
#                    (cpdag[a, ] == 0 & cpdag[, a] == 0))
#            if (any(isC)) {
#                indC <- which(isC)
#                cpdag[b, indC] <- 1
#                cpdag[indC, b] <- 0
#            }
#        }
#        ## Rule 2
#        ind <- which((cpdag == 1 & t(cpdag) == 1), arr.ind = TRUE)
#        for (i in seq_len(nrow(ind))) {
#            a <- ind[i, 1]
#            b <- ind[i, 2]
#            isC <- ((cpdag[a, ] == 1 & cpdag[, a] == 0) &
#                    (cpdag[, b] == 1 & cpdag[b, ] == 0))
#            if (any(isC)) {
#                cpdag[a, b] <- 1
#                cpdag[b, a] <- 0
#            }
#        }
#        ## Rule 3
#        ind <- which((cpdag == 1 & t(cpdag) == 1), arr.ind = TRUE)
#        for (i in seq_len(nrow(ind))) {
#            a <- ind[i, 1]
#            b <- ind[i, 2]
#            indC <- which((cpdag[a, ] == 1 & cpdag[, a] == 1) &
#                          (cpdag[, b] == 1 & cpdag[b, ] == 0))
#            if (length(indC) >= 2) {
#                cmb.C <- combn(indC, 2)
#                cC1 <- cmb.C[1, ]
#                cC2 <- cmb.C[2, ]
#                for (j in seq_along(cC1)) {
#                    c1 <- cC1[j]
#                    c2 <- cC2[j]
#                    if (c1 != c2 && cpdag[c1, c2] == 0 && cpdag[c2,c1] == 0) {
#                        cpdag[a, b] <- 1
#                        cpdag[b, a] <- 0
#                        break
#                    }
#                }
#            }
#        }
#        if (all(cpdag == old_cpdag))
#            break
#    }
#    as(cpdag,"graphNEL")
#}

## dag2cpdag <- function(dag) {
## Purpose: Compute the (unique) completed partially directed graph (CPDAG)
## that corresponds to the input DAG; result is a graph object
## ----------------------------------------------------------------------
## Arguments:
## - dag: input DAG (graph object)
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 31 Oct 2006, 15:30

##  p <- numNodes(dag)
##  ## transform DAG to adjacency matrix if any edges are present
##  if (numEdges(dag)==0) {
##    cpdag.res <- dag
##  } else {
##    dag <- as(dag,"matrix")
##    dag[dag!=0] <- 1

##    ## dag is adjacency matrix
##    e.df <- labelEdges(dag)
##    cpdag <- matrix(0, p,p)
##    for (i in seq_len(nrow(e.df))) {
##      if (e.df\$label[i]) {
##      } else {
##      }
##    }
##    rownames(cpdag) <- colnames(cpdag) <- as.character(seq(1,p))
##    cpdag.res <- as(cpdag,"graphNEL")
##  }
##  cpdag.res
##}

## make.edge.df <- function(amat) {
## Purpose: Generate a data frame describing some properties of a DAG
## (for extending to a CPDAG)
## The output contains xmin,xmax,head,tail,order (NA or number),
## type (1="d",0="u") in lexikographic order
## ----------------------------------------------------------------------
## Arguments:
## - amat: Adjacency matrix of DAG [x_ij=1 means i->j]
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 31 Oct 2006, 15:43

##  stopifnot(sum(amat)>0)
##  e <- which(amat==1,arr.ind=TRUE)
##  e.dup <- duplicated(t(apply(e,1,sort)))
##  nmb.edges <- sum(!e.dup)
##  res <- data.frame(xmin=rep(NA,nmb.edges),xmax=rep(NA,nmb.edges),
##                    order=rep(NA,nmb.edges),type=rep(1,nmb.edges))
##  pure.edges <- e[!e.dup,]
##  if(length(pure.edges)==2) dim(pure.edges) <- c(1,2)
##  for (i in seq_len(nrow(pure.edges))) {
##    if (all(amat[pure.edges[i,1],pure.edges[i,2]]==
##            amat[pure.edges[i,2],pure.edges[i,1]])) {
##      res\$type[i] <- 0
##      res\$tail[i] <- NA
##    } else {
##      res\$tail[i] <- pure.edges[i,1]
##    }
##  }
##  s.pure.edges <- t(apply(pure.edges,1,sort))
##  ii <- order(s.pure.edges[,1],s.pure.edges[,2])
##  res <- res[ii,]
##  res\$xmin <- s.pure.edges[ii,1]
##  res\$xmax <- s.pure.edges[ii,2]
##  res
##}

## orderEdges <- function(amat) {
## Purpose: Order the edges of a DAG according to Chickering
## (for extension to CPDAG)
## ----------------------------------------------------------------------
## Arguments:
## - amat: Adjacency matrix of DAG
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 31 Oct 2006, 15:42

##  stopifnot(isAcyclic(amat))
##  ordered.nodes <- topOrder(amat) ## parents before children
##  edge.df <- make.edge.df(amat)

##  eOrder <- 0
##  while(any(unOrdered <- is.na(edge.df\$order))) {
##    counter <- 0
## find y
##    y <- NA
##    found <- FALSE
##    while(!found) {
##      counter <- counter+1
##      node <- ordered.nodes[counter]
## which edges are incident to node?
##      nbr.nodes <- which(amat[,node]==1)
##      if(length(nbr.nodes)>0) {
##        unlabeled <- rep.int(FALSE, length(nbr.nodes))
##        for(i in seq_along(nbr.nodes)) {
##          x <- nbr.nodes[i]
## is edge edge x-y unlabeled?
##	  unlabeled[i] <- length(intersect(which(edge.df\$xmin==min(node,x) &
##						 edge.df\$xmax==max(node,x)),
##					   which(unOrdered))) > 0
##        }
## choose unlabeled edge with highest order node
##        if(any(unlabeled)) {
##          nbr.unlab <- nbr.nodes[unlabeled] # nbrnodes w. unlabeled edges
##          tmp <- ordered.nodes[ordered.nodes %in% nbr.unlab]
##          y <- tmp[length(tmp)]
## y <- last(ordered.nodes[which(ordered.nodes %in% nbr.unlab)])
##          edge.df\$order[edge.df\$xmin==min(node,y) &
##                        edge.df\$xmax==max(node,y)] <- eOrder
##          eOrder <- eOrder+1
##          found <- TRUE
##        }
##      }

##    } ## while !found

##  } ## while any(unOrdered)
##  edge.df
##}

## labelEdges <- function(amat) {
## Purpose: Label the edges in a DAG with "compelled" and "reversible"
## (for extension to a CPDAG)
## ----------------------------------------------------------------------
## Arguments:
## - amat: Adjacency matrix of DAG
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 31 Oct 2006;  prettified: MMaechler

## label=TRUE -> compelled
## label=FALSE -> reversible
##  edge.df <- orderEdges(amat)
##  lab <- rep(NA,dim(edge.df)[1])
##  edge.df <- edge.df[order(edge.df\$order),]
##  Tail <- edge.df\$tail

##  while(any(ina <- is.na(lab))) {
##    x.y <- which(ina)[1]
##    x <- Tail[x.y]
##    e1 <- which(Head == x & lab)
##    for(ee in e1) {
##      w <- Tail[ee]
##      if (any(wt.yh <- w == Tail & y.is.head))
##        lab[wt.yh] <- TRUE
##      else {
##        break
##      }
##    }
## edges going to y not starting from x
##    cand <- which(y.is.head  &  Tail != x)
##    if (length(cand) > 0) {
##      valid.cand <- rep(FALSE,length(cand))
##      for (iz in seq_along(cand)) {
##        z <- Tail[cand[iz]]
##        if (!any(Tail==z & Head==x)) ## NOT.parent.of.x :
##          valid.cand[iz] <- TRUE
##      }
##      cand <- cand[valid.cand]
##    }
##    lab[which(y.is.head & is.na(lab))] <- (length(cand) > 0)
##  }
##  edge.df\$label <- lab
##  edge.df
##}

##################################################
## pdag2dag
##################################################
find.sink <- function(gm) {
  ## Purpose: Find the sinks of an adjacency matrix; returns integer(0)
  ## when there is none.  A sink may have incident undirected edges, but
  ## no directed ones.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - gm: Adjacency matrix (gm[i,j] is edge from j to i)
  ## Value: integer vector of sink node indices
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 31 Oct 2006;  speedup: Martin Maechler, Dec.2013

  ## drop undirected edges: entries equal to 1 in both directions
  undirected <- (gm == t(gm)) & (gm == 1)
  gm[undirected] <- 0
  ## a sink has no remaining (directed) entry in its column
  which(colSums(gm) == 0)
}

adj.check <- function(gm, x)
{
## Purpose:  Return "TRUE", if:
## For every vertex y, adj to x, with (x,y) undirected, y is adjacent to
## all the other vertices which are adjacent to x
## ----------------------------------------------------------------------
## Arguments:
## - gm: adjacency matrix of graph
## - x: node number (number)
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 31 Oct 2006;
## several smart speedups: Martin Maechler, Dec.2013
## BUG FIX(review): the function header, the computation of x's other
## neighbours, and the actual adjacency comparison were missing here --
## the loop body consisted only of 'adj.y <- ...; return(FALSE)', which
## made the result FALSE whenever x had any undirected neighbour.
## Reconstructed from the stated purpose.

  gm.1 <- (gm == 1)
  xr <- gm.1[x,]
  xc <- gm.1[,x]
  nx <- which(xr | xc)          # all vertices adjacent to x
  ## undirected neighbors of x
  un <- which(xr & xc)
  for(y in un) {
    adj.x <- setdiff(nx, y)     # x's neighbours other than y
    adj.y <- setdiff(which(gm.1[y,] | gm.1[,y]), x)
    ## y must be adjacent to every other neighbour of x
    if(!all(adj.x %in% adj.y))
      return(FALSE)
  }
  TRUE
}

amat2dag <- function(amat) {
  ## Purpose: Transform the adjacency matrix of a PDAG into the adjacency
  ## matrix of SOME DAG with the same skeleton: draw a random node order
  ## and orient every edge from the earlier to the later node.
  ## Used in pdag2dag if extension is not possible.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - amat: adjacency matrix; x -> y if amat[x,y]=1, amat[y,x]=0
  ## Value: adjacency matrix of a DAG with the same skeleton
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 31 Oct 2006, 15:23

  p <- dim(amat)[1]
  ## skeleton: symmetrize and binarize
  skel <- amat + t(amat)
  skel[skel > 1] <- 1

  ## random permutation of the nodes
  ord <- sample.int(p)
  skel <- skel[ord, ord]

  ## keep only the upper triangle: each edge oriented from the earlier
  ## to the later node in the permuted order (acyclic by construction)
  skel[lower.tri(skel)] <- 0

  ## undo the permutation
  inv <- order(ord)
  skel[inv, inv]
}

##################################################
## udag2pdag
##################################################
udag2pdag <- function(gInput, verbose = FALSE) {
## Purpose: Transform the Skeleton of a pcAlgo-object to a PDAG using
## the rules of Pearl. The output is again a pcAlgo-object.
## ----------------------------------------------------------------------
## Arguments:
## - gInput: pcAlgo object
## - verbose: 0 - no output, 1 - detailed output
## ----------------------------------------------------------------------
## Value: pcAlgo object; @graph holds the completed pattern, or -- if the
## v-structure orientation admits no consistent DAG extension -- the
## CPDAG of a random DAG drawn on the skeleton (see else-branch below).
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: Sep 2006, 15:03

res <- gInput
## an empty graph needs no orientation
if (numEdges(gInput@graph) > 0) {
g <- as(gInput@graph,"matrix") ## g_ij if i->j
p <- as.numeric(dim(g)[1])
pdag <- g
ind <- which(g == 1,arr.ind = TRUE)

## Create minimal pattern
## i.e. orient x -> y <- z for every unshielded triple x-y-z whose
## middle node y is NOT in the separating set of (x,z)
for (i in seq_len(nrow(ind))) {
x <- ind[i,1]
y <- ind[i,2]
allZ <- setdiff(which(g[y,] == 1),x) ## x-y-z
for (z in allZ) {
if (g[x,z] == 0  &&
!(y %in% gInput@sepset[[x]][[z]] ||
y %in% gInput@sepset[[z]][[x]])) {
if (verbose) {
cat("\n",x,"->",y,"<-",z,"\n")
cat("Sxz=",gInput@sepset[[z]][[x]],"Szx=",gInput@sepset[[x]][[z]])
}
pdag[x,y] <- pdag[z,y] <- 1
pdag[y,x] <- pdag[y,z] <- 0
}
}
}

## Test whether this pdag allows a consistent extension
res2 <- pdag2dag(as(pdag,"graphNEL"))

if (res2\$success) {
## Convert to complete pattern: use rules by Pearl
## (Meek's rules 1-3, iterated to a fixpoint: stop once one full
## sweep leaves pdag unchanged)
old_pdag <- matrix(0, p,p)
while (!all(old_pdag == pdag)) {
old_pdag <- pdag
## rule 1: a -> b - c with a,c non-adjacent  =>  b -> c
## (otherwise a new collider a -> b <- c would arise)
ind <- which((pdag == 1 & t(pdag) == 0), arr.ind = TRUE) ## a -> b
for (i in seq_len(nrow(ind))) {
a <- ind[i,1]
b <- ind[i,2]
indC <- which( (pdag[b,] == 1 & pdag[,b] == 1) & (pdag[a,] == 0 & pdag[,a] == 0))
if (length(indC) > 0) {
pdag[b,indC] <- 1
pdag[indC,b] <- 0
if (verbose)
cat("\nRule 1:",a,"->",b," and ",b,"-",indC,
" where ",a," and ",indC," not connected: ",b,"->",indC,"\n")
}
}
## x11()
## plot(as(pdag,"graphNEL"), main="After Rule1")

## rule 2: a -> c -> b with a - b  =>  a -> b  (avoid a cycle)
ind <- which((pdag == 1 & t(pdag) == 1), arr.ind = TRUE) ## a - b
for (i in seq_len(nrow(ind))) {
a <- ind[i,1]
b <- ind[i,2]
indC <- which( (pdag[a,] == 1 & pdag[,a] == 0) & (pdag[,b] == 1 & pdag[b,] == 0))
if (length(indC) > 0) {
pdag[a,b] <- 1
pdag[b,a] <- 0
if (verbose) cat("\nRule 2: Kette ",a,"->",indC,"->",
b,":",a,"->",b,"\n")
}
}
## x11()
## plot(as(pdag,"graphNEL"), main="After Rule2")

## rule 3: a - b, a - c1, a - c2, c1 -> b, c2 -> b with c1, c2
## non-adjacent  =>  a -> b
ind <- which((pdag == 1 & t(pdag) == 1), arr.ind = TRUE) ## a - b
for (i in seq_len(nrow(ind))) {
a <- ind[i,1]
b <- ind[i,2]
indC <- which( (pdag[a,] == 1 & pdag[,a] == 1) & (pdag[,b] == 1 & pdag[b,] == 0))
if (length(indC) >= 2) {
## cat("R3: indC = ",indC,"\n")
g2 <- pdag[indC,indC]
## print(g2)
if (length(g2) <= 1) {
g2 <- 0
} else {
diag(g2) <- rep(1,length(indC)) ## no self reference
}
if (any(g2 == 0)) { ## if two nodes in g2 are not connected
pdag[a,b] <- 1
pdag[b,a] <- 0
if (verbose) cat("\nRule 3:",a,"->",b,"\n")
}
}
}
## x11()
## plot(as(pdag,"graphNEL"), main="After Rule3")

## rule 4 (disabled; only needed with background knowledge)
##-         ind <- which((pdag==1 & t(pdag)==1), arr.ind=TRUE) ## a - b
##-         if (length(ind)>0) {
##-           for (i in seq_len(nrow(ind))) {
##-             a <- ind[i,1]
##-             b <- ind[i,2]
##-             indC <- which( (pdag[a,]==1 & pdag[,a]==1) & (pdag[,b]==0 & pdag[b,]==0))
##-             l.indC <- length(indC)
##-             if (l.indC>0) {
##-               found <- FALSE
##-               ic <- 0
##-               while(!found & (ic < l.indC)) {
##-                 ic <- ic + 1
##-                 c <- indC[ic]
##-                 indD <- which( (pdag[c,]==1 & pdag[,c]==0) & (pdag[,b]==1 & pdag[b,]==0))
##-                 if (length(indD)>0) {
##-                   found <- TRUE
##-                   pdag[b,a] = 0
##-                   if (verbose) cat("Rule 4 applied \n")
##-                 }
##-               }
##-             }
##-           }
##-         }

}
res@graph <- as(pdag,"graphNEL")
} else {
## was not extendable; random DAG chosen
res@graph <- res2\$graph
## convert to CPDAG
res@graph <- dag2cpdag(res@graph)
}
}
return(res)
} ## udag2pdag

shd <- function(g1,g2)
{
  ## Purpose: Structural Hamming Distance between graphs g1 and g2,
  ## i.e. the number of edge deletions, insertions and re-orientations
  ## needed to turn g1 into g2.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - g1, g2: input graphs (pcAlgo objects or graphNEL objects; per the
  ##   original note, adjacency matrices with m[x,y]=1 iff x->y and
  ##   m[x,y]=m[y,x]=1 iff x-y follow the same convention)
  ## Value: the SHD (non-negative number)
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date:  1 Dec 2006, 17:21

  ## Idea: transform g1 into g2, counting the operations.
  ## First unwrap pcAlgo objects, then coerce graphs to 0/1 matrices.
  if (is(g1, "pcAlgo")) g1 <- g1@graph
  if (is(g2, "pcAlgo")) g2 <- g2@graph
  if (is(g1, "graphNEL"))
    m1 <- 1 * (wgtMatrix(g1, transpose = FALSE) != 0)
  if (is(g2, "graphNEL"))
    m2 <- 1 * (wgtMatrix(g2, transpose = FALSE) != 0)

  dist <- 0
  ## symmetric skeletons: 1 iff an edge (any orientation) is present
  sk1 <- pmin(m1 + t(m1), 1)
  sk2 <- pmin(m2 + t(m2), 1)
  dd <- sk1 - sk2
  ## superfluous edges of g1 (present in g1, absent in g2): delete
  sup <- which(dd > 0)
  m1[sup] <- 0
  dist <- dist + length(sup)/2
  ## missing edges (absent in g1, present in g2): copy from g2
  mis <- which(dd < 0)
  m1[mis] <- m2[mis]
  dist <- dist + length(mis)/2
  ## remaining differences are orientation mismatches on shared edges
  dor <- abs(m1 - m2)
  dist + sum((dor + t(dor)) > 0)/2
}

################################################################################
## New in V8 ; uses  vcd  package
################################################################################
ci.test <- function(x,y, S = NULL, dm.df) {
  ## p-value for the conditional independence x _||_ y | S on the
  ## discrete data in 'dm.df' (columns selected by index or name).
  ## Degenerate tables (some margin with < 2 levels) yield p = 1;
  ## marginal independence uses a simulated Fisher test, conditional
  ## independence uses vcd::coindep_test on the stratified table.
  stopifnot(is.data.frame(dm.df), ncol(dm.df) > 1)
  ctab <- table(dm.df[, c(x, y, S)])
  if (any(dim(ctab) < 2))
    return(1)
  if (length(S) == 0)
    return(fisher.test(ctab, simulate.p.value = TRUE)$p.value)
  vcd::coindep_test(ctab, 3:(length(S) + 2))$p.value
}

flipEdges <- function(amat,ind) {
  ## Reverse the orientation information of selected edges: for every
  ## row (i, j) of 'ind', the entries amat[i,j] and amat[j,i] are
  ## swapped in the returned matrix (the input is left untouched).
  out <- amat
  for (k in seq_len(nrow(ind))) {
    i <- ind[k, 1]
    j <- ind[k, 2]
    out[i, j] <- amat[j, i]
    out[j, i] <- amat[i, j]
  }
  out
}

pdag2dag <- function(g, keepVstruct = TRUE) {
## Purpose: Generate a consistent extension of a PDAG to a DAG; if this
## is not possible, a random extension of the skeleton is returned and
## a warning is issued.
## ----------------------------------------------------------------------
## Arguments:
## - g: PDAG (graph object)
## - keepVstruct: TRUE - vStructures are kept
## ----------------------------------------------------------------------
## Value: list(graph = <graphNEL>, success = <logical>); success FALSE
## means no consistent extension was found and a random DAG on the
## skeleton (via amat2dag) is returned instead.
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: Sep 2006; tweaks: Martin M
## NOTE(review): 'keepVstruct' is never referenced in this body --
## confirm against callers (udag2pdagSpecial passes keepVstruct = FALSE).

not.yet <- FALSE
if (numEdges(g) == 0) {
graph <- g
} else {
gm <- wgtMatrix(g) ## gm_i_j is edge from j to i
storage.mode(gm) <- "integer"
## normalise arbitrary positive weights to 1
gm[which(gm > 0 & gm != 1)] <- 1L

## 'a' is the working copy that shrinks as sinks are removed;
## 'gm2' accumulates the orientations w.r.t. the original node set
a <- gm2 <- gm
cn2 <- colnames(gm2)
go.on <- TRUE
while(go.on && length(a) > 1 && sum(a) > 0) {
not.yet <- TRUE
sinks <- find.sink(a)
if (length(sinks) > 0) {
## NOTE(review): 'not.yet' is cleared unconditionally below, so this
## inner while processes only the first sink per outer iteration, and
## the counter increment sits outside it -- verify brace placement
## against the released pcalg sources.
counter <- 1L
while(not.yet && counter <= length(sinks)) {
x <- sinks[counter]
not.yet <- FALSE
## orient edges
inc.to.x <- which(a[,x] == 1L & a[x,] == 1L) ## undirected
if (length(inc.to.x) > 0) {
## map var.names to col pos in orig adj matrix
## bug: real.inc.to.x <- as.numeric(rownames(a)[inc.to.x])
real.inc.to.x <- which(cn2 %in% rownames(a)[inc.to.x])
real.x        <- which(cn2 %in% rownames(a)[x])
## point all undirected neighbours towards the sink x
gm2[real.x, real.inc.to.x] <- 1L
gm2[real.inc.to.x, real.x] <- 0L
}
## remove x and all edges connected to it
a <- a[-x,-x]
}
counter <- counter + 1L
}
}
## progress was made iff a sink was consumed (not.yet == FALSE)
go.on <- !not.yet
}## { while }

## not.yet TRUE here: some round found no usable sink => not extendable
graph <- if (not.yet) {
## warning("PDAG not extendible: Random DAG on skeleton drawn")
as(amat2dag(gm), "graphNEL")
} else ## success :
as(t(gm2), "graphNEL")
}
list(graph = graph, success = !not.yet)
}

udag2pdagSpecial <- function(gInput, verbose = FALSE, n.max = 100) {
## Purpose: Transform the Skeleton of a pcAlgo-object to a PDAG using
## the rules of Pearl. The output is again a pcAlgo-object. Ambiguous
## v-structures are reoriented until extendable or max number of tries
## is reached. If still not extendable, a DAG is produced starting from the
## current PDAG even if introducing new v-structures.
##
## ----------------------------------------------------------------------
## Arguments:
## - gInput: pcAlgo object
## - verbose: 0 - no output, 1 - detailed output
## - n.max: Maximal number of tries to reorient v-strucutres
## ----------------------------------------------------------------------
## Values:
## - pcObj: Oriented pc-Object
## - evisit: Matrix counting the number of orientation attemps per edge
## - xtbl.orig: Is original graph with v-structure extendable
## - xtbl: Is final graph with v-structure extendable
## - amat0: Adj.matrix of original graph with v-structures
## - amat1: Adj.matrix of graph with v-structures after reorienting
##          edges from double edge visits
## - status:
##   0: original try is extendable
##   1: reorienting double edge visits helps
##   2: orig. try is not extendable; reorienting double visits don't help;
##      result is acyclic, has orig. v-structures, but perhaps
## - counter: Number of reorientation tries until success or max.tries
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: Sep 2006, 15:03
counter <- 0
res <- gInput
status <- 0
p <- length(nodes(res@graph))
evisit <- amat0 <- amat1 <- matrix(0,p,p)
xtbl <- xtbl.orig <- TRUE
if (numEdges(gInput@graph) > 0) {
g <- as(gInput@graph,"matrix") ## g_ij if i->j
p <- dim(g)[1]
pdag <- g
ind <- which(g == 1,arr.ind = TRUE)
## ind <- unique(t(apply(ind,1,sort)))

## Create minimal pattern
## orient v-structures x -> y <- z; edges that get re-oriented against
## an earlier orientation are counted in 'evisit' (ambiguous edges)
for (i in seq_len(nrow(ind))) {
x <- ind[i,1]
y <- ind[i,2]
allZ <- setdiff(which(g[y,] == 1),x) ## x-y-z
for(z in allZ) {
if ((g[x,z] == 0) &&
!(y %in% gInput@sepset[[x]][[z]] ||
y %in% gInput@sepset[[z]][[x]])) {
if (verbose) {
cat("\n",x,"->",y,"<-",z,"\n")
cat("Sxz=",gInput@sepset[[z]][[x]],"Szx=",gInput@sepset[[x]][[z]])
}
## check if already in other direction directed
if (pdag[x,y] == 0 && pdag[y,x] == 1) {
evisit[x,y] <- evisit[x,y] + 1
evisit[y,x] <- evisit[y,x] + 1
}
if (pdag[z,y] == 0 && pdag[y,z] == 1) {
evisit[z,y] <- evisit[z,y] + 1
evisit[y,z] <- evisit[y,z] + 1
}
pdag[x,y] <- pdag[z,y] <- 1
pdag[y,x] <- pdag[y,z] <- 0
} ## if
} ## for
} ## for ( i )

amat0 <- pdag
## Test whether this pdag allows a consistent extension
res2 <- pdag2dag(as(pdag,"graphNEL"))
xtbl <- res2\$success
xtbl.orig <- xtbl

## not extendable, but some edges were visited twice: enumerate all
## non-empty subsets of the ambiguous edges (binary counter), flip
## each subset and retry until extendable or n.max tries
if (!xtbl && (max(evisit) > 0)) {
tmp.ind2 <- unique(which(evisit > 0,arr.ind = TRUE))
ind2 <- unique(t(apply(tmp.ind2,1,sort)))
## print(ind2)
n <- nrow(ind2)
n.max <- min(2^n-1,n.max)
counter <- 0
## xtbl is FALSE because of if condition
while((counter < n.max) & !xtbl) {
## if (counter%%100 == 0) cat("\n counter=",counter,"\n")
counter <- counter + 1
## binary digits of 'counter' select the subset of edges to flip
dgBase <- digitsBase(counter)
dgBase <- dgBase[length(dgBase):1]
## print(dgBase)
indBase <- matrix(0,1,n)
indBase[1,seq_along(dgBase)] <- dgBase
## indTmp <- ind2[ss[[counter]],,drop=FALSE]
indTmp <- ind2[(indBase == 1),,drop = FALSE]
## print(indTmp)
pdagTmp <- flipEdges(pdag,indTmp)
resTmp <- pdag2dag(as(pdagTmp,"graphNEL"))
xtbl <- resTmp\$success
}
## keep the last tried pdag (the successful one if xtbl is TRUE)
pdag <- pdagTmp
status <- 1
}
amat1 <- pdag

if (xtbl) {
## Convert to complete pattern: use rules by Pearl
## (Meek's rules 1-3 iterated to a fixpoint, as in udag2pdag)
old_pdag <- matrix(0, p,p)
while (any(old_pdag != pdag)) {
old_pdag <- pdag
## rule 1: a -> b - c, a and c non-adjacent  =>  b -> c
ind <- which((pdag == 1 & t(pdag) == 0), arr.ind = TRUE) ## a -> b
for (i in seq_len(nrow(ind))) {
a <- ind[i,1]
b <- ind[i,2]
indC <- which( (pdag[b,] == 1 & pdag[,b] == 1) & (pdag[a,] == 0 & pdag[,a] == 0))
if (length(indC) > 0) {
pdag[b,indC] <- 1
pdag[indC,b] <- 0
if (verbose)
cat("\nRule 1:",a,"->",b," and ",b,"-",indC,
" where ",a," and ",indC," not connected: ",b,"->",indC,"\n")
}
}
## x11()
## plot(as(pdag,"graphNEL"), main="After Rule1")

## rule 2: a -> c -> b with a - b  =>  a -> b (avoid cycle)
ind <- which((pdag == 1 & t(pdag) == 1), arr.ind = TRUE) ## a - b
for (i in seq_len(nrow(ind))) {
a <- ind[i,1]
b <- ind[i,2]
indC <- which( (pdag[a,] == 1 & pdag[,a] == 0) & (pdag[,b] == 1 & pdag[b,] == 0))
if (length(indC) > 0) {
pdag[a,b] <- 1
pdag[b,a] <- 0
if (verbose) cat("\nRule 2: Kette ",a,"->",indC,"->",
b,":",a,"->",b,"\n")
}
}
## x11()
## plot(as(pdag,"graphNEL"), main="After Rule2")

## rule 3: a - b, a - c1/c2, c1 -> b, c2 -> b, c1/c2 non-adjacent
## =>  a -> b
ind <- which((pdag == 1 & t(pdag) == 1), arr.ind = TRUE) ## a - b
for (i in seq_len(nrow(ind))) {
a <- ind[i,1]
b <- ind[i,2]
indC <- which( (pdag[a,] == 1 & pdag[,a] == 1) & (pdag[,b] == 1 & pdag[b,] == 0))
if (length(indC) >= 2) {
## cat("R3: indC = ",indC,"\n")
g2 <- pdag[indC,indC]
## print(g2)
if (length(g2) <= 1) {
g2 <- 0
} else {
diag(g2) <- rep(1,length(indC)) ## no self reference
}
if (any(g2 == 0)) { ## if two nodes in g2 are not connected
pdag[a,b] <- 1
pdag[b,a] <- 0
if (verbose) cat("\nRule 3:",a,"->",b,"\n")
}
}
}
}
res@graph <- as(pdag,"graphNEL")
} else {
## give up on the v-structures: draw any DAG on the current pdag's
## skeleton and report its CPDAG
res@graph <- dag2cpdag(pdag2dag(as(pdag,"graphNEL"),keepVstruct = FALSE)\$graph)
status <- 2
## res@graph <- res2\$graph
}
}
list(pcObj = res, evisit = evisit, xtbl = xtbl, xtbl.orig = xtbl.orig,
amat0 = amat0, amat1 = amat1, status = status, counter = counter)
}

udag2pdagRelaxed <- function(gInput, verbose = FALSE, unfVect = NULL, solve.confl = FALSE, orientCollider = TRUE, rules = rep(TRUE, 3))
{
## Purpose: Extend the skeleton in 'gInput' to a PDAG: orient the
## v-structures (unless orientCollider = FALSE), then iterate Meek's
## rules 1-3 (individually switchable via 'rules') to a fixpoint.
## ----------------------------------------------------------------------
## Arguments:
## - gInput: pcAlgo object (skeleton plus sepsets)
## - verbose: print each orientation step
## - unfVect: unfaithful/ambiguous triples encoded via triple2numb();
##   such triples are never used for orientation (conservative PC)
## - solve.confl: if TRUE, conflicting orientations are marked as
##   bi-directed (amat entry 2) instead of being overwritten
## - orientCollider: orient v-structures first?
## - rules: logical(3), switches Meek rules 1-3 on/off
## ----------------------------------------------------------------------
## Value: pcAlgo object with @graph replaced by the oriented PDAG.
## Note: the internal rule functions read 'p' and 'verbose' from this
## function's environment.

##################################################
## Internal functions
##################################################

## replace 'else if' branch in 'if( !solve.confl )' statement
orientConflictCollider <- function(pdag, x, y, z) { ## x - y - z
## pdag: amat, pdag[x,y] = 1 and pdag[y,x] = 0 means x -> y
## x,y,z: colnumber of nodes in pdag
## only used if conflicts should be solved

## orient x - y
if (pdag[x,y] == 1) {
## x --- y, x --> y => x --> y
pdag[y,x] <- 0
} else {
## x <-- y, x <-> y => x <-> y
pdag[x,y] <- pdag[y,x] <- 2
}

## orient z - y
if (pdag[z,y] == 1) {
## z --- y, z --> y => z --> y
pdag[y,z] <- 0
} else {
## z <-- y, z <-> y => z <-> y
pdag[z,y] <- pdag[y,z] <- 2
}

pdag
}

## TODO: include correct VERBOSE statements
rule1 <- function(pdag, solve.confl = FALSE, unfVect = NULL) {
## Rule 1: a -> b - c goes to a -> b -> c
## Interpretation: No new collider is introduced
## Out: Updated pdag
search.pdag <- pdag
ind <- which(pdag == 1 & t(pdag) == 0, arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
a <- ind[i, 1]
b <- ind[i, 2]
## find all undirected neighbours of b not adjacent to a
isC <- which(search.pdag[b, ] == 1 & search.pdag[, b] == 1 &
search.pdag[a, ] == 0 & search.pdag[, a] == 0)
if (length(isC) > 0) {
for (ii in seq_along(isC)) {
c <- isC[ii]
## if the edge between b and c has not been oriented previously,
## orient it using normal R1
if (!solve.confl | (pdag[b,c] == 1 & pdag[c,b] == 1) ) { ## no conflict
## !! before, we checked search.pdag, not pdag !!
if (!is.null(unfVect)) { ## deal with unfaithful triples
if (!any(unfVect == triple2numb(p, a, b, c), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, c, b, a), na.rm = TRUE)) {
## if unfaithful triple, don't orient
pdag[b, c] <- 1
pdag[c, b] <- 0
}
} else {
## don't care about unfaithful triples -> just orient
pdag[b, c] <- 1
pdag[c, b] <- 0
## cat("Rule 1\n")
}
if (verbose)
cat("\nRule 1':", a, "->", b, " and ",
b, "-", c, " where ", a, " and ", c,
" not connected and ", a, b, c, " faithful triple: ",
b, "->", c, "\n")
} else if (pdag[b,c] == 0 & pdag[c,b] == 1) {
## conflict that must be solved
## solve conflict: if the edge is b <- c because of a previous
## orientation within for loop then output <->
if (!is.null(unfVect)) { ## deal with unfaithful triples
if (!any(unfVect == triple2numb(p, a, b, c), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, c, b, a), na.rm = TRUE)) {
pdag[b, c] <- 2
pdag[c, b] <- 2
if (verbose)
cat("\nRule 1':", a, "->", b, "<-",
c, " but ", b, "->", c, "also possible and",
a, b, c, " faithful triple: ", a,"->", b, "<->", c,"\n")
}
} else {
## don't care about unfaithful triples -> just orient
pdag[b, c] <- 2
pdag[c, b] <- 2
if (verbose)
cat("\nRule 1':", a, "->", b, "<-",
c, " but ", b, "->", c, "also possible and",
a, b, c, " faithful triple: ", a,"->", b, "<->", c,"\n")
} ## unfVect: if else
} ## conflict: if else
} ## for c
} ## if length(isC)
## without conflict handling the search graph tracks the orientations
if (!solve.confl) search.pdag <- pdag
} ## for ind
pdag
}

rule2 <- function(pdag, solve.confl = FALSE) {
## Rule 2: a -> c -> b with a - b: a -> b
## Interpretation: Avoid cycle
## normal version = conservative version
search.pdag <- pdag
ind <- which(search.pdag == 1 & t(search.pdag) == 1, arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
a <- ind[i, 1]
b <- ind[i, 2]
isC <- which(search.pdag[a, ] == 1 & search.pdag[, a] == 0 &
search.pdag[, b] == 1 & search.pdag[b, ] == 0)
for (ii in seq_along(isC)) {
c <- isC[ii]
## if the edge has not been oriented yet, orient it with R2
## always do this if you don't care about conflicts
if (!solve.confl | (pdag[a, b] == 1 & pdag[b, a] == 1) ) {
pdag[a, b] <- 1
pdag[b, a] <- 0
if (verbose)
cat("\nRule 2: Chain ", a, "->", c,
"->", b, ":", a, "->", b, "\n")
}
## else if the edge has been oriented as a <- b by a previous R2
else if (pdag[a, b] == 0 & pdag[b, a] == 1) {
pdag[a, b] <- 2
pdag[b, a] <- 2
if (verbose)
cat("\nRule 2: Chain ", a, "->", c,
"->", b, ":", a, "<->", b, "\n")
}
}
if (!solve.confl) search.pdag <- pdag
}
pdag
}

rule3 <- function(pdag, solve.confl = FALSE, unfVect = NULL) {
## Rule 3: a-b, a-c1, a-c2, c1->b, c2->b but c1 and c2 not connected;
## then a-b => a -> b
search.pdag <- pdag
ind <- which(search.pdag == 1 & t(search.pdag) == 1, arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
a <- ind[i, 1]
b <- ind[i, 2]
## candidates: undirected neighbours of a that point into b
c <- which(search.pdag[a, ] == 1 & search.pdag[, a] == 1 &
search.pdag[, b] == 1 & search.pdag[b, ] == 0)
if (length(c) >= 2) {
## try every unordered pair (c1, c2) of candidates
cmb.C <- combn(c, 2)
cC1 <- cmb.C[1, ]
cC2 <- cmb.C[2, ]
for (j in seq_along(cC1)) {
c1 <- cC1[j]
c2 <- cC2[j]
if (search.pdag[c1, c2] == 0 && search.pdag[c2,c1] == 0) {
if (!is.null(unfVect)) {
if (!any(unfVect == triple2numb(p, c1, a, c2), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, c2, a, c1), na.rm = TRUE)) {
## if the edge has not been oriented yet, orient it with R3
if (!solve.confl | (pdag[a, b] == 1 & pdag[b, a] == 1) ) {
pdag[a, b] <- 1
pdag[b, a] <- 0
if (!solve.confl) search.pdag <- pdag
if (verbose)
cat("\nRule 3':", a, c1, c2, "faithful triple: ",
a, "->", b, "\n")
break
}
## else if: we care about conflicts and  the edge has been oriented as a <- b by a previous R3
else if (pdag[a, b] == 0 & pdag[b, a] == 1) {
pdag[a, b] <- pdag[b, a] <- 2
if (verbose)
cat("\nRule 3':", a, c1, c2, "faithful triple: ",
a, "<->", b, "\n")
break
} ## if solve conflict
} ## if unf. triple found
} else { ## if care about unf. triples; else don't care
if (!solve.confl | (pdag[a, b] == 1 & pdag[b, a] == 1) ) {
pdag[a, b] <- 1
pdag[b, a] <- 0
if (!solve.confl) search.pdag <- pdag
if (verbose)
cat("\nRule 3':", a, c1, c2, "faithful triple: ",
a, "->", b, "\n")
break
}
## else if: we care about conflicts and  the edge has been oriented as a <- b by a previous R3
else if (pdag[a, b] == 0 & pdag[b, a] == 1) {
pdag[a, b] <- pdag[b, a] <- 2
if (verbose)
cat("\nRule 3':", a, c1, c2, "faithful triple: ",
a, "<->", b, "\n")
break
} ## if solve conflict
} ## if care about unf. triples
} ## if c1 and c2 are not adjecent
} ## for all pairs of c's
} ## if at least two c's are found
} ## for all undirected edges
pdag
}
##################################################
## Main
##################################################

## prepare adjacency matrix of skeleton
if (numEdges(gInput@graph) == 0)
return(gInput)
g <- as(gInput@graph, "matrix")
p <- nrow(g)
pdag <- g

## orient collider
## i.e. every unshielded triple x - y - z with y not in sepset(x, z)
if (orientCollider) {
ind <- which(g == 1, arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
x <- ind[i, 1]
y <- ind[i, 2]
allZ <- setdiff(which(g[y, ] == 1), x) ## x - y - z
for (z in allZ) {
## check collider condition
if (g[x, z] == 0 &&
!((y %in% gInput@sepset[[x]][[z]]) ||
(y %in% gInput@sepset[[z]][[x]]))) {
if (length(unfVect) == 0) { ## no unfaithful triples
if (!solve.confl) { ## don't solve conflicts
pdag[x, y] <- pdag[z, y] <- 1
pdag[y, x] <- pdag[y, z] <- 0
} else { ## solve conflicts
pdag <- orientConflictCollider(pdag, x, y, z)
}
} else { ## unfaithful triples are present
if (!any(unfVect == triple2numb(p, x, y, z), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, z, y, x), na.rm = TRUE)) {
if (!solve.confl) { ## don't solve conflicts
pdag[x, y] <- pdag[z, y] <- 1
pdag[y, x] <- pdag[y, z] <- 0
} else { ## solve conflicts
pdag <- orientConflictCollider(pdag, x, y, z)
}
}
}
}
} ## for z
} ## for i
} ## end: Orient collider

## Rules 1 - 3: sweep until one full pass changes nothing
repeat {
old_pdag <- pdag
if (rules[1]) {
pdag <- rule1(pdag, solve.confl = solve.confl, unfVect = unfVect)
}
if (rules[2]) {
pdag <- rule2(pdag, solve.confl = solve.confl)
}
if (rules[3]) {
pdag <- rule3(pdag, solve.confl = solve.confl, unfVect = unfVect)
}
if (all(pdag == old_pdag))
break
} ## repeat

gInput@graph <- as(pdag, "graphNEL")
gInput

}

##' Regression coefficient(s) computed from a covariance matrix
##'
##' Solves the normal equations Var(x) * beta = Cov(x, y) using only the
##' joint covariance matrix \code{C}, and returns the first row of the
##' solution, i.e. the coefficient of the \emph{first} variable in
##' \code{x} for each response column in \code{y}.
##' @param C covariance matrix
##' @param y column of response
##' @param x columns of expl. vars
##' @return numeric vector: the regression coefficient of \code{x[1]},
##'   one entry per response in \code{y}
lm.cov <- function (C, y, x) {
  ## solve(A, b) avoids forming the explicit inverse of C[x, x]
  solve(C[x, x], C[x, y, drop = FALSE])[1, ]
}

causalEffect <- function(g,y,x) {
  ## Purpose: true causal effect of node x on node y in the weighted
  ## DAG g, obtained by propagating the edge weights from x forward to y
  ## (the (x+1):y loop assumes node numbering is compatible with the
  ## topological order, as produced by randomDAG).
  ## Arguments: g - DAG (graph object); y, x - node indices
  ## Value: scalar, the total causal effect beta_true
  B <- wgtMatrix(g)               # B[i, j]: weight of edge j -> i
  if (y - x <= 1)
    return(B[y, x])               # at most a direct edge: just its weight
  ## eff[i] accumulates the total effect of x on node i
  eff <- matrix(0, ncol(B), 1)
  eff[x] <- 1
  for (k in (x + 1):y)
    eff[k] <- B[k, ] %*% eff
  eff[y]
}

has.new.coll <- function(amat,amatSkel, x, pa1, pa2.t, pa2.f) {
  ## Purpose: decide whether orienting the undirected edges at x creates
  ## a new v-structure (collider). Checks (i) edges pointed towards x:
  ## all of pa1 and pa2.t must be mutually connected; (ii) edges pointed
  ## away from x: x -> pa <- papa with x, papa non-adjacent is forbidden.
  ## Arguments:
  ## - amat: adjacency matrix (amat - t(amat) isolates directed edges)
  ## - amatSkel: skeleton adjacency matrix (symmetric 0/1)
  ## - x: node under consideration
  ## - pa1: definite parents of x
  ## - pa2.t: undirected "parents" of x, directed towards (per original note)
  ## - pa2.f: undirected "parents" of x, directed away (per original note)
  ## Value: TRUE iff a new collider is introduced
  new.coll <- FALSE
  nonempty <- function(v) length(v) > 0 && !all(is.na(v))

  if (nonempty(pa2.t)) {
    ## every pa1 must be adjacent to every pa2.t, else: new collider
    if (nonempty(pa1))
      new.coll <- min(amatSkel[pa1, pa2.t]) == 0
    ## and the pa2.t must be pairwise connected among themselves
    if (!new.coll && length(pa2.t) > 1) {
      sub <- amatSkel[pa2.t, pa2.t]
      diag(sub) <- 1                     # ignore self entries
      new.coll <- min(sub) == 0
    }
  }
  if (!new.coll && nonempty(pa2.f)) {
    ## keep only the DIRECTED part of amat (drop undirected edges)
    dirA <- amat - t(amat)
    dirA[dirA < 0] <- 0
    ## nodes feeding into pa2.f, excluding x itself
    papa <- setdiff(which(colSums(dirA[pa2.f, , drop = FALSE]) != 0), x)
    ## any such node not adjacent to x would form x -> pa <- papa
    if (length(papa) > 0)
      new.coll <- min(amatSkel[x, papa]) == 0
  }
  new.coll
}

pcAlgo.Perfect <- function(C, cutoff = 1e-8, corMethod = "standard", verbose = 0,
directed = FALSE, u2pd = "rand", psepset = FALSE) {
## Purpose: Perform PC-Algorithm on the TRUE correlation matrix, i.e.,
## estimate the skeleton of the DAG from population quantities (partial
## correlations are compared against 'cutoff' instead of a test).
## Output is an unoriented graph object (oriented iff 'directed').
## ----------------------------------------------------------------------
## Arguments:
## - C: True Correlation matrix
## - cutoff: |partial correlation| <= cutoff counts as zero
## - corMethod: "standard" or "Qn" for standard or robust correlation
##              estimation
## - verbose: 0-no output, 1-small output, 2-details
## - u2pd: Function for converting udag to pdag
##   "rand": udag2pdag
##   "relaxed": udag2pdagRelaxed
##   "retry": udag2pdagSpecial
## - psepset: Also check possible sep sets.
## ----------------------------------------------------------------------
## Value: "pcAlgo" object (zMin slot holds the minimal |partial cor|)
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 26 Jan 2006; Martin Maechler
## Modification: Diego Colombo, Sept 2009
## backward compatibility
stopifnot((p <- nrow(C)) >= 2)
cl <- match.call()
seq_p <- seq_len(p)# 1:p
## pcMin[x,y]: smallest |partial correlation| seen for the pair (x,y)
pcMin <- matrix(Inf, p,p)
diag(pcMin) <- 0
sepset <- pl <- vector("list",p)
for (i in seq_p) sepset[[i]] <- pl
n.edgetests <- numeric(1)# final length = max { ord}
## G := complete graph :
G <- matrix(TRUE, p,p)
diag(G) <- FALSE

## main PC loop: grow the conditioning-set size 'ord' until no node has
## enough neighbours left
done <- FALSE
ord <- 0L
while (!done && any(G)) {
n.edgetests[ord+1] <- 0
done <- TRUE
ind <- which(G, arr.ind = TRUE)
## For comparison with C++ sort according to first row
ind <- ind[order(ind[,1]), ]
remEdges <- nrow(ind)
if(verbose >= 1)
cat("Order=",ord,"; remaining edges:",remEdges,"\n", sep = '')
for (i in 1:remEdges) {
if(verbose && (verbose >= 2 || i%%100 == 0)) cat("|i=",i,"|iMax=",remEdges,"\n")
x <- ind[i,1]
y <- ind[i,2]
##      done <- !G[y,x] # i.e. (x,y) was not already deleted in its (y,x) "version"
##      if(!done) {
if (G[y,x]) {
nbrsBool <- G[,x]
nbrsBool[y] <- FALSE
nbrs <- seq_p[nbrsBool]
length_nbrs <- length(nbrs)
## if(verbose)

##        done <- length_nbrs < ord
##        if (!done) {
if (length_nbrs >= ord) {
if (length_nbrs > ord) done <- FALSE
S <- seq_len(ord)

## now includes special cases (ord == 0) or (length_nbrs == 1):
repeat { ## condition w.r.to all  nbrs[S] of size 'ord' :
##  if (condIndFisherZ(x,y, nbrs[S], C,n, cutoff,verbose)) {
## MM: want to use this: --- but it changes the result in some cases!!
##            cat("X=",x,"|Y=",y,"|ord=",ord,"|nbrs=",nbrs[S],"|iMax=",remEdges,"\n")
n.edgetests[ord+1] <- n.edgetests[ord+1]+1
pc.val <- pcorOrder(x,y,nbrs[S],C)
if (abs(pc.val) < pcMin[x,y]) pcMin[x,y] <- abs(pc.val)
if (verbose >= 2) cat(paste("x:",x,"y:",y,"S:"),nbrs[S],paste("pc:",pc.val,"\n"))
if (abs(pc.val) <= cutoff) {
##              ##  pnorm(abs(z), lower.tail = FALSE) is the p-value
## (numerically) zero partial correlation: drop the edge and
## record the separating set
G[x,y] <- G[y,x] <- FALSE
sepset[[x]][[y]] <- nbrs[S]
break
}
else {
nextSet <- getNextSet(length_nbrs, ord, S)
if(nextSet\$wasLast)
break
S <- nextSet\$nextSet
}
}

} } ## end if(!done)

} ## end for(i ..)
ord <- ord+1L
##    n.edgetests[ord] <- remEdges
}

## FCI-style extra step: test against Possible-D-SEP sets as well
if (psepset) {
amat <- G
ind <- which(G, arr.ind = TRUE)
storage.mode(amat) <- "integer" # (TRUE, FALSE) -->  (1, 0)
## Orient colliders
for (i in seq_len(nrow(ind))) {
x <- ind[i,1]
y <- ind[i,2]
allZ <- setdiff(which(amat[y,] == 1L),x) ## x-y-z

if (length(allZ) > 0) {
for (j in seq_along(allZ)) {
z <- allZ[j]
if ((amat[x,z] == 0L) && !((y %in% sepset[[x]][[z]]) | (y %in% sepset[[z]][[x]]))) {
if (verbose == 2) {
cat("\n",x,"*->",y,"<-*",z,"\n")
cat("Sxz=",sepset[[z]][[x]],"and","Szx=",sepset[[x]][[z]],"\n")
}

## x o-> y <-o z
amat[x,y] <- amat[z,y] <- 2L

} ## if
} ## for
} ## if
} ## for

## Compute poss. sepsets
for (x in 1:p) {
## NOTE(review): setting a class attribute on the loop variable has no
## visible effect downstream -- presumably vestigial; verify.
attr(x,'class') <- 'possibledsep'
if (any(amat[x,] != 0L)) {
tf1 <- setdiff(reach(x,-1,-1,amat),x)
for (y in seq_p[amat[x,] != 0L]) {
## tf = possible_d_sep(amat,x,y)
tf <- setdiff(tf1,y)
## test
if (length(tf) > 0) {
pc.val <- pcorOrder(x,y,tf,C)
if (abs(pc.val) < pcMin[x,y]) {
pcMin[x,y] <- abs(pc.val)
}
if (abs(pc.val) <= cutoff) {
## delete x-y
amat[x,y] <- amat[y,x] <- 0L
## save pos d-sepset in sepset
sepset[[x]][[y]] <- tf
if (verbose == 2) {
cat("Delete edge",x,"-",y,"\n")
}
}
if (verbose == 2) {
cat("Possible-D-Sep of", x, "and", y, "is", tf, " - pc = ",pc.val,"\n")
}
}
}
}
}

## carry the psepset deletions back into the logical matrix G
G[amat == 0L] <- FALSE
G[amat == 1L] <- TRUE
} ## end if(psepset)

if(verbose >= 1) { cat("Final graph adjacency matrix:\n"); print(symnum(G)) }

## transform matrix to graph object :
if (sum(G) == 0) {
Gobject <- new("graphNEL", nodes = as.character(seq_p))
}
else {
colnames(G) <- rownames(G) <- as.character(seq_p)
Gobject <- as(G,"graphNEL")
}

## symmetrise pcMin (keep the smaller of the two directions)
for (i in 1:(p-1)) {
for (j in 2:p) {
pcMin[i,j] <- pcMin[j,i] <- min(pcMin[i,j],pcMin[j,i])
}
}

res <- new("pcAlgo",
graph = Gobject,
call = cl, n = as.integer(1), max.ord = as.integer(ord-1),
n.edgetests = n.edgetests, sepset = sepset,
zMin = pcMin)

## optionally orient the skeleton with the chosen udag2pdag* variant
if (directed)
switch(u2pd,
"rand" = udag2pdag(res),
"retry" = udag2pdagSpecial(res)\$pcObj,
"relaxed" = udag2pdagRelaxed(res))
else
res
}## pcAlgo.Perfect

### reach(): currently only called from  pcAlgo() and pcAlgo.Perfect()
### -------  and only in "possibledsep" version
## Function that computes the Possible d-sepset, done by Spirtes
reach <- function(a, b, c, adjacency)
{
  ## Purpose: compute Possible-D-SEP(a) following Spirtes: the vertices
  ## reachable from 'a' along paths on which every step <u, v, s> either
  ## forms a collider at v (both arrowhead marks == 2) or has u and s
  ## adjacent. Called from pcAlgo() / pcAlgo.Perfect() as reach(x,-1,-1,amat).
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - a: index of the start vertex
  ## - b, c: unused (callers pass -1); kept for call compatibility
  ## - adjacency: matrix; 0 = no edge, 1 = edge, 2 = edge whose arrowhead
  ##   points at the COLUMN vertex (collider mark, see pcAlgo.Perfect)
  ## Value: vector of vertex indices reachable from 'a' (includes 'a')
  ## ----------------------------------------------------------------------
  ## BUG FIX(review, reconstructed): the function header, the head of the
  ## condition in legal.pdsep(), and the initialisation of 'numvertex' /
  ## 'edgeslist' were missing (file corruption) -- verify against the
  ## released pcalg sources.

  ## reachable      set of vertices;
  ## edgeslist      array[1..maxvertex] of list of edges
  ## numvertex      integer
  ## labeled        array (by depth) of list of edges that have been labeled

  makeedge <- function(x,y) list(list(x,y))

  legal.pdsep <- function(r,s) {
    ## Modifying global 'edgeslist'
    ## extend step r = (u, v) to s iff <u, v, s> is a collider at v,
    ## or u and s are adjacent (and s is not where we came from)
    if ((adjacency[r[[1]], r[[2]]] == 2 &&
         adjacency[s,      r[[2]]] == 2 && r[[1]] != s) ||
        (adjacency[r[[1]],s] != 0 && r[[1]] != s)) {
      edgeslist[[r[[2]]]] <<- setdiff(edgeslist[[r[[2]]]],s)
      makeedge(r[[2]],s)
    }
  }

  initialize.pdsep <- function(x,y) mapply(makeedge, x = x, y = y)

  labeled <- list()
  numvertex <- dim(adjacency)[1]
  edgeslist <- list()
  for (i in 1:numvertex)
    edgeslist[[i]] <- which(adjacency[, i] != 0)  ## neighbours of i
  labeled[[1]] <- initialize.pdsep(a, edgeslist[[a]])
  edgeslist[[a]] <- list()
  depth <- 2
  repeat {
    labeled[[depth]] <- list()
    for (i in seq_along(labeled[[depth-1]])) {
      lab.i <- labeled[[depth-1]][[i]]
      edgestemp <- edgeslist[[lab.i[[2]]]]
      if (length(edgestemp) == 0) break
      for (j in seq_along(edgestemp))
        labeled[[depth]] <- union(legal.pdsep(lab.i, edgestemp[[j]]),
                                  labeled[[depth]])
    }
    if (length(labeled[[depth]]) == 0)
      break
    ## else :
    depth <- depth  + 1
  }
  unique(unlist(labeled))
}

skeleton <- function(suffStat, indepTest, alpha, labels, p,
                     method = c("stable", "original", "stable.fast"), m.max = Inf,
                     fixedGaps = NULL, fixedEdges = NULL,
                     NAdelete = TRUE, numCores = 1, verbose = FALSE)
{
## Purpose: Perform undirected part of PC-Algorithm, i.e.,
## estimate skeleton of DAG given data
## Order-independent version!
## ----------------------------------------------------------------------
## Arguments:
## - suffStat: List containing all necessary elements for the conditional
##             independence decisions in the function "indepTest".
## - indepTest: predefined function for testing conditional independence
## - alpha: Significance level of individual partial correlation tests
## - labels / p: node labels, or number of nodes (one of the two needed)
## - method: "stable" (order-independent), "original", or "stable.fast"
##           (C++ implementation)
## - fixedGaps: the adjacency matrix of the graph from which the algorithm
##      should start (logical); gaps fixed here are not changed
## - fixedEdges: Edges marked here are not changed (logical)
## - NAdelete: delete edge if pval=NA (for discrete data)
## - m.max: maximal size of conditioning set
## - numCores: number of cores to be used for calculation if
##   method = "stable.fast"
## ----------------------------------------------------------------------
## Value: "pcAlgo" object with G, sepset, pMax, ord, n.edgetests
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 09.12.2009
## Modification: Diego Colombo; Martin Maechler; Alain Hauser

  cl <- match.call()
  if (!missing(p))
    stopifnot(is.numeric(p), length(p <- as.integer(p)) == 1, p >= 2)
  if (missing(labels)) {
    if (missing(p)) stop("need to specify 'labels' or 'p'")
    labels <- as.character(seq_len(p))
  } else { ## use labels ==> p  from it
    stopifnot(is.character(labels))
    if (missing(p))
      p <- length(labels)
    else if (p != length(labels))
      stop("'p' is not needed when 'labels' is specified, and must match length(labels)")
    ## Don't want message, in case this is called e.g. from fciPlus():
    ## else
    ##   message("No need to specify 'p', when 'labels' is given")
  }
  seq_p <- seq_len(p)
  method <- match.arg(method)
  ## C++ version still has problems under Windows; will have to check why
  ##  if (method == "stable.fast" && .Platform$OS.type == "windows") {
  ##    method <- "stable"
  ##    warning("Method 'stable.fast' is not available under Windows; using 'stable' instead.")
  ##  }

  ## G := !fixedGaps, i.e. G[i,j] is true  iff  i--j  will be investigated
  if (is.null(fixedGaps)) {
    G <- matrix(TRUE, nrow = p, ncol = p)
  }
  else if (!identical(dim(fixedGaps), c(p, p)))
    stop("Dimensions of the dataset and fixedGaps do not agree.")
  else if (!identical(fixedGaps, t(fixedGaps)))
    stop("fixedGaps must be symmetric")
  else
    G <- !fixedGaps

  diag(G) <- FALSE ## no self-loops

  if (is.null(fixedEdges)) { ## MM: could be sparse
    fixedEdges <- matrix(rep(FALSE, p * p), nrow = p, ncol = p)
  }
  else if (!identical(dim(fixedEdges), c(p, p)))
    stop("Dimensions of the dataset and fixedEdges do not agree.")
  else if (!identical(fixedEdges, t(fixedEdges)))
    stop("fixedEdges must be symmetric")

  ## Check number of cores
  stopifnot((is.integer(numCores) || is.numeric(numCores)) && numCores > 0)
  if (numCores > 1 && method != "stable.fast") {
    warning("Argument numCores ignored: parallelization only available for method = 'stable.fast'")
  }
  if (method == "stable.fast") {
    ## Do calculation in C++...
    indepTestName <-
      if (identical(indepTest, gaussCItest)) "gauss" else "rfun"
    options <- list(
      verbose = as.integer(verbose),
      m.max = as.integer(ifelse(is.infinite(m.max), p, m.max)),
      numCores = numCores)
    res <- .Call("estimateSkeleton", G, suffStat, indepTestName, indepTest, alpha, fixedEdges, options)
    G <- res$amat
    ## convert the C++ sepset convention (-1 = "none") into R lists,
    ## padding each row to length p
    sepset <- lapply(seq_p, function(i) c(
      lapply(res$sepset[[i]], function(v) if (identical(v, as.integer(-1))) NULL else v),
      vector("list", p - length(res$sepset[[i]])))) # TODO change convention: make sepset triangular
    pMax <- res$pMax
    n.edgetests <- res$n.edgetests
    ord <- length(n.edgetests) - 1L
  }
  else {
    ## Original R version

    pval <- NULL
    sepset <- lapply(seq_p, function(.) vector("list", p)) # a list of lists [p x p]
    ## save maximal p value
    pMax <- matrix(-Inf, nrow = p, ncol = p)
    diag(pMax) <- 1
    done <- FALSE
    ord <- 0L
    n.edgetests <- numeric(1) # final length = max { ord}
    while (!done && any(G) && ord <= m.max) {
      n.edgetests[ord1 <- ord + 1L] <- 0
      done <- TRUE
      ind <- which(G, arr.ind = TRUE)
      ## For comparison with C++ sort according to first row
      ind <- ind[order(ind[, 1]), ]
      remEdges <- nrow(ind)
      if (verbose)
        cat("Order=", ord, "; remaining edges:", remEdges,"\n",sep = "")
      if (method == "stable") {
        ## Order-independent version: Compute the adjacency sets for any vertex
        ## Then don't update when edges are deleted
        G.l <- split(G, gl(p, p))
      }
      for (i in 1:remEdges) {
        if (verbose && (verbose >= 2 || i %% 100 == 0)) cat("|i=", i, "|iMax=", remEdges, "\n")
        x <- ind[i, 1]
        y <- ind[i, 2]
        if (G[y, x] && !fixedEdges[y, x]) {
          nbrsBool <- if (method == "stable") G.l[[x]] else G[, x]
          nbrsBool[y] <- FALSE
          nbrs <- seq_p[nbrsBool]
          length_nbrs <- length(nbrs)
          if (length_nbrs >= ord) {
            if (length_nbrs > ord)
              done <- FALSE
            S <- seq_len(ord)
            repeat { ## condition w.r.to all  nbrs[S] of size 'ord'
              n.edgetests[ord1] <- n.edgetests[ord1] + 1
              pval <- indepTest(x, y, nbrs[S], suffStat)
              if (verbose)
                cat("x=", x, " y=", y, " S=", nbrs[S], ": pval =", pval, "\n")
              ## BUG FIX: a missing p-value must be mapped via 'NAdelete'
              ## (TRUE => pval := 1 => edge deleted). Previously the NA
              ## check guarded the pMax update instead, so pMax was never
              ## updated for regular p-values and 'if (pval >= alpha)'
              ## failed with an error on NA.
              if (is.na(pval))
                pval <- as.numeric(NAdelete)
              if (pMax[x, y] < pval)
                pMax[x, y] <- pval
              if (pval >= alpha) { # independent
                G[x, y] <- G[y, x] <- FALSE
                sepset[[x]][[y]] <- nbrs[S]
                break
              }
              else {
                nextSet <- getNextSet(length_nbrs, ord, S)
                if (nextSet$wasLast)
                  break
                S <- nextSet$nextSet
              }
            } ## repeat
          }
        }
      } # for( i )
      ord <- ord + 1L
    } ## while()
    ## symmetrise pMax (keep the larger of the two directions)
    for (i in 1:(p - 1)) {
      for (j in 2:p)
        pMax[i, j] <- pMax[j, i] <- max(pMax[i, j], pMax[j, i])
    }
  }

  ## transform matrix to graph object :
  Gobject <-
    if (sum(G) == 0) {
      new("graphNEL", nodes = labels)
    } else {
      colnames(G) <- rownames(G) <- labels
      as(G, "graphNEL")
    }

  ## final object
  new("pcAlgo", graph = Gobject, call = cl, n = integer(0),
      max.ord = as.integer(ord - 1), n.edgetests = n.edgetests,
      sepset = sepset, pMax = pMax, zMin = matrix(NA, 1, 1))
}## end{ skeleton }

pc <- function(suffStat, indepTest, alpha, labels, p,
               fixedGaps = NULL, fixedEdges = NULL, NAdelete = TRUE, m.max = Inf,
               u2pd = c("relaxed", "rand", "retry"),
               skel.method = c("stable", "original", "stable.fast"),
               conservative = FALSE, maj.rule = FALSE,
               solve.confl = FALSE, numCores = 1, verbose = FALSE)
{
  ## Purpose: Perform PC-Algorithm, i.e., estimate skeleton of DAG given data
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - suffStat: sufficient statistic handed to indepTest()
  ## - indepTest: conditional independence test function(x, y, S, suffStat)
  ## - alpha: Significance level of individual partial correlation tests
  ## - labels / p: node labels, or number of nodes (one of the two required)
  ## - fixedGaps, fixedEdges: logical matrices constraining the skeleton search
  ## - NAdelete: delete edge if pval=NA (for discrete data)
  ## - m.max: maximal size of conditioning set
  ## - u2pd: Function for converting udag to pdag
  ##   "rand": udag2pdag
  ##   "relaxed": udag2pdagRelaxed
  ##   "retry": udag2pdagSpecial
  ## - conservative: If TRUE, conservative PC is done
  ## - maj.rule: If TRUE, the majority rule is used to resolve triples
  ## - solve.confl: If TRUE, conflicting orientations give bi-directed edges
  ## - numCores: handed to skeleton(), used for parallelization
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 26 Jan 2006; Martin Maechler
  ## Modifications: Sarah Gerster, Diego Colombo, Markus Kalisch

  ## Initial Checks
  cl <- match.call()
  if(!missing(p)) stopifnot(is.numeric(p), length(p <- as.integer(p)) == 1, p >= 2)
  if(missing(labels)) {
    if(missing(p)) stop("need to specify 'labels' or 'p'")
    labels <- as.character(seq_len(p))
  } else { ## use labels ==> p  from it
    stopifnot(is.character(labels))
    if(missing(p)) {
      p <- length(labels)
    } else if(p != length(labels))
      stop("'p' is not needed when 'labels' is specified, and must match length(labels)")
    else
      message("No need to specify 'p', when 'labels' is given")
  }

  u2pd <- match.arg(u2pd)
  skel.method <- match.arg(skel.method)
  if(u2pd != "relaxed") {
    if (conservative || maj.rule)
      stop("Conservative PC and majority rule PC can only be run with 'u2pd = relaxed'")

    if (solve.confl)
      stop("Versions of PC using lists for the orientation rules (and possibly bi-directed edges)\n can only be run with 'u2pd = relaxed'")
  }

  if (conservative && maj.rule) stop("Choose either conservative PC or majority rule PC!")

  ## Skeleton
  ## FIX: this call was truncated (it was never closed, and NAdelete, m.max,
  ## numCores and verbose were not forwarded); restore the full call.
  skel <- skeleton(suffStat, indepTest, alpha, labels = labels, method = skel.method,
                   fixedGaps = fixedGaps, fixedEdges = fixedEdges,
                   NAdelete = NAdelete, m.max = m.max, numCores = numCores,
                   verbose = verbose)
  skel@call <- cl # so that makes it into result

  ## Orient edges
  if (!conservative && !maj.rule) {
    switch (u2pd,
            "rand" = udag2pdag(skel),
            "retry" = udag2pdagSpecial(skel)$pcObj,
            "relaxed" = udag2pdagRelaxed(skel, verbose = verbose, solve.confl = solve.confl))
  }
  else { ## u2pd "relaxed" : conservative _or_ maj.rule

    ## version.unf defined per default
    ## Tetrad CPC works with version.unf=c(2,1)
    ## see comment on pc.cons.intern for description of version.unf
    pc. <- pc.cons.intern(skel, suffStat, indepTest, alpha,
                          version.unf = c(2,1), maj.rule = maj.rule, verbose = verbose)
    udag2pdagRelaxed(pc.$sk, verbose = verbose,
                     unfVect = pc.$unfTripl, solve.confl = solve.confl)
  }
} ## {pc}

gSquareBin <- function(x, y, S, dm, adaptDF = FALSE, n.min = 10*df,
                       verbose = FALSE)
{
  ## Purpose: G^2 statistic to test for (conditional) independence
  ##          of *binary* variables   X and Y given S  --> ../man/binCItest.Rd
  ## -------------------------------------------------------------------------
  ## Arguments:
  ## - x,y,S: Are x,y conditionally independent given S (S can be NULL)?
  ## - dm: data matrix (rows: samples, columns: variables) with binary entries
  ## - adaptDF: lower the degrees of freedom by one for each zero count.
  ##            The value for the DF cannot go below 1.
  ## - n.min: minimal sample size; note the default 10*df is a lazily-evaluated
  ##          promise, resolved only *after* df has been computed in the body
  ## - verbose: if TRUE, some additional info is outputted during the
  ##            computations
  ## Value: p-value of the G^2 test (returns 1 when n < n.min)
  ## -------------------------------------------------------------------------

  stopifnot((n <- nrow(dm)) >= 1) # nr of samples
  if(!all(as.logical(dm) == dm))
    stop("'dm' must be binary, i.e. with values in {0,1}")

  if(verbose) cat('Edge ',x,'--',y, ' with subset S =', S,'\n')

  lenS <- length(S)
  ## degrees of freedom assuming no structural zeros
  df <- 2^lenS

  if (n < n.min) { ## not enough samples to perform the test:
    warning(gettextf(
      "n=%d is too small (n < n.min = %d ) for G^2 test (=> treated as independence)",
      n, n.min), domain = NA)
    return( 1 )
  }
  ## else --  enough data to perform the test
  d.x1 <- dm[,x] + 1L
  d.y1 <- dm[,y] + 1L
  if(lenS <= 5) { # bei gSquareDis lenS <= 4
    n12 <- 1:2
    ## switch() evaluates exactly one of the following blocks; each one fills
    ## the contingency counts 'nijk' and the G^2 term matrix 't.log'
    switch(lenS+1L, {
      ## lenS == 0 ----------------------------------
      nijk <- array(0L, c(2,2)) # really 'nij', but 'nijk' is "global" name
      for (i in n12) {
        d.x.i <- d.x1 == i
        for (j in n12)
          nijk[i,j] <- sum(d.x.i & d.y1 == j)
      }
      ## marginal counts
      ## compute G^2
      t.log <- n*(nijk / tcrossprod(rowSums(nijk), colSums(nijk)))
    } ,

    { ## lenS == 1 ----------------------------------

      ## a.pos <- sort(c(x,y,S))
      dmS.1 <- dm[,S] + 1L
      nijk <- array(0L, c(2,2,2))
      for(i in n12) {
        d.x.i <- d.x1 == i
        for(j in n12) {
          d.x.i.y.j <- d.x.i & d.y1 == j
          for(k in n12)
            nijk[i,j,k] <- sum(d.x.i.y.j & dmS.1 == k)
        }
      }
      alt <- c(x,y,S)
      c <- which(alt == S)
      nik <- apply(nijk,c,rowSums)
      njk <- apply(nijk,c,colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0,c(2,2,2))
      if(c == 3) {
        for (k in n12)
          t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k], njk[,k]))
      } else if(c == 1) {
        for (k in n12)
          t.log[k,,] <- nijk[k,,]*(nk[k] / tcrossprod(nik[,k], njk[,k]))
      } else { ## c == 2 (?)
        for (k in n12)
          t.log[,k,] <- nijk[,k,]*(nk[k] / tcrossprod(nik[,k], njk[,k]))
      }
    } ,

    { ## lenS == 2 ----------------------------------

      ## a.pos <- sort(c(x,y,S))
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      nijk2 <- array(0L, c(2,2,2,2))
      for(i in n12) {
        d.x.i <- d.x1 == i
        for(j in n12) {
          d.x.i.y.j <- d.x.i & d.y1 == j
          for(k in n12) {
            d.x.y.S1 <- d.x.i.y.j & dmS1.1 == k
            for(l in n12)
              nijk2[i,j,k,l] <- sum(d.x.y.S1 & dmS2.1 == l)
          }
        }
      }

      ## flatten the two conditioning dimensions into one
      nijk <- array(0L, c(2,2,4))
      for(i in n12) {
        for(j in n12)
          nijk[,,2*(i-1)+j] <- nijk2[,,i,j]
      }

      nik <- apply(nijk,3,rowSums)
      njk <- apply(nijk,3,colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0,c(2,2,4))
      for (k in 1:4)
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k], njk[,k]))
    } ,

    { ## lenS == 3 ----------------------------------
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      dmS3.1 <- dm[,S[3]] + 1L
      nijk <- array(0L, c(2,2,8))
      for(i1 in n12) {
        d.x.i <- d.x1 == i1
        for(i2 in n12) {
          d.x.y.i12 <- d.x.i & d.y1 == i2
          for(i3 in n12) {
            d.x.y.S1 <- d.x.y.i12 & dmS1.1 == i3
            for(i4 in n12) {
              d.xy.S1S2 <- d.x.y.S1 & dmS2.1 == i4
              for(i5 in n12)
                nijk[i1,i2,4*(i3-1)+2*(i4-1)+i5] <-
                  sum(d.xy.S1S2 & dmS3.1 == i5)
            }
          }
        }
      }

      nik <- apply(nijk, 3, rowSums)
      njk <- apply(nijk, 3, colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0,c(2,2,8))
      for (k in 1:8)
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k], njk[,k]))
    } ,

    { ## lenS == 4 ----------------------------------
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      dmS3.1 <- dm[,S[3]] + 1L
      dmS4.1 <- dm[,S[4]] + 1L
      nijk <- array(0L, c(2,2,16))
      for(i1 in n12) {
        d.x.i <- d.x1 == i1
        for(i2 in n12) {
          d.x.y.i12 <- d.x.i & d.y1 == i2
          for(i3 in n12) {
            d.x.y.S1 <- d.x.y.i12 & dmS1.1 == i3
            for(i4 in n12) {
              d.xy.S1S2 <- d.x.y.S1 & dmS2.1 == i4
              for(i5 in n12) {
                d.xy.S1S2S3 <- d.xy.S1S2 & dmS3.1 == i5
                for(i6 in n12)
                  nijk[i1,i2,8*(i3-1)+4*(i4-1)+2*(i5-1)+i6] <-
                    sum(d.xy.S1S2S3 & dmS4.1 == i6)
              }
            }
          }
        }
      }

      nik <- apply(nijk,3,rowSums)
      njk <- apply(nijk,3,colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0, c(2,2,16))
      for (k in 1:16)
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k], njk[,k]))

    } ,

    { ## lenS == 5 ----------------------------------
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      dmS3.1 <- dm[,S[3]] + 1L
      dmS4.1 <- dm[,S[4]] + 1L
      dmS5.1 <- dm[,S[5]] + 1L
      nijk <- array(0L, c(2,2,32))
      for(i1 in n12) {
        d.x.i <- d.x1 == i1
        for(i2 in n12) {
          d.x.y.i12 <- d.x.i & d.y1 == i2
          for(i3 in n12) {
            d.x.y.S1 <- d.x.y.i12 & dmS1.1 == i3
            for(i4 in n12) {
              d.xy.S1S2 <- d.x.y.S1 & dmS2.1 == i4
              for(i5 in n12) {
                d.xy.S1S2S3 <- d.xy.S1S2 & dmS3.1 == i5
                for(i6 in n12) {
                  d.xy.S1S2S3S4 <- d.xy.S1S2S3 & dmS4.1 == i6
                  for(i7 in n12)
                    nijk[i1,i2,16*(i3-1)+8*(i4-1)+4*(i5-1)+2*(i6-1)+i7] <-
                      sum(d.xy.S1S2S3S4 & dmS5.1 == i7)
                }
              }
            }
          }
        }
      }

      nik <- apply(nijk,3,rowSums)
      njk <- apply(nijk,3,colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0,c(2,2,32))
      for (k in 1:32)
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k], njk[,k]))

    })# end {lenS = 5}, end{switch}
  }
  else { # --- |S| >= 6 -------------------------------------------------
    nijk <- array(0L, c(2,2,1))
    ## first sample 'by hand' to avoid if/else in the for-loop
    i <- d.x1[1]
    j <- d.y1[1]
    ## create directly a list of all k's  -- MM{FIXME}: there must be a better way
    k <- NULL
    lapply(as.list(S), function(x) { k <<- cbind(k,dm[,x]+1); NULL })
    ## first set of subset values
    parents.count <- 1L ## counter variable corresponding to the number
    ## of value combinations for the subset varibales
    ## observed in the data
    parents.val <- t(k[1,])
    nijk[i,j,parents.count] <- 1L        # cell counts

    ## Do the same for all other samples. If there is already a table
    ## for the subset values of the sample, increase the corresponding
    ## cell count. If not, create a new table and set the corresponding
    ## cell count to 1.
    for (it.sample in 2:n) {
      new.p <- TRUE
      i <- d.x1[it.sample]
      j <- d.y1[it.sample]
      ## comparing the current values of the subset variables to all
      ## already existing combinations of subset variables values
      t.comp <- t(parents.val[1:parents.count,]) == k[it.sample,]
      ## Have to be careful here. When giving dimension to a list,
      ## R fills column after column, and NOT row after row.
      dim(t.comp) <- c(lenS,parents.count)
      for (it.parents in 1:parents.count) {
        ## check if the present combination of value alreay exists
        if(all(t.comp[,it.parents])) {
          ## if yes, increase the corresponding cell count
          nijk[i,j,it.parents] <- nijk[i,j,it.parents] + 1L
          new.p <- FALSE
          break
        }
      }
      ## if the combination of subset values is new...
      if (new.p) {
        ## ...increase the number of subset 'types'
        parents.count <- parents.count + 1L
        if (verbose >= 2)
          cat(sprintf(' adding new parents (count = %d) at sample %d\n',
                      parents.count, it.sample))
        ## ...add the new subset to the others
        parents.val <- rbind(parents.val, k[it.sample,])
        ## ...update the cell counts (add new array)
        nijk <- abind(nijk, array(0,c(2,2,1)))
        nijk[i,j,parents.count] <- 1L
      }## if(new.p)
    }## end for(it.sample ..)

    nik <- apply(nijk,3,rowSums)
    njk <- apply(nijk,3,colSums)
    nk <- colSums(njk)
    ## compute G^2
    t.log <- array(0, c(2,2,parents.count))
    for (k in 1:parents.count)
      t.log[,,k] <- nijk[,,k] * (nk[k] / tcrossprod(nik[,k],njk[,k]))

  } ## |S| >= 6

  ## 0 * log(0) gives NaN; na.rm=TRUE drops those structural-zero terms
  G2 <- sum(2 * nijk * log(t.log), na.rm = TRUE)

  if (adaptDF && lenS > 0) {
    ## lower the degrees of freedom according to the amount of zero
    ## counts; add zero counts corresponding to the number of parents
    ## combinations that are missing
    zero.counts <- sum(nijk == 0L) + 4*(2^lenS-dim(nijk)[3])
    ndf <- max(1, df-zero.counts)
    ## FIX: this verbose report was garbled (only the tail of the cat() call
    ## survived, leaving a syntax error); restore the full conditional call.
    if (verbose) cat("adaptDF: (df=", df, ", zero.counts=", zero.counts,
                     ") ==> new df = ", ndf, "\n", sep = "")
    df <- ndf
  }

  pchisq(G2, df, lower.tail = FALSE)# i.e. == 1 - P(..)

}## gSquareBin()

gSquareDis <- function(x, y, S, dm, nlev, adaptDF = FALSE, n.min = 10*df,
                       verbose = FALSE) {

  ## Purpose: G^2 statistic to test for (conditional) independence of
  ##          __discrete__ X and Y given S  --> ../man/disCItest.Rd
  ## -------------------------------------------------------------------
  ## Arguments:
  ## - x,y,S: Are x,y conditionally independent given S (S can be NULL)?
  ## - dm: data matrix (rows: samples, columns: variables) with
  ##       discrete entries
  ## - nlev: vector with numbers of levels for each variable
  ## - adaptDF: lower the degrees of freedom by one for each zero count.
  ##            The value for the DF cannot go below 1.
  ## - n.min: minimal sample size; default 10*df is lazily evaluated after
  ##          df has been computed in the body
  ## - verbose: if TRUE, some additional info is outputted during the
  ##            computations
  ## Value: p-value of the G^2 test (returns 1 when n < n.min)
  ## -------------------------------------------------------------------

  stopifnot((n <- nrow(dm)) >= 1, # nr of samples
            (p <- ncol(dm)) >= 2) # nr of variables or nodes
  if(!all(1 <= c(x,y,S) & c(x,y,S) <= p))
    stop("x, y, and S must all be in {1,..,p}, p=",p)
  if(any(as.integer(dm) != dm))
    stop("'dm' must be discrete, with values in {0,1,..}")
  if(!any(dm == 0))
    stop("'dm' must have values in {0,1,..} with at least one '0' value")

  if(verbose) cat('Edge ', x,'--',y, ' with subset S =', S,'\n')

  lenS <- length(S)
  if(missing(nlev) || is.null(nlev))
    nlev <- vapply(seq_len(p),
                   function(j) length(levels(factor(dm[,j]))), 1L)
  else
    stopifnot(is.numeric(nlev), length(nlev) == p, !is.na(nlev))
  if(!all(nlev >= 2))
    stop("Each variable, i.e., column of 'dm', must have at least two different values")
  nl.x <- nlev[x]
  nl.y <- nlev[y]
  nl.S <- nlev[S]
  ## degrees of freedom assuming no structural zeros
  df <- (nl.x-1)*(nl.y-1)*prod(nl.S)
  if (n < n.min) { ## not enough samples to perform the test:
    warning(gettextf(
      "n=%d is too small (n < n.min = %d ) for G^2 test (=> treated as independence)",
      n, n.min), domain = NA)
    return( 1 )   ## gerster prob=0
  }
  ## else --  enough data to perform the test
  i.ny <- seq_len(nl.y) # = 1:nl.y
  d.x1 <- dm[,x] + 1L
  d.y1 <- dm[,y] + 1L
  if(lenS <= 4) { # bei gSquareBin lenS <= 5
    switch(lenS+1L, { ## lenS == 0 -------------------
      nij <- array(0L, c(nl.x,nl.y))
      for (i in 1:nl.x) {
        d.x.i <- d.x1 == i
        for (j in i.ny)
          nij[i,j] <- sum(d.x.i & d.y1 == j)
      }
      ## marginal counts
      t.X <- rowSums(nij)
      t.Y <- colSums(nij)
      ## compute G^2
      t.log <- n*(nij/ tcrossprod(t.X, t.Y))
      t.G2 <- 2*nij * log(t.log)
      t.G2[is.nan(t.G2)] <- 0
      G2 <- sum(t.G2)
    } ,
    { ## lenS == 1 ----------------------------------
      in.S <- seq_len(nl.S)
      dmS.1 <- dm[,S] + 1L
      nijk <- array(0L, c(nl.x,nl.y,nl.S))
      for(i in 1:nl.x) {
        d.x.i <- d.x1 == i
        for(j in i.ny) {
          d.x.i.y.j <- d.x.i & d.y1 == j
          for(k in in.S)
            nijk[i,j,k] <- sum(d.x.i.y.j & dmS.1 == k)
        }
      }
      nik <- apply(nijk, 3, rowSums)
      njk <- apply(nijk, 3, colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0, c(nl.x,nl.y,prod(nl.S)))
      for (k in 1:prod(nl.S))
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k],njk[,k]))

      t.G2 <- 2 * nijk * log(t.log)
      t.G2[is.nan(t.G2)] <- 0
      G2 <- sum(t.G2)
    } ,
    { ## lenS == 2 ----------------------------------
      in.S1 <- seq_len(nl.S1 <- nl.S[1])
      in.S2 <- seq_len(nl.S2 <- nl.S[2])
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      nijk <- array(0L, c(nl.x,nl.y,nl.S1*nl.S2))
      for(i in 1:nl.x) {
        d.x.i <- d.x1 == i
        for(j in i.ny) {
          d.x.i.y.j <- d.x.i & d.y1 == j
          for(k in in.S1) {
            d.x.y.S1 <- d.x.i.y.j & dmS1.1 == k
            for(l in in.S2)
              nijk[i,j,nl.S2*(k-1)+l] <- sum(d.x.y.S1 & dmS2.1 == l)
          }
        }
      }

      nik <- apply(nijk, 3, rowSums)
      njk <- apply(nijk, 3, colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0, c(nl.x,nl.y,prod(nl.S)))
      for (k in 1:prod(nl.S))
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k],njk[,k]))

      t.G2 <- 2 * nijk * log(t.log)
      t.G2[is.nan(t.G2)] <- 0
      G2 <- sum(t.G2)
    } ,
    { ## lenS == 3 ----------------------------------
      in.S1 <- seq_len(nl.S1 <- nl.S[1])
      in.S2 <- seq_len(nl.S2 <- nl.S[2])
      in.S3 <- seq_len(nl.S3 <- nl.S[3])
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      dmS3.1 <- dm[,S[3]] + 1L
      nijk <- array(0L, c(nl.x,nl.y,prod(nl.S)))
      for(i1 in 1:nl.x) {
        d.x.i1 <- d.x1 == i1
        for(i2 in i.ny) {
          d.x.i1.y.i2 <- d.x.i1 & d.y1 == i2
          for(i3 in in.S1) {
            d.x.y.S1 <- d.x.i1.y.i2 & dmS1.1 == i3
            for(i4 in in.S2) {
              d.x.y.S1.2 <- d.x.y.S1 & dmS2.1 == i4
              for(i5 in in.S3)
                nijk[i1,i2, nl.S3*nl.S2*(i3-1)+nl.S3*(i4-1)+i5] <-
                  sum(d.x.y.S1.2 & dmS3.1 == i5)
            }
          }
        }
      }

      nik <- apply(nijk, 3, rowSums)
      njk <- apply(nijk, 3, colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0, c(nl.x,nl.y,prod(nl.S)))
      for (k in 1:prod(nl.S))
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k],njk[,k]))

      t.G2 <- 2 * nijk * log(t.log)
      t.G2[which(is.nan(t.G2),arr.ind = TRUE)] <- 0
      G2 <- sum(t.G2)
    } ,
    { ## lenS == 4 ----------------------------------
      in.S1 <- seq_len(nl.S1 <- nl.S[1])
      in.S2 <- seq_len(nl.S2 <- nl.S[2])
      in.S3 <- seq_len(nl.S3 <- nl.S[3])
      in.S4 <- seq_len(nl.S4 <- nl.S[4])
      dmS1.1 <- dm[,S[1]] + 1L
      dmS2.1 <- dm[,S[2]] + 1L
      dmS3.1 <- dm[,S[3]] + 1L
      dmS4.1 <- dm[,S[4]] + 1L
      nijk <- array(0L, c(nl.x, nl.y, prod(nl.S)))
      for(i1 in 1:nl.x) {
        d.x.i1 <- d.x1 == i1
        for(i2 in i.ny) {
          d.x.i1.y.i2 <- d.x.i1 & d.y1 == i2
          for(i3 in in.S1) {
            d.x.y.S1 <- d.x.i1.y.i2 & dmS1.1 == i3
            for(i4 in in.S2) {
              d.x.y.S1.2 <- d.x.y.S1 & dmS2.1 == i4
              for(i5 in in.S3) {
                d.x.y.S1.2.3 <- d.x.y.S1.2 & dmS3.1 == i5
                for(i6 in in.S4)
                  nijk[i1,i2, nl.S4*nl.S3*nl.S2*(i3-1) +
                              nl.S4*nl.S3*(i4-1)+
                              nl.S4*(i5-1)+i6] <- sum(d.x.y.S1.2.3 & dmS4.1 == i6)
              }
            }
          }
        }
      }
      nik <- apply(nijk, 3, rowSums)
      njk <- apply(nijk, 3, colSums)
      nk <- colSums(njk)

      ## compute G^2
      t.log <- array(0, c(nl.x,nl.y,prod(nl.S)))
      for (k in 1:prod(nl.S))
        t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k],njk[,k]))
      t.G2 <- 2 * nijk * log(t.log)
      t.G2[is.nan(t.G2)] <- 0
      G2 <- sum(t.G2)
    }) # end lens == 4, end{switch}
  }
  else { #  |S| = lenS >= 5  (gSquareBin: lenS >= 6)
    nijk <- array(0L, c(nl.x, nl.y, 1L))
    ## first sample 'by hand' to avoid if/else in the for-loop
    i <- d.x1[1]
    j <- d.y1[1]
    ## create directly a list of all k's  -- MM{FIXME}: there must be a better way
    k <- NULL
    ## FIX: the columns of 'k' must come from the conditioning variables S
    ## (as in gSquareBin); the previous code bound d.x1 (the x column)
    ## lenS times, so the test conditioned on the wrong data.
    lapply(as.list(S), function(x) { k <<- cbind(k, dm[,x] + 1L); NULL })
    ## first set of subset values
    parents.count <- 1L ## counter variable corresponding to the number
    ## of value combinations for the subset varibales
    ## observed in the data
    parents.val <- t(k[1,])
    nijk[i,j,parents.count] <- 1L # cell counts

    ## Do the same for all other samples. If there is already a table
    ## for the subset values of the sample, increase the corresponding
    ## cell count. If not, create a new table and set the corresponding
    ## cell count to 1.
    for (it.sample in 2:n) {
      flag <- 0
      i <- d.x1[it.sample]
      j <- d.y1[it.sample]
      ## comparing the current values of the subset variables to all
      ## already existing combinations of subset variables values
      t.comp <- t(parents.val[1:parents.count,]) == k[it.sample,]
      ## Have to be careful here. When giving dimension to a list,
      ## R fills column after column, and NOT row after row.
      dim(t.comp) <- c(lenS,parents.count)
      for (it.parents in 1:parents.count) {
        ## check if the present combination of value alreay exists
        if(all(t.comp[,it.parents])) {
          ## if yes, increase the corresponding cell count
          nijk[i,j,it.parents] <- nijk[i,j,it.parents] + 1L
          flag <- 1
          break
        }
      }# end for(it.parents...)
      ## if the combination of subset values is new...
      if (flag == 0) {
        ## ...increase the number of subset 'types'
        parents.count <- parents.count + 1L
        if (verbose >= 2)
          cat(sprintf(' adding new parents (count = %d) at sample %d\n',
                      parents.count, it.sample))
        ## ...add the new subset to the others
        parents.val <- rbind(parents.val,k[it.sample,])
        ## ...update the cell counts (add new array)
        nijk <- abind(nijk, array(0L, c(nl.x,nl.y,1)))
        nijk[i,j,parents.count] <- 1L
      } # end if(flag==0)
    } ## for(it in 2:n)
    if (verbose && verbose < 2)
      cat(sprintf(" added a total of %d new parents\n", parents.count))

    nik <- apply(nijk,3,rowSums)
    njk <- apply(nijk,3,colSums)
    nk <- colSums(njk)
    ## compute G^2
    t.log <- array(0,c(nl.x,nl.y,parents.count))
    for (k in 1:parents.count)
      t.log[,,k] <- nijk[,,k]*(nk[k] / tcrossprod(nik[,k],njk[,k]))
    t.G2 <- 2 * nijk * log(t.log)
    t.G2[which(is.nan(t.G2),arr.ind = TRUE)] <- 0
    G2 <- sum(t.G2)
  }

  if (adaptDF && lenS > 0) {
    ## lower the degrees of freedom according to the amount of
    ## zero counts
    zero.counts <-
      if(lenS == 0)
        length(which(nij == 0))
      else
        length(which(nijk == 0)) + 4*(2^lenS - dim(nijk)[3])
    ## add zero counts corresponding to the number of parents
    ## combinations that are missing
    df <- max((df-zero.counts),1)
  } ## FIX: the closing brace was missing here, so pchisq() was nested
  ##      inside the if() and the function returned NULL for adaptDF=FALSE

  pchisq(G2, df, lower.tail = FALSE)# i.e. == 1 - P(..)

}## gSquareDis()

gaussCItest <- function(x,y,S,suffStat) {
  ## Gaussian conditional-independence test based on Fisher's z-transform.
  ## suffStat$C: correlation matrix, suffStat$n: sample size.
  ## Value: two-sided p-value for zero partial correlation of x and y given S.
  z.stat <- zStat(x, y, S, C = suffStat$C, n = suffStat$n)
  ## two-sided tail probability under the standard normal
  2 * pnorm(abs(z.stat), lower.tail = FALSE)
}

dsep <- function(a,b, S = NULL, g, john.pairs = NULL)
{
## Purpose: Are the set a and the set b d-separated given the set S?
## ----------------------------------------------------------------------
## Arguments:
## - a,b,S: vectors of node names
## - g: graphNEL object; must be a DAG (checked below)
## - john.pairs: matrix from johnson.all.pairs.sp (shortest path distances);
##               recomputed here when NULL
## ----------------------------------------------------------------------
## Value:
## Boolean decision (TRUE iff a and b are d-separated by S in g)
## ----------------------------------------------------------------------
## Author: Markus Kalisch

## Check that g is a DAG
amatTmp <- wgtMatrix(g) ## i->j if amatTmp[j,i]!=0
amatTmp[amatTmp != 0] <- 1
## a symmetric pair of nonzero entries would mean an undirected edge
if (max(amatTmp+t(amatTmp)) > 1) stop("dsep: Undirected edge in input graph!")
p <- numNodes(g)
## build node union of a,b,S
if(is.null(john.pairs)) john.pairs <- johnson.all.pairs.sp(g)
nodeUnion <- if(length(S) > 0) c(a,b,S) else c(a,b)
my.nodes <- nodes(g)

## find ancestor graph of nodeUnion: keep node i if some node of
## nodeUnion is reachable from i (finite shortest-path distance)
anc.set <- NULL
for (i in seq_len(p)) {
desc.nodes <- my.nodes[which(john.pairs[i,] < Inf)]
if (any(desc.nodes %in% nodeUnion)) anc.set <- c(anc.set,my.nodes[i])
} ## for (i in 1:p)
gS <- subGraph(anc.set,g)

## Moralize in amatM
## !!! in the following line:
## i->j if amat[i,j], i.e. different than default coding !!!
## (*)
amat <- wgtMatrix(gS, transpose = FALSE)
if(all(a0 <- amat == 0))
## if no edge in graph, nodes are d-separated
return( TRUE )
## else :
amat[!a0] <- 1
amatM <- amat
ind <- which(amat == 1,arr.ind = TRUE)

## "marry" the parents: connect every pair of parents of a common child
for (i in seq_len(nrow(ind))) {
## input is guaranteed to be directed
x <- ind[i,1]
y <- ind[i,2] ## x -> y
## using different coding, see (*) -> OK
allZ <- setdiff(which(amat[y,] == 0 & amat[,y] == 1), x) ## x -> y <- z
for (z in allZ)
if (amat[x,z] == 0 && amat[z,x] == 0)
amatM[x,z] <- 1 ## moralize
} ## for (i in seq_len(nrow(ind)))

## make undirected graph
## (up to now, there is NO undirected edge -> just add t(amat))
gSM <- as(amatM+t(amatM),"graphNEL")

if (length(S) > 0) { ## check separation
separates(a,b,S,gSM)
} else {
## S empty: d-separated iff b is NOT reachable from a in the
## moralized undirected graph (breadth-first search from a)
b %nin% (if(is.list(bfs. <- bfs(gSM,a))) bfs.[[1]] else bfs.)
}
} ## {dsep}

## Orakel:
dsepTest <- function(x,y, S = NULL, suffStat) {
  ## d-separation oracle, usable as an 'indepTest' for pc()/fci().
  ## suffStat$g : the true DAG (a "graph" object)
  ## suffStat$jp: johnson.all.pairs.sp matrix for g (optional speed-up)
  ## Value (a pseudo "p-value"):
  ##   0: keep edge / d-connected
  ##   1: drop edge / d-separated
  if (x == y || x %in% S || y %in% S)
    return(0)
  g <- suffStat$g
  stopifnot(is(g, "graph"))
  V <- nodes(g)
  ## dsep() returns TRUE/FALSE; map to 1/0
  if (dsep(a = V[x], b = V[y], S = V[S], g = g, john.pairs = suffStat$jp)) 1 else 0
} ## {dsepTest}

disCItest <- function(x,y,S,suffStat) {
  ## Conditional-independence test for discrete data, wrapping gSquareDis().
  ## suffStat$dm     : data matrix or data frame with discrete entries
  ## suffStat$nlev   : (optional) number of levels of each variable
  ## suffStat$adaptDF: logical, adapt degrees of freedom to zero counts
  ## Value: p-value of the G^2 test
  if(is.data.frame(dm <- suffStat$dm)) dm <- data.matrix(dm)
  else stopifnot(is.matrix(dm))
  nlev <- suffStat$nlev
  adaptDF <- suffStat$adaptDF # FIX: was passed below without ever being defined
  ## p-value:
  gSquareDis(x = x, y = y, S = S, dm = dm, nlev = nlev, adaptDF = adaptDF,
             verbose = FALSE)
}

binCItest <- function(x,y,S,suffStat) {
  ## Conditional-independence test for binary data, wrapping gSquareBin().
  ## suffStat$dm     : data matrix or data frame with entries in {0,1}
  ## suffStat$adaptDF: logical, adapt degrees of freedom to zero counts
  ## Value: p-value of the G^2 test
  if(is.data.frame(dm <- suffStat$dm)) dm <- data.matrix(dm)
  else stopifnot(is.matrix(dm))
  adaptDF <- suffStat$adaptDF # FIX: was passed below without ever being defined
  ## p-value:
  gSquareBin(x = x, y = y, S = S, dm = dm, adaptDF = adaptDF, verbose = FALSE)
}

hasExtension <- function(amat, amatSkel, x, pa1, pa2.t, pa2.f, type, nl) {
  ## Can the (C)PDAG 'amat' be extended to a DAG under the orientation
  ## choice given by pa2.t (oriented into x) and pa2.f (oriented out of x)?
  ## - nl: column names of amat (node labels)
  ## - x, pa1, pa2.t, pa2.f: column positions (NULL if not existing)
  ## - type: "pdag" or "cpdag"
  ## Value: TRUE iff an extension exists
  if (type != "pdag")
    ## cpdag case: extendable iff the choice creates no new collider
    return(!has.new.coll(amat, amatSkel, x, pa1, pa2.t, pa2.f))
  ## pdag case: try to add the chosen orientations as background knowledge;
  ## addBgKnowledge() returns NULL exactly when no extension exists
  x.lab <- nl[x]
  from.nodes <- c(rep(x.lab, length(pa2.f)), nl[pa2.t])
  to.nodes   <- c(nl[pa2.f], rep(x.lab, length(pa2.t)))
  !is.null(addBgKnowledge(gInput = amat, x = from.nodes, y = to.nodes))
}

revealEdge <- function(c,d,s) { ## cpdag, dag, selected edges to reveal
  ## Copy the orientation of selected edges from the DAG into the CPDAG.
  ## - c: adjacency matrix of the cpdag
  ## - d: adjacency matrix of the dag
  ## - s: two-column matrix of (row, col) index pairs of edges to reveal,
  ##      or NA if nothing should be revealed
  ## Value: 'c' with both entries of each selected edge taken from 'd'
  if (all(!is.na(s))) { ## something to reveal
    ## FIX: seq_len() instead of 1:nrow(s) -- a zero-row selection matrix
    ## must be a no-op, whereas 1:0 produced an out-of-bounds subscript
    for (i in seq_len(nrow(s))) {
      c[s[i,1], s[i,2]] <- d[s[i,1], s[i,2]]
      c[s[i,2], s[i,1]] <- d[s[i,2], s[i,1]]
    }
  }
  c
}

## Estimate the (multi-)set of possible causal effects of x.pos on y.pos
## from an estimated (C)PDAG 'graphEst' and covariance matrix 'mcov'.
##
## NOTE(review): several variables in this body (amat, wgt.est, am.pdag,
## ad, n.dags) are used but never initialized, and the if/else on
## is.na(all.dags) has empty branches -- lines appear to have been lost
## (presumably amat <- wgtMatrix(graphEst) and the all-DAG enumeration in
## the "global" branch). Restore from the package history before relying
## on this function.
ida <- function (x.pos, y.pos, mcov, graphEst, method = c("local", "global"),
y.notparent = FALSE, verbose = FALSE, all.dags = NA,
type = c("cpdag", "pdag"))
{
## validate scalar integer positions and the graph type
stopifnot(x.pos == (x <- as.integer(x.pos)),
y.pos == (y <- as.integer(y.pos)),
length(x) == 1, length(y) == 1,
type %in% c("pdag", "cpdag"))
method <- match.arg(method)
type <- match.arg(type)
## NOTE(review): 'amat' is used here without being defined -- the
## initialization (likely amat <- wgtMatrix(graphEst)) seems to be missing.
amat[which(amat != 0)] <- 1 ## coding: amat.cpdag

## test if valid input amat
if (!isValidGraph(amat = amat, type = type)) {
message("The input graph is not a valid ",type,". See function isValidGraph() for details.\n")
}

nl <- colnames(amat) ## Node labels
## double-check that node labels exist (they should given a graph input)
stopifnot(!is.null(nl))
## symmetrized 0/1 skeleton of the graph
amatSkel <- amat + t(amat)
amatSkel[amatSkel != 0] <- 1
##local method needs changing
if (method == "local") {
## NOTE(review): 'wgt.est' is used below but never defined in this body --
## its initialization appears to have been dropped; verify against upstream.
if (y.notparent) {
wgt.est[x, y] <- FALSE
}
## keep only uniquely oriented (directed) edges in wgt.unique
tmp <- wgt.est - t(wgt.est)
tmp[which(tmp < 0)] <- 0
wgt.unique <- tmp
##orient pa1 into x
pa1 <- which(wgt.unique[x, ] != 0)
if (y %in% pa1) {
## y is a (definite) parent of x => causal effect of x on y is 0
beta.hat <- 0
}
else {
## ambiguous (undirected) edges at x
wgt.ambig <- wgt.est - wgt.unique
##orient pa2 out of x
pa2 <- which(wgt.ambig[x, ] != 0)
if (verbose)
cat("\n\nx=", x, "y=", y, "\npa1=", pa1, "\npa2=",
pa2, "\n")
if (length(pa2) == 0) {
## no ambiguous parents: single regression adjustment on pa1
beta.hat <- lm.cov(mcov, y, c(x, pa1))
if (verbose)
cat("Fit - y:", y, "x:", c(x, pa1), "|b.hat=",
beta.hat, "\n")
}
else {
## enumerate orientations of the ambiguous edges at x; each locally
## valid orientation (checked by hasExtension) contributes one estimate
beta.hat <- NA
ii <- 0 ## CHANGED LINE
pa2.f <- pa2
pa2.t <- NULL
if (hasExtension(amat, amatSkel, x, pa1, pa2.t, pa2.f, type, nl)) {
ii <- ii + 1 ## NEW LINE
beta.hat[ii] <- lm.cov(mcov, y, c(x, pa1))
if (verbose)
cat("Fit - y:", y, "x:", c(x, pa1), "|b.hat=",
beta.hat[ii], "\n")
}
## exactly one ambiguous edge oriented into x
for (i2 in seq_along(pa2)) {
pa2.f <- pa2[-i2]
pa2.t <- pa2[i2]
if (hasExtension(amat, amatSkel, x, pa1, pa2.t, pa2.f, type, nl)) {
ii <- ii + 1
if (y %in% pa2.t) {
beta.hat[ii] <- 0
}
else {
beta.hat[ii] <- lm.cov(mcov, y, c(x, pa1,
pa2[i2]))
if (verbose)
cat("Fit - y:", y, "x:", c(x, pa1, pa2[i2]),
"|b.hat=", beta.hat[ii], "\n")
}
}
}
## all larger subsets of the ambiguous edges oriented into x
if (length(pa2) > 1)
for (i in 2:length(pa2)) {
pa.tmp <- combn(pa2, i, simplify = TRUE)
for (j in seq_len(ncol(pa.tmp))) {
pa2.t <- pa.tmp[, j]
pa2.f <- setdiff(pa2, pa2.t)
if (hasExtension(amat, amatSkel, x, pa1, pa2.t, pa2.f, type, nl)) {
ii <- ii + 1
if (y %in% pa2.t) {
beta.hat[ii] <- 0
}
else {
beta.hat[ii] <- lm.cov(mcov, y, c(x,
pa1, pa2.t))
if (verbose)
cat("Fit - y:", y, "x:", c(x, pa1,
pa2.t), "|b.hat=", beta.hat[ii],
"\n")
}
}
}
}
}
}
}
##method global
##should stay the same for pdags
else {
p <- numNodes(graphEst)
## NOTE(review): 'am.pdag' is used without being defined (initialization
## such as am.pdag <- wgtMatrix(graphEst) appears to be missing).
am.pdag[am.pdag != 0] <- 1
if (y.notparent) {
am.pdag[x, y] <- 0
}
## NOTE(review): both branches below are empty -- the enumeration of all
## DAGs in the equivalence class (filling 'ad' and 'n.dags') seems to
## have been dropped here; 'ad' and 'n.dags' are used undefined below.
if (is.na(all.dags)) {
}
else {
}
## one regression-based estimate per DAG in the equivalence class
beta.hat <- rep(NA, n.dags)
for (i in 1:n.dags) {
wgt.unique <- t(matrix(ad[i, ], p, p))
pa1 <- which(wgt.unique[x, ] != 0)
if (y %in% pa1) {
beta.hat[i] <- 0
}
else {
beta.hat[i] <- lm.cov(mcov, y, c(x, pa1))
if (verbose)
cat("Fit - y:", y, "x:", c(x, pa1), "|b.hat=",
beta.hat[i], "\n")
}
}
}
unname(beta.hat)
}

idaFast <- function(x.pos, y.pos.set, mcov, graphEst)
{
## Purpose: Estimate the causal effect of x on each element in the
## set y using the local method; graphEst and correlation matrix
## have to be precomputed; orient
## undirected edges at x in a way so that no new collider is
## introduced; if there is an undirected edge between x and y, both directions are considered;
## i.e., y might be partent of x in which case the effect is 0.
## ----------------------------------------------------------------------
## Arguments:
## - x.pos, y.pos: Column of x and y in d.mat
## - mcov: Covariance matrix that was used to estimate graphEst
## - graphEst: Fit of PC Algorithm (semidirected)
## ----------------------------------------------------------------------
## Value: list of causal values; one list element for each element of
## y.pos.set
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 7 Jan 2010, 11:18

## NOTE(review): 'amat' is used here without being defined -- the
## initialization (presumably amat <- wgtMatrix(graphEst)) appears to have
## been lost; likewise 'wgt.est' below. Restore from the package history.
amat[which(amat != 0)] <- 1 ## i->j if amat[j,i]==1
## symmetrized 0/1 skeleton
amatSkel <- amat + t(amat)
amatSkel[amatSkel != 0] <- 1

## find unique and ambiguous parents of x
## NOTE(review): 'wgt.est' is undefined in this body -- see note above.
tmp <- wgt.est-t(wgt.est)
tmp[which(tmp < 0)] <- 0
wgt.unique <- tmp
wgt.ambig <- wgt.est-wgt.unique
pa1 <- which(wgt.unique[x.pos,] != 0)
pa2 <- which(wgt.ambig[x.pos,] != 0)

## estimate beta
if (length(pa2) == 0) {
## no ambiguous edges at x: single adjustment set {x} + pa1
beta.tmp <- lm.cov(mcov,y.pos.set,c(x.pos,pa1)) ####
## effect is 0 for any y that is itself a parent of x
beta.tmp[y.pos.set %in% pa1] <- 0
beta.hat <- cbind(beta.tmp)
} else {    ## at least one undirected parent
## no member of pa2
pa2.f <- pa2
pa2.t <- NA
beta.hat <-
if (!has.new.coll(amat,amatSkel,x.pos,pa1,pa2.t,pa2.f)) {
beta.tmp <- lm.cov(mcov,y.pos.set,c(x.pos,pa1)) ####
beta.tmp[y.pos.set %in% pa1] <- 0
cbind(beta.tmp)
} # else NULL

## exactly one member of pa2
for (i2 in seq_along(pa2)) {
pa2.f <- pa2[-i2]
pa2.t <- pa2[i2]
if (!has.new.coll(amat,amatSkel,x.pos,pa1,pa2.t,pa2.f)) {
beta.tmp <- lm.cov(mcov,y.pos.set,c(x.pos,pa1,pa2.t)) ####
beta.tmp[y.pos.set %in% c(pa1,pa2.t)] <- 0
beta.hat <- cbind(beta.hat, beta.tmp)
}
} ## for (i2 in seq_along(pa2))

## higher order subsets of pa2
if (length(pa2) > 1)
for (i in 2:length(pa2)) {
pa.tmp <- combn(pa2,i,simplify = TRUE)
for (j in seq_len(ncol(pa.tmp))) {
pa2.t <- pa.tmp[,j]
pa2.f <- setdiff(pa2, pa2.t)
if (!has.new.coll(amat,amatSkel,x.pos,pa1,pa2.t,pa2.f)) {
beta.tmp <- lm.cov(mcov,y.pos.set,c(x.pos,pa1,pa2.t)) ####
beta.tmp[y.pos.set %in% c(pa1,pa2.t)] <- 0
beta.hat <- cbind(beta.hat, beta.tmp)
}
} ## for (j )
} ## for (i )

} ## if .. else length(pa2) > 0)

## MM: for now, maybe in the future get sensible column names:
colnames(beta.hat) <- NULL
if (nrow(beta.hat) > 0) rownames(beta.hat) <- as.character(y.pos.set)
beta.hat
}

## -> ../man/legal.path.Rd
## only called in  qreach()  with only 'c' varying
legal.path <- function(a,b,c, amat)
{
  ## Purpose: Is path a-b-c legal (either collider in b or a,b,c is triangle)
  ## !! a-b-c must be in a path !! this is not checked !!
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - a, b, c: nodes
  ## - amat: adj matrix (coding 0,1,2 for no edge, circle, arrowhead)
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 29 Oct 2009; Martin Maechler
  mark.ab <- amat[a,b]
  if (a == c || mark.ab == 0 || amat[b,c] == 0)
    return(FALSE)
  ## edges a-b and b-c exist and a != c
  if (amat[a,c] != 0)
    ## a,b,c form a triangle; no need to check [c,a] -- an existing edge
    ## always has SOME nonzero mark at [a,c]
    return(TRUE)
  ## otherwise legal only for a collider at b: arrowheads on both sides
  mark.ab == 2 && amat[c,b] == 2
}

##' Plot a subgraph for a specified starting node and a given graph
##' @param graphObj Graph object
##' @param y        Starting node
##' @param dist     Distance of nodes included in subgraph from starting node y
##' @param amat     Adjacency matrix of skeleton graph (optional)
##' @param directed Boolean; should the plotted subgraph be directed?
##' @param main     Plot title
##' --------------- see ../man/plotSG.Rd
plotSG <- function(graphObj, y, dist, amat = NA, directed = TRUE,
                   main = paste("Subgraph of ", deparse(substitute(graphObj)),
                                "\nfrom ", y, " with distance ", dist ))
{
  ## Author: Daniel Stekhoven, Date: 26 Jan 2010, 08:56

  check.Rgraphviz()
  stopifnot(dist >= 1)

  ## Extract adjacency matrix (if necessary)
  if (any( is.na(amat)))
    amat <- wgtMatrix(graphObj)

  ## Reduce to a symmetric 0/1 skeleton matrix with empty diagonal
  amat[amat != 0] <- 1
  amat <- amat + t(amat)
  diag(amat) <- 0 # can that happen anyway??
  amat[amat == 2] <- 1

  ## Collect nodes reachable from y within 'dist' steps, layer by layer
  frontier <- which( amat[y,] == 1 )
  keep <- c( y, frontier )
  for (step in seq_len(dist-1L)) {
    frontier <-
      if ( length(frontier) == 1 )
        which( amat[frontier,] == 1 )
      else if (length(frontier) != 0)
        which(amat[frontier,] == 1, arr.ind = TRUE)[,"col"]
    ## (empty frontier yields NULL and the loop becomes a no-op)
    keep <- unique( c( keep, frontier ) )
  }

  ## Name nodes
  if(is(graphObj, "graphNEL"))
    names(keep) <- graphObj@nodes[keep]

  ## subgraph - distinguish between directed edges or not
  sg <-
    if (directed)
      subGraph(as.character(keep), graphObj)
    else
      as(amat[keep, keep], "graphNEL")

  ## Plot subgraph
  Rgraphviz::plot( sg )
  if(!is.null(main))
    title(main = main)
  invisible(sg)
}

##########################################################################
##
## Functions for the conservative versions (PC, FCI (all), and RFCI)
##
#########################################################################

## Functions used by all algorithms
##########################################################
pc.cons.intern <- function(sk, suffStat, indepTest, alpha,
                           version.unf = c(NA,NA), maj.rule = FALSE, verbose = FALSE)
{
  ## Purpose:  For any unshielded triple A-B-C, consider all subsets D of
  ## the neighbors of A and of the neighbors of C, and record the sets
  ## D for which A and C are conditionally independent given D. If B
  ## is in none of these sets, do nothing (it is a
  ## v-structure) and also delete B from sepset(A,C) if present (so we are
  ## sure that a v-structure will be created). If B is in all sets, do nothing
  ## (it is not a v-structure) and also add B to sepset(A,C) if not present
  ## (so we are sure that a v-structure will not be created). If maj.rule=FALSE
  ## the normal conservative version is applied, hence if B is in
  ## some but not all sets, mark the triple as "ambiguous". If maj.rule=TRUE
  ## we mark the triple as "ambiguous" if B is in exactly 50% of the cases,
  ## if less than 50% define it as a v-structure, and if in more than 50%
  ## no v-structure.
  ## ----------------------------------------------------------------------
  ## Arguments: - sk: output returned by function "skeleton"
  ##            - suffStat: Sufficient statistics for independence tests
  ##            - indepTest: Function for independence test
  ##            - alpha: Significance level of test
  ##            - version.unf[1]: 1 it checks if b is in some sepsets,
  ##                              2 it also checks if there exists a sepset
  ##                              which is a subset of the neighbours.
  ##            - version.unf[2]: 1 same as in Tetrad (do not consider
  ##                              the initial sepset), 2 it also considers
  ##                              the initial sepset
  ##            - maj.rule: FALSE/TRUE if the majority rule idea is applied
  ## ----------------------------------------------------------------------
  ## Value: - unfTripl: Triples that were marked as unfaithful, each coded
  ##                    as a single number via triple2numb()
  ##        - vers: vector containing the version (1 or 2) of the
  ##                corresponding triple saved in unfTripl (1=normal
  ##                unfaithful triple that is B is in some sepsets;
  ##                2=triple coming from version.unf[1]==2
  ##                that is a and c are indep given the initial sepset
  ##                but there doesn't exist a subset of the neighbours
  ##                that d-separates them)
  ##        - sk: updated skeleton object, sepsets might have been updated
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 12 Feb 2010, 10:43
  ## Modifications: Diego Colombo

  g <- as(sk@graph,"matrix")
  stopifnot(all(g == t(g))) ## g is guaranteed to be symmetric
  p <- as.numeric(dim(g)[1])
  ## Pre-allocate result vectors; grown in chunks below if they fill up
  unfTripl <- vers <- rep(NA,min(p*p,100000))
  counter <- 0
  if (sum(g) > 0) { ## at least one edge present
    ind <- which(g == 1, arr.ind = TRUE)
    tripleMatrix <- NULL
    ## Go through all edges a-b, collecting all unshielded triples a-b-c
    ## (b adjacent to both a and c, but a and c themselves non-adjacent)
    for (i in seq_len(nrow(ind))) {
      a <- ind[i,1]
      b <- ind[i,2]
      allC <- setdiff(which(g[b,] == 1),a) ## a-b-c
      newC <- allC[g[a,allC] == 0] ## keep only c NOT adjacent to a (unshielded)
      tmpMatrix <- cbind(rep(a,length(newC)),rep(b,length(newC)),newC)
      tripleMatrix <- rbind(tripleMatrix,tmpMatrix)
      colnames(tripleMatrix) <- c("","","")
    }
    if ((m <- nrow(tripleMatrix)) > 0) {
      ## Each triple was found twice (as a-b-c and c-b-a);
      ## keep only the orientation with the smaller first node
      deleteDupl <- logical(m)# all FALSE
      for (i in seq_len(m))
        if (tripleMatrix[i,1] > tripleMatrix[i,3])
          deleteDupl[i] <- TRUE
      if(any(deleteDupl))
        tripleMatrix <- tripleMatrix[!deleteDupl,, drop = FALSE]

      for (i in seq_len(nrow(tripleMatrix))) {
        ## pay attention to the size of counter:
        ## enlarge the result vectors before they would overflow
        if (counter+1L == length(unfTripl)) {
          n.xtra <- min(p*p, 100000)
          new.len <- counter+1L + n.xtra
          length(unfTripl) <- new.len
          length(vers)     <- new.len
        }
        a <- tripleMatrix[i,1]
        b <- tripleMatrix[i,2]
        c <- tripleMatrix[i,3]
        nbrsA <- which(g[,a] != 0) ## G symm; c no nbr of a
        nbrsC <- which(g[,c] != 0)
        if (verbose) {
          cat("\nTriple:", a,b,c,"and sepset by skelet:",
              unique(sk@sepset[[a]][[c]],sk@sepset[[c]][[a]]),"\n")
        }
        r.abc <- checkTriple(a, b, c, nbrsA, nbrsC,
                             sk@sepset[[a]][[c]], sk@sepset[[c]][[a]],
                             suffStat = suffStat, indepTest = indepTest, alpha = alpha,
                             version.unf = version.unf, maj.rule = maj.rule, verbose = verbose)
        ## 1: in NO set; 2: in ALL sets; 3: in SOME but not all
        ## Take action only if case "3"
        if (r.abc$decision == 3) {
          ## record ambiguous triple
          counter <- counter + 1
          unfTripl[counter] <- triple2numb(p,a,b,c)
          vers[counter] <- r.abc$version
        }
        ## can happen the case in Tetrad, so we must save the triple
        ## as ambiguous:
        ## a and c independent given S but not given subsets of the neighbours
        if ((version.unf[1] == 2) && (r.abc$version == 2) && (r.abc$decision != 3)) {
          counter <- counter + 1
          unfTripl[counter] <- triple2numb(p,a,b,c)
          vers[counter] <- r.abc$version
        }
        ## checkTriple() may have added b to / removed b from the sepsets:
        sk@sepset[[a]][[c]] <- r.abc$SepsetA
        sk@sepset[[c]][[a]] <- r.abc$SepsetC
      }
    }
  }
  ## truncate the pre-allocated vectors to the number of recorded triples
  length(unfTripl) <- length(vers) <- counter
  list(unfTripl = unfTripl, vers = vers, sk = sk)
}

## Called both from pc.cons.intern() and rfci.vStruc() :
checkTriple <- function(a, b, c, nbrsA, nbrsC, sepsetA, sepsetC,
                        suffStat, indepTest, alpha, version.unf = c(NA,NA),
                        maj.rule = FALSE, verbose = FALSE)
{
  ## Purpose: For each subset of nbrsA and nbrsC where a and c are cond.
  ## independent, it is checked if b is in the conditioning set.
  ## ----------------------------------------------------------------------
  ## Arguments: - a,b,c: Nodes (positions in adjacency matrix)
  ##            - nbrsA: Neighbors of a
  ##            - nbrsC: Neighbors of c
  ##            - sepsetA: sepset(a,c)
  ##            - sepsetC: sepset(c,a)
  ##            - suffStat: Sufficient statistics for independence tests
  ##            - indepTest: Function for independence test
  ##            - alpha: Significance level of test
  ##            - version.unf[1]: 1 it checks if b is in some sepsets,
  ##                              2 it also checks if there exists a sepset
  ##                              which is a subset of the neighbours.
  ##            - version.unf[2]: 1 same as Tetrad (do not consider the initial
  ##                              sepset), 2 consider if b is in sepsetA
  ##                              or sepsetC
  ##            - maj.rule: FALSE/TRUE if the majority rule idea is applied
  ## ----------------------------------------------------------------------
  ## Value: - decision: res
  ##          res = 1: b is in NO sepset (-> v-structure)
  ##          res = 2: b is in ALL sepsets (-> no v-structure)
  ##          res = 3: b is in SOME but not all sepsets (-> ambiguous triple)
  ##        - version: version (1 or 2) of the ambiguous triple
  ##                (1=normal ambiguous triple that is b is in some sepsets;
  ##                2=triple coming from version.unf[1]==2 that is a and c are
  ##                indep given the initial sepset but there doesn't exist a
  ##                subset of the neighbours that d-separates them)
  ##        - sepsetA and sepsetC: updated separation sets
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: 12 Feb 2010, 12:13
  ## Modifications: Diego Colombo, Martin Maechler

  ## loop through all subsets of parents

  nr.indep <- 0
  stopifnot(length(version.unf) == 2, version.unf %in% 1:2)
  ## 'tmp' collects, for every separating set found, whether b was a member.
  ## When version.unf[2] == 2 the initial sepsets count as the first entry:
  tmp <- if (version.unf[2] == 2) ## our version
           (b %in% sepsetA || b %in% sepsetC) ## else NULL = Tetrad version
  version <- 0
  if ((nn <- length(nbrsA)) > 0) {
    ## all 2^nn inclusion patterns over nbrsA, one row per subset
    allComb <- expand.grid(lapply(integer(nn), function(.) 0:1))
    ## loop through all subsets of neighbours
    for (i in 1:nrow(allComb)) { ## == 1:(2^nn)
      S <- nbrsA[which(allComb[i,] != 0)]
      pval <- indepTest(a, c, S, suffStat)
      ## save the pval and the set that produced this pval
      if (verbose) cat("a: S =",S," - pval =",pval,"\n")
      if (pval >= alpha) { ## a and c cond. independent given S
        nr.indep <- nr.indep + 1
        ## is b in set?
        tmp <- c(tmp, b %in% S)
        version <- 1
      }
    }
  }
  ## now with the neighbours of c
  if ((nn <- length(nbrsC)) > 0) {
    allComb <- expand.grid(lapply(integer(nn), function(.) 0:1))
    ## loop through all subsets of neighbours
    for (i in 1:nrow(allComb)) { ## == 1:(2^nn)
      S <- nbrsC[which(allComb[i,] != 0)]
      pval <- indepTest(a, c, S, suffStat)
      ## save the pval and the set that produced this pval
      if (verbose) cat("c: S =",S," - pval =",pval,"\n")
      if (pval >= alpha) { ## a and c cond. independent given S
        nr.indep <- nr.indep + 1
        ## is b in set?
        tmp <- c(tmp, b %in% S)
        version <- 1
      }
    }
  }
  ## no subset of the neighbours separated a and c at all:
  if (version.unf[1] == 2  && nr.indep == 0) {
    version <- 2
  }
  if (is.null(tmp)) tmp <- FALSE ## no sepset recorded => treat as "b in NO set"

  if (all(tmp)) {
    res <- 2 ## in ALL sets
    ## therefore a - b - c is not a v-structure, hence add b to sepset(a,c)
    ## and sepset(c,a)
    ## for example it can happen that b is not in sepset(a,c) or sepset(c,a)
    ## but now it is in each set that separates a and c given the neighbours
    if (b %nin% sepsetA) sepsetA <- c(sepsetA, b)
    if (b %nin% sepsetC) sepsetC <- c(sepsetC, b)
  } else {
    if (all(!tmp)) {
      res <- 1 ## in NO set
      ## therefore a - b - c is a v-structure, hence delete b from
      ## sepset(a,c) and sepset(c,a)
      ## for example it can happen that b is in sepset(a,c) or sepset(c,a)
      ## but now it is in no set that separates a and c given the neighbours
      sepsetA <- setdiff(sepsetA,b)
      sepsetC <- setdiff(sepsetC,b)
    } else {
      ## normal conservative PC, b is in some sets hence the triple
      ## is unfaithful
      if (!maj.rule) {
        res <- 3 ## in SOME sets
      } else {
        ## use the majority rule to test if the triple is faithful
        ## or not and then decide if it is a v-structure or not accordingly
        ## NEW check the percentage of b in the conditioning sets that
        ## make a and c independent
        if (sum(tmp)/length(tmp) < 0.5) {
          ## we accept that b is in NO set
          res <- 1 ## in NO set
          ## therefore a - b - c is a v-structure, hence delete b
          ## from sepset(a,c) and sepset(c,a)
          ## for example it can happen that b is in sepset(a,c) or
          ## sepset(c,a) but now it is in no set that
          ## separates a and c given the neighbours
          sepsetA <- setdiff(sepsetA,b)
          sepsetC <- setdiff(sepsetC,b)
        } else if (sum(tmp)/length(tmp) > 0.5) {
          ## we accept that b is in ALL set
          res <- 2 ## in ALL sets
          ## therefore a - b - c is not a v-structure, hence add b
          ## to sepset(a,c) and sepset(c,a)
          ## for example it can happen that b is not in sepset(a,c)
          ## or sepset(c,a) but now it is in each set that
          ## separates a and c given the neighbours
          if (b %nin% sepsetA) sepsetA <- c(sepsetA,b)
          if (b %nin% sepsetC) sepsetC <- c(sepsetC,b)
        } else if (sum(tmp)/length(tmp) == 0.5) {
          ## define the triple as unfaithful, because half of the
          ## times b is in the set and half of them in not in
          res <- 3 ## in SOME sets
        }
      }
    }
  }
  if (verbose && res == 3) cat("Triple ambiguous\n")

  ## if you save a variable <- NULL into a list it will delete this element!
  ## The following also transforms NULL sepset* to integer(0):
  lapply(list(decision = res, version = version, SepsetA = sepsetA, SepsetC = sepsetC),
         as.integer)
} ## {checkTriple}

## For R5 and R9-R10
######################################################
faith.check <- function(cp, unfVect, p, boolean = TRUE)
{
  ## Purpose: check if every triple on the circle path cp is unambiguous
  ## ----------------------------------------------------------------------
  ## Arguments: - cp: circle path to check for unambiguity
  ##            - unfVect: ambiguous triples, coded via triple2numb()
  ##            - p: number of nodes in the graph
  ##            - boolean: if TRUE,  return TRUE iff there is no ambiguity,
  ##                       i.e. "faithful";
  ##                       if FALSE, return the number of ambiguities.
  ## ----------------------------------------------------------------------
  ## Author: Diego Colombo, Date: 25 May 2010; 'boolean' etc by Martin Maechler
  if(!boolean) res <- 0L
  n <- length(cp)
  ## Precompute 1-based indices of all n consecutive triples
  ## (i1[l], i2[l], i3[l]), l = 1..n, along the *circular* path cp:
  ii <- 0:(n-1L)
  i1 <- (ii      %% n)+1L
  i2 <- ((ii+1L) %% n)+1L
  i3 <- ((ii+2L) %% n)+1L
  ## BUG FIX: loop over 1..n, not over ii = 0..(n-1).  The old
  ## 'for (l in ii)' used 0-based subscripts on the 1-based vectors above:
  ## i1[0] is empty (first triple silently skipped) and i1[n] never used,
  ## so one of the n circular triples was never checked.
  for (l in seq_len(n)) {
    ## a triple is ambiguous if recorded in either orientation
    if (any(unfVect == triple2numb(p,cp[i1[l]],cp[i2[l]],cp[i3[l]]), na.rm = TRUE) ||
        any(unfVect == triple2numb(p,cp[i3[l]],cp[i2[l]],cp[i1[l]]), na.rm = TRUE)) {
      if(boolean) return(FALSE) # the first time we found one: not faithful
      ## else count
      res <- res + 1L
    }
  }
  if(boolean) TRUE else res
}

###############################################################################
##
## FCI, RFCI, and fast FCI-oracle
##
###############################################################################

## Internal function used by FCI and RFCI
##############################################################################

triple2numb <- function(p,i,j,k)
{
  ## Purpose: encode the ordered triple i-j-k of nodes as one unique number,
  ## treating (i, j, k) as digits in base p:  ((i*p) + j)*p + k.
  ## ----------------------------------------------------------------------
  ## Arguments: - p: number of nodes;  - i,j,k: the triple i-j-k
  ## ----------------------------------------------------------------------
  ## Author: Diego Colombo, Date: 11 May 2010;  Martin Maechler
  ## Work in double precision: integer arithmetic overflows far too easily.
  base <- as.numeric(p)
  ((i * base) + j) * base + k
}

## Functions for R4, R5, and R9-R10 for FCI(all) and RFCI
############################################################################

updateList <- function(path, set, old.list)
{
  ## Purpose: update the list of all paths in the iterative functions
  ## minDiscrPath, minUncovCircPath and minUncovPdPath: for every element
  ## s of 'set', append the extended path c(path, s) to 'old.list'.
  ## ----------------------------------------------------------------------
  ## Arguments: - path: the path under investigation
  ##            - set: (integer) index set of variables to be added to path
  ##            - old.list: the list to update
  ## ----------------------------------------------------------------------
  ## Author: Diego Colombo, Date: 21 Oct 2011; Without for() by Martin Maechler
  extended <- vector("list", length(set))
  for (idx in seq_along(set))
    extended[[idx]] <- c(path, set[[idx]])
  c(old.list, extended)
}

## R9-R10
minUncovPdPath <- function(p, pag, a,b,c, unfVect, verbose = FALSE)
{
## Purpose: find a minimal uncovered pd path for a,b,c saved in path.
## Check also for the conservative case that it is unambiguous
## If a path exists this is the output, otherwise NA
## ----------------------------------------------------------------------
## Arguments: - p: number of nodes in the graph
##            - a,b,c : nodes under interest
##            - unfVect: vector containing the ambiguous triples
## ----------------------------------------------------------------------
## Author: Diego Colombo, Date: 19 Oct 2011; small changes: Martin Maechler

visited <- rep(FALSE, p)
visited[c(a,b,c)] <- TRUE
min.upd.path <- NA
## find all neighbours of b not visited yet
indD <- which((pag[b,] == 1 | pag[b,] == 2) &
(pag[,b] == 1 | pag[,b] == 3) &
(pag[,a] == 0) & !visited)
if (length(indD) > 0) {
path.list <- updateList(b, indD, NULL)
done <- FALSE
while ((length(path.list) > 0) && (!done)) {
## next element in the queue
mpath <- path.list[[1]]
m <- length(mpath)
d <- mpath[m]
path.list[[1]] <- NULL
visited[d] <- TRUE
if (any(pag[d,c] == 1:2) && any(pag[c,d] == c(1,3))) {
## pd path found
mpath <- c(a, mpath, c)
n <- length(mpath)
## check the path to be uncovered
uncov <- TRUE
for (l in seq_len(n - 2)) {
if (!(pag[mpath[l], mpath[l + 2]] == 0 &&
pag[mpath[l + 2], mpath[l]] == 0)) {

uncov <- FALSE
break ## speed up!
}
}
## if it is uncovered
if (uncov)
if (length(unfVect) == 0 || ## <<- normal version: just save
## conservative version, check the path to be faithful:
faith.check(mpath, unfVect, p)) {
## save the path to be returned
min.upd.path <- mpath
done <- TRUE
}
}
else {
## d and c are either not connected or connected with a "wrong" edge -----> search iteratively
## find all neighbours of d not visited yet
indR <- which((pag[d,] == 1 | pag[d,] == 2) &
(pag[,d] == 1 | pag[,d] == 3) & !visited)
if (length(indR) > 0) {
## update the queues
path.list <- updateList(mpath, indR, path.list)
}
}
} ## {while}
}
min.upd.path
} ## {minUncovPdPath}

## R5
minUncovCircPath <- function(p, pag, path, unfVect, verbose = FALSE)
{
## Purpose: find a minimal uncovered circle path for a,c,d,b saved in path.
## Check also for the conservative case that it is unambiguous
## If a path exists this is the output, otherwise NA
## ----------------------------------------------------------------------
## Arguments: - p: number of nodes in the graph
##            - path: a,c,d,b under interest
##            - unfVect: vector containing the unfaithful triples
## ----------------------------------------------------------------------
## Author: Diego Colombo, Date: 19 Oct 2011, 13:11

visited <- rep(FALSE, p)
visited[path] <- TRUE # (a,b,c,d) all 'visited'
a <- path[1]
c <- path[2]
d <- path[3]
b <- path[4]
min.ucp.path <- NA
## find all neighbours of c not visited yet
indX <- which(pag[c,] == 1 & pag[,c] == 1 & !visited) ## c o-o x
if (length(indX) > 0) {
path.list <- updateList(c, indX, NULL)
done <- FALSE
while (!done && length(path.list) > 0) {
## next element in the queue
mpath <- path.list[[1]]
x <- mpath[length(mpath)]
path.list[[1]] <- NULL
visited[x] <- TRUE
if (pag[x,d] == 1 && pag[d,x] == 1) {
## circle path found
mpath <- c(a, mpath, d, b)
n <- length(mpath)
## check the path to be uncovered
uncov <- TRUE
for (l in seq_len(n - 2)) {
if (!(pag[mpath[l], mpath[l + 2]] == 0 &&
pag[mpath[l + 2], mpath[l]] == 0)) {

uncov <- FALSE
break ## speed up!
}
}
## if it is uncovered
if (uncov)
if (length(unfVect) == 0 || ## <<- normal version: just save
## conservative version, check the path to be faithful:
faith.check(mpath, unfVect, p)) {
## save the path to be returned
min.ucp.path <- mpath
done <- TRUE
}
}
else {
## x and d are either not connected or connected with an edge which is not o--o -----> search iteratively
## find all neighbours of x not visited yet
indR <- which(pag[x,] == 1 & pag[,x] == 1 & !visited) ## x o--o r
if (length(indR) > 0) {
## update the queues
path.list <- updateList(mpath, indR, path.list)
}
}
}## {while}
}
return(min.ucp.path)
}

## R4
minDiscrPath <- function(pag, a,b,c, verbose = FALSE)
{
## Purpose: find a minimal discriminating path for a,b,c.
## If a path exists this is the output, otherwise NA
## ----------------------------------------------------------------------
## Arguments: - pag: adjacency matrix
##            - a,b,c: node positions under interest
## ----------------------------------------------------------------------
## Author: Diego Colombo, Date: 25 Jan 2011; speedup: Martin Maechler

p <- as.numeric(dim(pag)[1])
visited <- rep(FALSE, p)
visited[c(a,b,c)] <- TRUE # {a,b,c} "visited"
## find all neighbours of a  not visited yet
indD <- which(pag[a,] != 0 & pag[,a] == 2 & !visited) ## d *-> a
if (length(indD) > 0) {
path.list <- updateList(a, indD, NULL)
while (length(path.list) > 0) {
## next element in the queue
mpath <- path.list[[1]]
m <- length(mpath)
d <- mpath[m]
if (pag[c,d] == 0 & pag[d,c] == 0)
## minimal discriminating path found :
return( c(rev(mpath), b,c) )

## else :
pred <- mpath[m-1]
path.list[[1]] <- NULL
visited[d] <- TRUE

## d is connected to c -----> search iteratively
if (pag[d,c] == 2 && pag[c,d] == 3 && pag[pred,d] == 2) {
## find all neighbours of d not visited yet
indR <- which(pag[d,] != 0 & pag[,d] == 2 & !visited) ## r *-> d
if (length(indR) > 0)
## update the queues
path.list <- updateList(mpath, indR, path.list)
}
} ## {while}
}
## nothing found:  return
NA
} ## {minDiscrPath}

###############################################################################
## FCI
###############################################################################

fci <- function(suffStat, indepTest, alpha, labels, p,
                skel.method = c("stable", "original", "stable.fast"),
                fixedGaps = NULL, fixedEdges = NULL, NAdelete = TRUE,
                m.max = Inf, pdsep.max = Inf, rules = rep(TRUE, 10),
                doPdsep = TRUE, biCC = FALSE, conservative = FALSE,
                maj.rule = FALSE, numCores = 1, verbose = FALSE,
                type = c("normal", "anytime", "adaptive"))
{
  ## Purpose: Perform FCI-Algorithm, i.e., estimate PAG
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## - suffStat, indepTest: info for the tests
  ## - p: number of nodes in the graph
  ## - alpha: Significance level of individual partial correlation tests
  ## - verbose: 0 - no output, 1 - detailed output
  ## - fixedGaps: the adjacency matrix of the graph from which the algorithm
  ##      should start (logical); gaps fixed here are not changed
  ## - fixedEdges: Edges marked here are not changed (logical)
  ## - NAdelete: delete edge if pval=NA (for discrete data)
  ## - m.max: maximum size of conditioning set
  ## - pdsep.max: maximum size of conditioning set for Possible-D-SEP
  ## - rules: array of length 10 which contains TRUE or FALSE corresponding
  ##          to each rule. TRUE means the rule will be applied.
  ##          If rules==FALSE only R0 (minimal pattern) will be used
  ## - doPdsep: compute possible dsep
  ## - biCC: TRUE or FALSE variable containing if biconnected components are
  ##         used to compute pdsep
  ## - conservative: TRUE or FALSE defining if
  ##          the v-structures after the pdsep
  ##          have to be oriented conservatively or not
  ## - maj.rule: TRUE or FALSE variable containing if the majority rule is
  ##             used instead of the normal conservative
  ## - labels: names of the variables or nodes
  ## - type: it specifies the version of the FCI that has to be used.
  ##         Per default it is "normal", the normal FCI algorithm. It can also
  ##         be "anytime" for the Anytime FCI, and in this case m.max must be
  ##         specified; or it can be "adaptive" for Adaptive Anytime FCI and
  ##         in this case m.max must not be specified.
  ##         (NOTE: added as *trailing* formal so existing positional calls
  ##          keep working; it was previously used via match.arg() without
  ##          being a formal argument at all.)
  ## - numCores: handed to skeleton(), used for parallelization
  ## ----------------------------------------------------------------------
  ## Author: Markus Kalisch, Date: Dec 2009; update: Diego Colombo, 2012; Martin Maechler, 2013

  cl <- match.call()
  if(!missing(p)) stopifnot(is.numeric(p), length(p <- as.integer(p)) == 1, p >= 2)
  if(missing(labels)) {
    if(missing(p)) stop("need to specify 'labels' or 'p'")
    labels <- as.character(seq_len(p))
  } else { ## use labels ==> p  from it
    stopifnot(is.character(labels))
    if(missing(p)) {
      p <- length(labels)
    } else if(p != length(labels))
      stop("'p' is not needed when 'labels' is specified, and must match length(labels)")
    else
      message("No need to specify 'p', when 'labels' is given")
  }

  ## Check that the type is a valid one
  ## (FIX: 'type' was referenced here but missing from the formals)
  type <- match.arg(type)
  if (type == "anytime" && m.max == Inf)
    stop("To use the Anytime FCI you must specify a finite 'm.max'.")
  if (type == "adaptive" && m.max != Inf)
    stop("To use the Adaptive Anytime FCI you must not specify 'm.max'.")

  if (conservative && maj.rule)
    stop("Choose either conservative FCI or majority rule FCI")

  if (verbose) cat("Compute Skeleton\n================\n")

  ## FIX: the skeleton() call was truncated (unclosed, swallowing the
  ## following statement); restore it and forward all relevant arguments.
  skel <- skeleton(suffStat, indepTest, alpha, labels = labels, method = skel.method,
                   fixedGaps = fixedGaps, fixedEdges = fixedEdges,
                   NAdelete = NAdelete, m.max = m.max, numCores = numCores,
                   verbose = verbose)
  skel@call <- cl # so that makes it into result
  G <- as(skel@graph, "matrix")
  sepset <- skel@sepset
  pMax <- skel@pMax
  n.edgetestsSKEL <- skel@n.edgetests
  max.ordSKEL <- skel@max.ord
  allPdsep <- NA
  tripleList <- NULL

  if (doPdsep) {
    if (verbose) cat("\nCompute PDSEP\n=============\n")
    pc.ci <- pc.cons.intern(skel, suffStat, indepTest,
                            alpha = alpha, version.unf = c(1,1),
                            maj.rule = FALSE, verbose = verbose)
    ## Recompute (sepsets, G, ...):
    ## (FIX: forward pdsep.max and NAdelete, previously accepted by fci()
    ##  but silently ignored)
    pdsepRes <- pdsep(skel@graph, suffStat, indepTest = indepTest, p = p,
                      sepset = pc.ci$sk@sepset, alpha = alpha, pMax = pMax,
                      m.max = if (type == "adaptive") max.ordSKEL else m.max,
                      pdsep.max = pdsep.max, NAdelete = NAdelete,
                      unfVect = pc.ci$unfTripl, # "tripleList.pdsep"
                      biCC = biCC, verbose = verbose)

    ## update the graph & sepset :
    G <- pdsepRes$G
    sepset <- pdsepRes$sepset
    pMax <- pdsepRes$pMax
    allPdsep <- pdsepRes$allPdsep
    n.edgetestsPD <- pdsepRes$n.edgetests
    max.ordPD <- pdsepRes$max.ord
    if (conservative || maj.rule) {
      if (verbose)
        cat("\nCheck v-structures conservatively\n=================================\n")
      tmp.pdsep <- new("pcAlgo", graph = as(G, "graphNEL"), call = cl,
                       n = integer(0), max.ord = as.integer(max.ordSKEL),
                       n.edgetests = n.edgetestsSKEL, sepset = sepset,
                       pMax = pMax, zMin = matrix(NA, 1, 1))
      sk. <- pc.cons.intern(tmp.pdsep, suffStat, indepTest, alpha,
                            verbose = verbose, version.unf = c(1, 1),
                            maj.rule = maj.rule)
      tripleList <- sk.$unfTripl
      ## update the sepsets
      sepset <- sk.$sk@sepset
    }
  }
  else {## !doPdsep : "do not Pdsep"
    n.edgetestsPD <- 0
    max.ordPD <- 0
    allPdsep <- vector("list", p)
    if (conservative || maj.rule) {
      if (verbose)
        cat("\nCheck v-structures conservatively\n=================================\n")
      nopdsep <- pc.cons.intern(skel, suffStat, indepTest, alpha,
                                verbose = verbose, version.unf = c(2, 1),
                                maj.rule = maj.rule)
      tripleList <- nopdsep$unfTripl
      ## update the sepsets
      sepset <- nopdsep$sk@sepset
    }
  }
  if (verbose)
    cat("\nDirect edges:\n=============\nUsing rules:", which(rules),
        "\nCompute collider:\n")
  res <- udag2pag(pag = G, sepset, rules = rules, unfVect = tripleList,
                  verbose = verbose)
  colnames(res) <- rownames(res) <- labels
  new("fciAlgo", amat = res, call = cl, n = integer(0),
      max.ord = as.integer(max.ordSKEL),
      max.ordPDSEP = as.integer(max.ordPD),
      n.edgetests = n.edgetestsSKEL, n.edgetestsPDSEP = n.edgetestsPD,
      sepset = sepset, pMax = pMax, allPdsep = allPdsep)
} ## {fci}

## only called in pdsep()
qreach <- function(x,amat,verbose = FALSE)
{
## Purpose: Compute possible-d-sep(x) ("psep")
## !! The non-zero entries in amat must be symmetric !!
## ----------------------------------------------------------------------
## Arguments:
## - x: node of which psep berechnet werden soll
##         amat[i,j] = 0 iff no edge btw i,j
##         amat[i,j] = 1 iff i *-o j
##         amat[i,j] = 2 iff i *-> j
## - verbose: Show checked node sequence
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date: 29 Oct 2009, 11:54
## Stopping:
## =========
## At every iteration, Q get's reduced by one. It is only increased by
## at least one, if there are edges in A. and then, at least one
## edge in A. is removed. Edges are never inserted into A..
## Thus, either Q or A. becomes empty and the loop stops.
## Runtime:
## ========
## At least O(|V|), since look up in adjacency matrix is made. Assume O(|E|)>O
## Every edge can be visited at most twice. At each visit, there are no
## more than max(deg(V_i)) neighboring edges to look at. Thus, the runtime is
## O(2*|E| * max(deg(V_i))) = O(|E|^2) [worst case]; O(|E|) if sparse in the
## sense that max(deg(V_i)) is constant.
## Correctness:
## ============
## (A) All nodes in PSEP have a path of legal triples from x.
## (B) If there is a legal path from x to y in amat, at least one of them
## is found and y is recorded in PSEP:
## Suppose there is a node y != x that has a legal path from x, but is not in
## PSEP. y cannot be in nbrs(x), because they are added and nothing is
## deleted from PSEP. Hence, there must be a node z which
## is in PSEP but has a neighbor w in amat that has a legal path from
## x but is not in PSEP.
## Assuming that the function legal is correct, and noting that at (*) all
## neighbors of z in A. (tmp!) are analyzed, it follows that w is not
## in adj(z) in A. (but in amat). Thus, w must have been removed
## before from adj(z). Because of (+), it could only have been removed if
## u-z-w was found legal at some point. But then, w would have been added
## to PSEP. This is a contradiction to the assumption that w is not in PSEP.

## check: quadratic; x in V; edgemarks ok; non-zeroes symmetric
stopifnot((ncol(amat) == nrow(amat)),x <= ncol(amat),all(amat %in% c(0,1,2)),
all((amat != 0) == (t(amat != 0))))

A. <- (amat != 0) ## A.[i,j] is true  <===>  edge i--j
PSEP <- Q <- nb <- which(A.[x,])
P <- rep.int(x, length(Q))
A.[x,nb] <- FALSE ## delete edge to nbrs

while(length(Q) > 0) {
## Invariants:
## ===========
## (A1) length(Q) == length(P) > 0
## (A2) non-zero in A. -> non-zero in amat [no non-zero added]
## (A3) Q[i] and P[i] are adjacent in amat [using (A2)]
if (verbose) {
cat("\n-------------","\n")
cat("Queue Q:",Q,"\n")
cat("Queue P:",P,"\n")
}
a <- Q[1]
Q <- Q[-1]
pred <- P[1] ## not empty because of (A1)
P <- P[-1]
if (verbose) cat("Select",pred,"towards",a,"\n")
nb <- which(A.[a,]) ## (*)
if (verbose) cat("Check nbrs",nb,"\nLegal:")

for (b in nb) {
## Guaranteed: pred-a-b are a path because of (A3)
if (lres <- legal.path(pred,a,b,amat)) {
A.[a,b] <- FALSE ## remove b out of adj(a) in A. (+)
Q <- c(Q,b)
P <- c(P,a)
PSEP <- c(PSEP,b)
}
if (verbose) cat(if(lres)"T" else ".", "")
}
}
sort(setdiff(PSEP,x))
} ## {qreach}

## only called in fci() [by default:  doPdsep=TRUE]
pdsep <- function (skel, suffStat, indepTest, p, sepset, alpha, pMax, m.max = Inf,
pdsep.max = Inf, NAdelete = TRUE, unfVect = NULL,
biCC = FALSE, verbose = FALSE) ## FIXME: verbose : 2 --> qreach(verbose)
{
## Purpose: Compute Possible-D-SEP for each node, perform the conditional
##          independence tests and adapt graph accordingly
## ----------------------------------------------------------------------
## Arguments:
## - skel: Graph object returned by function skeleton
## - suffStat, indepTest: info for the independence tests
## - p: number of nodes in the graph
## - sepset: Sepset that was used for finding the skeleton
## - alpha: niveau for the tests
## - pMax: Maximal p-values during estimation of skeleton
## - m.max: maximal size of the conditioning sets
## - pdsep.max: maximum size of conditioning set for Possible-D-SEP
## - unfVect: vector containing the unfaithful triples, used for the
##   conservative orientation of the v-structures
## - biCC: if the biconnected components have to be used
## ----------------------------------------------------------------------
## Value:
## - G: Updated boolean adjacency matrix
## - sepset: Updated sepsets
## - pMax: Updated pMax
## - allPdsep: Possible d-sep for each node [list]
## ----------------------------------------------------------------------
## Author: Markus Kalisch, Date:  9 Dec 2009
## Modification: Diego Colombo; Martin Maechler

G <- (as(skel, "matrix") != 0)
n.edgetests <- rep(0, 1000)
ord <- 0L
allPdsep.tmp <- vector("list", p)
if(biCC)
conn.comp <- lapply(biConnComp(skel), as.numeric)
if (any(G)) {
amat <- G
ind <- which(G, arr.ind = TRUE)
storage.mode(amat) <- "integer" # (TRUE, FALSE) -->  (1, 0)
## Orient colliders
if (verbose) cat("\nCompute collider:\n")
for (i in seq_len(nrow(ind))) {
x <- ind[i, 1]
y <- ind[i, 2]
allZ <- setdiff(which(amat[y, ] != 0), x)
for (z in allZ) {
if (amat[x, z] == 0 &&
!(y %in% sepset[[x]][[z]] ||
y %in% sepset[[z]][[x]])) {

if (length(unfVect) == 0) { ## normal version -------------------
amat[x, y] <- amat[z, y] <- 2
if (verbose) cat("\n",x,"*->", y, "<-*", z, "\n")
}
else { ## conservative version : check if x-y-z is faithful
if (!any(unfVect == triple2numb(p,x,y,z), na.rm = TRUE) &&
!any(unfVect == triple2numb(p,z,y,x), na.rm = TRUE)) {
amat[x, y] <- amat[z, y] <- 2
if (verbose)
cat("\n",x,"*->", y, "<-*", z, "\n")
}
}
}
} ## for( z )
} ## for( i  )
allPdsep <- lapply(1:p, qreach, amat = amat)# verbose = (verbose >= 2)
allPdsep.tmp <- vector("list", p)
for(x in 1:p) {
if(verbose) cat("\nPossible D-Sep of", x, "is:", allPdsep[[x]], "\n")
if (any(an0 <- amat[x, ] != 0)) {
tf1 <- setdiff(allPdsep[[x]], x)
if(verbose) cat(sprintf("\ny = %3d\n.........\n", y))
tf <- setdiff(tf1, y)
## bi-connected components
if (biCC) {
for(cci in conn.comp) {
if (x %in% cci && y %in% cci)
break ## found it
}
bi.conn.comp <- setdiff(cci, c(x,y))
tf <- intersect(tf, bi.conn.comp)
if (verbose) {
cat("There is an edge between",x,"and",y,"\n")
cat("Possible D-Sep of", x,
"intersected with the biconnected component of",x,"and",y,
"is:", tf, "\n")
}
} ## if(biCC)
allPdsep.tmp[[x]] <- c(tf,y) ## you must add y to the set
## for the large-scale simulations, we need to stop the algorithm if
## it takes too much time, i.e. sepset > 25
if (length(tf) > pdsep.max) {
if(verbose)
cat("Size of Possible-D-SEP bigger than",pdsep.max,
". Break the search for the edge between", x,"and",y,"\n")
} else if (length(diff.set) > 0) {
done <- FALSE
ord <- 0L
while (!done && ord < min(length(tf), m.max)) {
ord <- ord + 1L
if(verbose) cat("ord = ", ord, "\n")
if (ord == 1) {
for (S in diff.set) {
pval <- indepTest(x, y, S, suffStat)
n.edgetests[ord + 1] <- n.edgetests[ord + 1] + 1
if (is.na(pval))
if (pval > pMax[x, y])
pMax[x, y] <- pval
if (pval >= alpha) {
amat[x, y] <- amat[y, x] <- 0
sepset[[x]][[y]] <- sepset[[y]][[x]] <- S
done <- TRUE
if (verbose)
cat("x=", x, " y=", y, " S=", S, ": pval =", pval, "\n")
break
}
}
}
else { ## ord > 1
tmp.combn <- combn(tf, ord) ## has  choose( |tf|, ord ) columns
for (k in seq_len(ncol(tmp.combn))) {
S <- tmp.combn[, k]
n.edgetests[ord + 1] <- n.edgetests[ord + 1] + 1
pval <- indepTest(x, y, S, suffStat)
if (is.na(pval))
if(pMax[x, y] < pval)
pMax[x, y] <- pval
if (pval >= alpha) {
amat[x, y] <- amat[y, x] <- 0
sepset[[x]][[y]] <- sepset[[y]][[x]] <- S
done <- TRUE
if (verbose)
cat("x=", x, " y=", y, " S=", S, ": pval =", pval, "\n")
break
}
}
} ## for(k ..)
}
else { ## ord > |adj.x| :
## check all combinations; no combination has been tested before
for (k in seq_len(ncol(tmp.combn))) {
S <- tmp.combn[, k]
n.edgetests[ord + 1] <- n.edgetests[ord + 1] + 1
pval <- indepTest(x, y, S, suffStat)
if (is.na(pval))
if(pMax[x, y] < pval)
pMax[x, y] <- pval
if (pval >= alpha) {
amat[x, y] <- amat[y, x] <- 0
sepset[[x]][[y]] <- sepset[[y]][[x]] <- S
done <- TRUE
if (verbose)
cat("x=", x, " y=", y, " S=", S, ": pval =", pval, "\n")
break
}
} ## for(k ..)
} ## else: { ord > |adj.x| }
} ## else

} ## while(!done ..)
}

} ## for(y ..)

} ## if(any( . ))

} ## for(x ..)
G[amat == 0] <- FALSE
G[amat == 1] <- TRUE
G[amat == 2] <- TRUE

} ## if(any(G))

list(G = G, sepset = sepset, pMax = pMax, allPdsep = allPdsep.tmp,
max.ord = ord, n.edgetests = n.edgetests[1:(ord + 1)])
} ## {pdsep}

udag2pag <- function(pag, sepset, rules = rep(TRUE,10), unfVect = NULL, verbose = FALSE, orientCollider = TRUE)
{
## Purpose: Transform the Skeleton of a pcAlgo-object to a PAG using
## the rules of Zhang. The output is an adjacency matrix.
## ----------------------------------------------------------------------
## Arguments:
## - pag: adjacency matrix of size pxp
## - sepset: list of all separation sets
## - rules: array of length 10 which contains TRUE or FALSE corresponding
##          to each rule. TRUE means the rule will be applied.
##          If rules==FALSE only R0 (minimal pattern) will be used
## - unfVect: Vector with ambiguous triples (coded as number using triple2numb)
## - verbose: 0 - no output, 1 - detailed output
## ----------------------------------------------------------------------
## Author: Diego Colombo, Date: 6 Mar 2009; cleanup: Martin Maechler, 2010
## update: Diego Colombo, 2012

## Notation:
## ----------------------------------------------------------------------
## 0: no edge
## 1: -o
## 3: - (tail)
## a=alpha
## b=beta
## c=gamma
## d=theta

stopifnot(is.logical(rules), length(rules) == 10)
if(!is.numeric(pag))  storage.mode(pag) <- "numeric"

if (any(pag != 0)) {
p <- as.numeric(dim(pag)[1])

## orient collider
if (orientCollider) {
ind <- which(pag == 1, arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
x <- ind[i, 1]
y <- ind[i, 2]
allZ <- setdiff(which(pag[y, ] != 0), x)
for (z in allZ) {
if (pag[x, z] == 0 && !((y %in% sepset[[x]][[z]]) ||
(y %in% sepset[[z]][[x]]))) {
if (length(unfVect) == 0) {
if (verbose) {
cat("\n", x, "*->", y, "<-*", z, "\n")
cat("Sxz=", sepset[[z]][[x]], "and",
"Szx=", sepset[[x]][[z]], "\n")
}
pag[x, y] <- pag[z, y] <- 2
}
else {
if (!any(unfVect == triple2numb(p, x, y, z), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, z, y, x), na.rm = TRUE)) {
if (verbose) {
cat("\n", x, "*->", y, "<-*", z, "\n")
cat("Sxz=", sepset[[z]][[x]], "and",
"Szx=", sepset[[x]][[z]], "\n")
}
pag[x, y] <- pag[z, y] <- 2
}
}
}
}
}
} ## end: Orient collider

old_pag1 <- matrix(0, p, p)
while (any(old_pag1 != pag)) {
old_pag1 <- pag
##-- R1 ------------------------------------------------------------------
if (rules[1]) {
ind <- which((pag == 2 & t(pag) != 0), arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
a <- ind[i, 1]
b <- ind[i, 2]
indC <- which((pag[b, ] != 0 & pag[, b] == 1) & (pag[a, ] == 0 & pag[, a] == 0))
indC <- setdiff(indC, a)
if (length(indC) > 0) {
if (length(unfVect) == 0) {
pag[b, indC] <- 2
pag[indC, b] <- 3
if (verbose)
cat("\nRule 1",
"\nOrient:", a, "*->", b, "o-*", indC,
"as:", b, "->", indC, "\n")
}
else {
for (c in indC) {
if (!any(unfVect == triple2numb(p, a, b, c), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, c, b, a), na.rm = TRUE)) {
pag[b, c] <- 2
pag[c, b] <- 3
if (verbose)
cat("\nRule 1",
"\nConservatively orient:", a, "*->", b, "o-*",
c, "as:", b, "->", c, "\n")
}
} ## for( c )
}
}
} ## for( i )
}
##-- R2 ------------------------------------------------------------------
if (rules[2]) {
ind <- which((pag == 1 & t(pag) != 0), arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
a <- ind[i, 1]
c <- ind[i, 2]
indB <- which((pag[a, ] == 2 & pag[, a] == 3 & pag[c, ] != 0 & pag[, c] == 2) | (pag[a, ] == 2 & pag[, a] != 0 & pag[c, ] == 3 & pag[, c] == 2))
if (length(indB) > 0) {
pag[a, c] <- 2
if (verbose) {
cat("\nRule 2","\n")
cat("Orient:", a, "->", indB, "*->", c, "or", a, "*->", indB, "->", c, "with", a, "*-o", c, "as:", a, "*->", c, "\n")
}
}
}
}
##-- R3 ------------------------------------------------------------------
if (rules[3]) {
ind <- which((pag != 0 & t(pag) == 1), arr.ind = TRUE)
for (i in seq_len(nrow(ind))) {
b <- ind[i, 1]
d <- ind[i, 2]
indAC <- which((pag[b, ] != 0 & pag[, b] == 2) & (pag[, d] == 1 & pag[d, ] != 0))
if (length(indAC) >= 2) {
if (length(unfVect) == 0) {
counter <- 0
while ((counter < (length(indAC) - 1)) && (pag[d, b] != 2)) {
counter <- counter + 1
ii <- counter
while (ii < length(indAC) && pag[d, b] != 2) {
ii <- ii + 1
if (pag[indAC[counter], indAC[ii]] == 0 && pag[indAC[ii], indAC[counter]] == 0) {
if (verbose) {
cat("\nRule 3","\n")
cat("Orient:", d, "*->", b, "\n")
}
pag[d, b] <- 2
}
}
}
}
else {
comb.indAC <- combn(indAC, 2)
for (j in 1:dim(comb.indAC)[2]) {
a <- comb.indAC[1, j]
c <- comb.indAC[2, j]
if (pag[a, c] == 0 && pag[c, a] == 0 && c != a) {
if (!any(unfVect == triple2numb(p, a, d, c), na.rm = TRUE) &&
!any(unfVect == triple2numb(p, c, d, a), na.rm = TRUE)) {
pag[d, b] <- 2
if (verbose) {
cat("\nRule 3","\n")
cat("Conservatively orient:", d, "*->", b, "\n")
}
}
}
}
}
}
}
}
##-- R4 ------------------------------------------------------------------
if (rules[4]) {
ind <- which((pag != 0 & t(pag) == 1), arr.ind = TRUE)## b o-* c
while (length(ind) > 0) {
b <- ind[1, 1]
c <- ind[1, 2]
ind <- ind[-1,, drop = FALSE]
## find all a s.t. a -> c and a <-* b
indA <- which((pag[b, ] == 2 & pag[, b] != 0) &
(pag[c, ] == 3 & pag[, c] == 2))
## choose one a s.t. the initial triangle structure exists and the edge hasn't been oriented yet
while (length(indA) > 0 && pag[c,b] == 1) {
a <- indA[1]
indA <- indA[-1]
## path is the initial triangle
## abc <- c(a, b, c)
## Done is TRUE if either we found a minimal path or no path exists for this triangle
Done <- FALSE
### MM: FIXME?? Isn't  Done  set to TRUE in *any* case inside the following
### while(.), the very first time already ??????????
while (!Done && pag[a,b] != 0 && pag[a,c] != 0 && pag[b,c] != 0) {
## find a minimal discriminating path for a,b,c
md.path <- minDiscrPath(pag, a,b,c, verbose = verbose)
## if the path doesn't exists, we are done with this triangle
if ((N.md <- length(md.path)) == 1) {
Done <- TRUE
}
else {
## a path exists
## if b is in sepset
if ((b %in% sepset[[md.path[1]]][[md.path[N.md]]]) ||
(b %in% sepset[[md.path[N.md]]][[md.path[1]]])) {
if (verbose)
cat("\nRule 4",
"\nThere is a discriminating path between",
md.path[1], "and", c, "for", b, ",and", b, "is in Sepset of",
c, "and", md.path[1], ". Orient:", b, "->", c, "\n")
pag[b, c] <- 2
pag[c, b] <- 3
}
else {
## if b is not in sepset
if (verbose)
cat("\nRule 4",
"\nThere is a discriminating path between",
md.path[1], "and", c, "for", b, ",and", b, "is not in Sepset of",
c, "and", md.path[1], ". Orient:", a, "<->", b, "<->",
c, "\n")
pag[a, b] <- pag[b, c] <- pag[c, b] <- 2
}
Done <- TRUE
}
}
}
}
}
##-- R5 ------------------------------------------------------------------
if (rules[5]) {
ind <- which((</```