####file containing pc algorithm
###first version is following the description in the pcalg package documentation
##G_0 is the initial adjacency matrix - usually a complete graph but made an input
##for added flexibility
pcalg_RCIT <- function(obsDat,
agg_info,
test_opts,
alpha,
G_0 = NULL,
supprMessages = FALSE,
n_cl=NULL,
run_parallel=TRUE,
n_subset=100){
##messages
# print(paste0("Running PC algorithm with ", test))
if(is.null(n_cl) & run_parallel){
warning("run_parallel is TRUE but number of cores it not set; running sequentially")
run_parallel <- FALSE
}else{
if(run_parallel) print(paste0("Running in parallel with ", n_cl, " cores")) else print("Running sequentially")
}
nvar <- length(obsDat)
#if G_0 is not supplied, make it a complete graph
if(is.null(G_0)){
G_0 <- matrix(1, nrow=nvar, ncol=nvar) - diag(nvar)
}
#run step 1 of the algorithm to get the skeleton
print("Starting skeleton algorithm")
skeleton <- pcskel_RCIT(G_0=G_0,
obsDat=obsDat,
alpha=alpha,
agg_info=agg_info,
test_opts=test_opts,
supprMessages=supprMessages,
n_cl=n_cl,
run_parallel=run_parallel,
n_subset=n_subset)
print("Skeleton found")
#skeleton matrix
G_skel <- skeleton[[1]]
G_out <- G_skel
G_temp <- G_out
#separating sets
S <- skeleton[[2]]
#get all unshielded triples i-j-k, for each orient them if
#j is not in the sepset of i,k
#actually keep track of any that should be oriented and orient them
#afterwards if there are no contradictions
triples <- getUnTrpls(G_skel)
#tripleMat is a matrix for keeping track of which edges should be oriented
tripleMat <- matrix(0, ncol=nvar, nrow=nvar)
for(triple in triples){
i <- triple[1]
j <- triple[2]
k <- triple[3]
#if j is not in sepset orient
if(!(j %in% S[[deparse(c(i, k))]])){
tripleMat[i, j] <- 1
tripleMat[k, j] <- 1
}
}
#go through edges in tripleMat, if i-j is > 0 but j-i is 0 there is no disagreement
#and we can orient
for(i in 1:nvar){
for(j in 1:nvar){
if(tripleMat[i, j] > 0 & tripleMat[j, i] == 0){
G_temp[j, i] <- 0
}
}
}
##apply rules 1-3 repeatedly until nothing changes
changed <- TRUE
while(changed){
rule1.out <- .rule1(G_temp)
rule2.out <- .rule2(rule1.out[[1]])
rule3.out <- .rule3(rule2.out[[1]])
G_temp <- rule3.out[[1]]
changed <- any(c(rule1.out[[2]], rule2.out[[2]], rule3.out[[2]]))
}
##if at any time we deleted both directions of an edge, this is wrong
##so put them back (unless they were not there in the original
##graph - these are the extra if statements inside)
for(i in 1:nvar){
for(j in 1:i){
if(anyEdge(G_out, i, j) && !anyEdge(G_temp, i, j)){
if(G_0[i, j] == 1) G_temp[i, j] <- 1
if(G_0[j, i] == 1) G_temp[j, i] <- 1
}
}
}
G_out <- G_temp
print("CPDAG found")
return(list(G_out, G_skel, S))
}
##function that produces the skeleton (i.e. undirected acyclic graph) using order-independent
##modifications to the algorithm.
pcskel_RCIT <- function(G_0,
obsDat,
alpha,
agg_info,
test_opts,
supprMessages,
n_cl,
run_parallel,
n_subset){
#G_old is the graph we will use for the adj sets during each loop
#G_new is the graph with edges deleted as we go
#need both since G_1 is only updated to G_2 each time the size of the
#conditioning set grows
G_old <- G_0
G_new <- G_0
`%do_switch%` <- ifelse(run_parallel, `%dopar%`, `%do%`)
#S is the list of seperation sets
S <- list()
##Step 1:Test for unconditional independence
#get list of edges from adjacency matrix
edges <- getEdges(G_old)
if(run_parallel){
cl<-makeCluster(n_cl)
registerDoParallel(cl)
}
comb <- function(x, ...) {
lapply(seq_along(x),
function(i) c(x[[i]], lapply(list(...), function(y) y[[i]])))
}
z <- foreach(i = 1:length(edges), .combine = comb,
.init = list(list(), list()),
.packages=c('RCITcpp')) %do_switch%{
edge <- edges[[i]]
#format obsdat
testDat <- RCITcpp::format_vars(obsDat[edge],
agg_info$agg_levels[edge],
agg_info$poly_static,
agg_info$poly_dynamic,
agg_info$pixel_static,
agg_info$pixel_dynamic,
agg_info$times_dynamic,
agg_info$inc_times,
agg_info$inc_poly,
agg_info$pop_dynamic,
agg_info$pop_static,
n_subset)
# function(x, y,
# poly_start,
# poly_end,
# population,
# n_rff=5, n_bs=100, get_ts=FALSE){
if(RCITcpp::RIT_disag_wrapper_v2(testDat$obs[[1]],
testDat$obs[[2]],
testDat$start_index,
testDat$end_index,
testDat$pop,
test_opts$n_rff,
test_opts$n_bs
) < alpha){
# if(independence_test_wrapper(test_dat$obs[[1]], test_dat$obs[[2]], alpha=alpha, independence_test, ...)){
# # if(fstest(obsDat[[edge[1]]], obsDat[[edge[2]]], alpha=alpha, test=TRUE,
# # n_rff=n_rff)){
edgeremove <- NULL
sepset <- NULL
}else{
edgeremove <- edge
sepset <- NA
}
list(edgeremove, sepset)
}
edgeremove <- z[[1]]
sepsets <- z[[2]]
keep <- !unlist(lapply(edgeremove, is.null))
edgeremove <- edgeremove[keep]
sepsets <- sepsets[keep]
if(length(edgeremove) > 0){
for(k in 1:length(edgeremove)){
edge <- edgeremove[[k]]
sepset <- sepsets[[k]]
i <- edge[1]
j <- edge[2]
G_new[i, j] <- 0
G_new[j, i] <- 0
S[[deparse(c(i, j))]] <- NA
S[[deparse(c(j, i))]] <- NA
}
}
#update adjacency matrix (actually could have just updated G_old for this
#whole step, done to make it conceptually similar to later)
G_old <- G_new
if(length(getEdges(G_old)) == 0){
if(run_parallel) stopCluster(cl)
return(list(G_old, S))
}
##get size of the maximum conditioning set...how long does it take??
ptm <- proc.time()
edges <- getEdges(G_old)
maxsize <- 0
belowMaxCondSize <- TRUE
for(edge in edges){
i <- edge[1]
j <- edge[2]
adjSet <- getAdj(G_old, i, j)
maxsize <- max(maxsize, length(adjSet))
}
##Step 2: Test for conditional independence
condSetSize <- 1
edges <- getEdges(G_old)
#for each edge, check if the adjacency set is big enough (both directions)
#to condition on
#variable maxCondSize checks if the biggest possible size has been reached for
#the conditioning sets
belowMaxCondSize <- TRUE
while(belowMaxCondSize){
belowMaxCondSize <- FALSE
if(!supprMessages) print(paste0("condSetSize ", condSetSize))
#go through each edge
z <- foreach(i = 1:length(edges), .combine = comb,
.init = list(list(), list()),
.packages=c('RCITcpp'),
.export=c('getAdj', 'getSubsets')) %do_switch% {
edge <- edges[[i]]
if(!supprMessages) print(paste0("testing edge ", edge[1], ",", edge[2], " with size ", condSetSize))
i <- edge[1]
j <- edge[2]
brokenEdge <- FALSE
#do the i, j direction
#get set adj(G,i)\j
adjSet <- getAdj(G_old, i, j)
#if the adjacency set is bigger than conditioning set size, test
#conditional independence
if(length(adjSet) >= condSetSize){
belowMaxCondSize <- TRUE
#test independence conditioning on all subsets of right size
condSets <- getSubsets(adjSet, condSetSize)
for(condSet in condSets){
testDat <- RCITcpp::format_vars(obsDat[c(i, j, condSet)],
agg_info$agg_levels[c(i, j, condSet)],
agg_info$poly_static,
agg_info$poly_dynamic,
agg_info$pixel_static,
agg_info$pixel_dynamic,
agg_info$times_dynamic,
agg_info$inc_times,
agg_info$inc_poly,
agg_info$pop_dynamic,
agg_info$pop_static,
n_subset)
#make a matrix of conditioning data
condData <- do.call(cbind, testDat$obs[-(1:2)])
#if they are conditionally independent, remove edge, add condSet to
#separation set list and stop trying this edge
if(RCITcpp::RCIT_disag_wrapper_v2(testDat$obs[[1]], testDat$obs[[2]], condData,
testDat$start_index,
testDat$end_index,
testDat$pop,
test_opts$n_rff,
test_opts$n_rff_z,
test_opts$n_bs) > alpha){
# if(!CI_test_wrapper(testDat$obs[1], testDat$obs[[2]], condData, alpha=alpha, CI_test, ...)){
# if(!condtest(obsDat[[i]], obsDat[[j]], condData, alpha=alpha,
# n_rff=n_rff, n_rffz=n_rffz)){
edgeremove <- edge
sepset <- condSet
brokenEdge <- TRUE
break
}
}
}
if(!brokenEdge){
#do j, i direction
i <- edge[1]
j <- edge[2]
#do the i, j direction
#get set adj(G,i)\j
adjSet <- getAdj(G_old, j, i)
#if the adjacency set is bigger than conditioning set size, test
#conditional independence
if(length(adjSet) >= condSetSize){
#test independence conditioning on all subsets of right size
condSets <- getSubsets(adjSet, condSetSize)
for(condSet in condSets){
testDat <- RCITcpp::format_vars(obsDat[c(i, j, condSet)],
agg_info$agg_levels[c(i, j, condSet)],
agg_info$poly_static,
agg_info$poly_dynamic,
agg_info$pixel_static,
agg_info$pixel_dynamic,
agg_info$times_dynamic,
agg_info$inc_times,
agg_info$inc_poly,
agg_info$pop_dynamic,
agg_info$pop_static,
n_subset)
#make a matrix of conditioning data
condData <- do.call(cbind, testDat$obs[-(1:2)])
#if they are conditionally independent, remove edge, add condSet to
#separation set list and stop trying this edge
if(RCITcpp::RCIT_disag_wrapper_v2(testDat$obs[[1]], testDat$obs[[2]], condData,
testDat$start_index,
testDat$end_index,
testDat$pop,
test_opts$n_rff,
test_opts$n_rff_z,
test_opts$n_bs) > alpha){
#if they are conditionally independent, remove edge, add condSet to
#separation set list and stop trying this edge
# if(!CI_test_wrapper(obsDat[[i]], obsDat[[j]], condData, alpha=alpha, CI_test, ...)){
# if(!condtest(obsDat[[i]], obsDat[[j]], condData, alpha=alpha)){
edgeremove <- edge
sepset <- condSet
brokenEdge <- TRUE
break
}
}
}
}
if(!brokenEdge){
edgeremove <- NULL
sepset <- NULL
}
list(edgeremove, sepset)
}
#update the adjacency matrix with edges remove
edgeremove <- z[[1]]
# print(length(z[[1]]))
sepsets <- z[[2]]
# print(length(z[[2]]))
# print(edgeremove)s
keep <- !unlist(lapply(edgeremove, is.null))
edgeremove <- edgeremove[keep]
sepsets <- sepsets[keep]
if(length(edgeremove) > 0){
for(k in 1:length(edgeremove)){
edge <- edgeremove[[k]]
sepset <- sepsets[[k]]
i <- edge[1]
j <- edge[2]
G_new[i, j] <- 0
G_new[j, i] <- 0
S[[deparse(c(i, j))]] <- sepset
S[[deparse(c(j, i))]] <- sepset
}
}
G_old <- G_new
condSetSize <- condSetSize + 1
edges <- getEdges(G_old)
maxsize <- 0
for(edge in edges){
i <- edge[1]
j <- edge[2]
adjSet <- getAdj(G_old, i, j)
maxsize <- max(maxsize, length(adjSet))
}
if(!supprMessages) print(paste0("maxsize ", maxsize))
belowMaxCondSize <- maxsize >= condSetSize
}
if(run_parallel) stopCluster(cl)
colnames(G_old) <- names(obsDat)
return(list(G_old, S))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.