Nothing
#' @import V8 jsonlite
#' @importFrom boot boot
#' @importFrom MASS ginv
#' @importFrom grDevices dev.size
#' @importFrom methods is
#' @importFrom utils tail
#' @importFrom stats as.formula coef confint cov cor lm pnorm qnorm quantile runif loess sd
#' @importFrom graphics abline arrows axis lines par plot plot.new segments strheight strwidth text xspline
NULL
#' Get Graph Type
#'
#' @param x the input graph.
#' @examples
#' graphType( "mag{ x<-> y }" ) == "mag"
#' @export
#'
graphType <- function( x ){
x <- as.dagitty( x )
xv <- .getJSVar()
r <- NULL
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("global.",
xv,".getType()") )
r <- .jsget( xv )
},
error=function(e) stop(e),
finally={.deleteJSVar(xv)})
r
}
'graphType<-' <- function( x, value=c("dag","mag","pdag","pag") ){
value <- match.arg(value)
x <- as.dagitty( x )
xv <- .getJSVar()
r <- NULL
tryCatch({
.jsassigngraph( xv, x )
.jseval( paste0("global.",xv,".setType('",value,"')") )
r <- .jsgetgraph( xv )
},
error=function(e) stop(e),
finally={.deleteJSVar(xv)})
r
}
#' Get Bundled Examples
#'
#' Provides access to the builtin examples of the dagitty website.
#'
#' @param x name of the example, or part thereof. Supported values are:
#' \itemize{
#' \item{"M-bias"}{ the M-bias graph.}
#' \item{"confounding"}{ an extended confounding triangle.}
#' \item{"mediator"}{ a small model with a mediator.}
#' \item{"paths"}{ a graph with many variables but few paths}
#' \item{"Sebastiani"}{ a small part of a genetics study (Sebastiani et al., 2005)}
#' \item{"Polzer"}{ DAG from a dentistry study (Polzer et al., 2012)}
#' \item{"Schipf"}{ DAG from a study on diabetes (Schipf et al., 2010)}
#' \item{"Shrier"}{ DAG from a classic sports medicine example (Shrier & Platt, 2008)}
#' \item{"Thoemmes"}{ DAG with unobserved variables
#' (communicated by Felix Thoemmes, 2013)}.
#' \item{"Kampen"}{ DAG from a psychiatry study (van Kampen, 2014)}
#' }
#' @references
#' Sabine Schipf, Robin Haring, Nele Friedrich, Matthias Nauck, Katharina Lau,
#' Dietrich Alte, Andreas Stang, Henry Voelzke, and Henri Wallaschofski (2011),
#' Low total testosterone is associated with increased risk of incident
#' type 2 diabetes mellitus in men: Results from the study of health in
#' pomerania (SHIP). \emph{The Aging Male} \bold{14}(3):168--75.
#'
#' Paola Sebastiani, Marco F. Ramoni, Vikki Nolan, Clinton T. Baldwin, and
#' Martin H. Steinberg (2005), Genetic dissection and prognostic modeling of overt
#' stroke in sickle cell anemia. \emph{Nature Genetics}, \bold{37}:435--440.
#'
#' Ian Shrier and Robert W. Platt (2008),
#' Reducing bias through directed acyclic graphs.
#' \emph{BMC Medical Research Methodology}, \bold{8}(70).
#'
#' Ines Polzer, Christian Schwahn, Henry Voelzke, Torsten Mundt, and Reiner
#' Biffar (2012), The association of tooth loss with all-cause and circulatory
#' mortality. Is there a benefit of replaced teeth? A systematic review and
#' meta-analysis. \emph{Clinical Oral Investigations}, \bold{16}(2):333--351.
#'
#' Dirk van Kampen (2014),
#' The SSQ model of schizophrenic prodromal unfolding revised: An
#' analysis of its causal chains based on the language of directed graphs.
#' \emph{European Psychiatry}, \bold{29}(7):437--48.
#'
#' @examples
#' g <- getExample("Shrier")
#' plot(g)
#'
#' @export
getExample <- function( x ){
ct <- .getJSContext()
xv <- .getJSVar()
tryCatch({
.jsassign( xv, x )
r <- ct$eval( paste0("DagittyR.findExample(global.",xv,")") )
}, finally={.deleteJSVar(xv)})
if( r != "undefined" ){
structure(r,class="dagitty")
} else {
stop("Example ",x," could not be found!")
}
}
#' Simulate Data from Structural Equation Model
#'
#' Interprets the input graph as a structural equation model, generates random path
#' coefficients, and simulates data from the model. This is a very bare-bones
#' function and probably not very useful
#' except for quick validation purposes (e.g. checking that an implied vanishing
#' tetrad truly vanishes in simulated data). For more elaborate simulation studies, please
#' use the lavaan package or similar facilities in other packages.
#'
#' @details Data are generated in the following manner.
#' Each directed arrow is assigned a path coefficient that can be given using the attribute
#' "beta" in the model syntax (see the examples). All coefficients not set in this manner are
#' set to the \code{b.default} argument, or if that is not given, are chosen uniformly
#' at random from the interval given by \code{b.lower} and \code{b.upper} (inclusive; set
#' both parameters to the same value for constant path coefficients). Each bidirected
#' arrow a <-> b is replaced by a substructure a <- L -> b, where L is an exogenous latent
#' variable. Path coefficients on such substructures are set to \code{sqrt(x)}, where
#' \code{x} is again chosen at random from the given interval; if \code{x} is negative,
#' one path coefficient is set to \code{-sqrt(x)} and the other to \code{sqrt(x)}. All
#' residual variances are set to \code{eps}.
#'
#' @param x the input graph, a DAG (which may contain bidirected edges).
#' @param N number of samples to generate.
#' @param b.lower lower bound for random path coefficients, applied if \code{b.default=NULL}.
#' @param b.upper upper bound for path coefficients.
#' @param b.default default path coefficient applied to arrows for which no coefficient is
#' defined in the model syntax.
#' @param eps residual variance (only meaningful if \code{standardized=FALSE}).
#' @param standardized whether a standardized output is desired (all variables have variance 1).
#'
#' If \code{standardized=TRUE}, all path coefficients are interpreted as standardized coefficients.
#' But not all standardized coefficients are compatible with all graph structures.
#' For instance, the graph structure z <- x -> y -> z is incompatible with standardized
#' coefficients of 0.9, since this would imply that the variance of z must be larger than
#' 1. For large graphs with many parallel paths, it can be very difficult to find coefficients
#' that work.
#'
#' @return Returns a data frame containing \code{N} values for each variable in \code{x}.
#'
#' @examples
#' ## Simulate data with pre-defined path coefficients of -.6
#' g <- dagitty('dag{z -> x [beta=-.6] x <- y [beta=-.6] }')
#' x <- simulateSEM( g )
#' cov(x)
#'
#'
#' @export
simulateSEM <- function( x, b.default=NULL, b.lower=-.6, b.upper=.6, eps=1, N=500, standardized=TRUE ){
if( !requireNamespace( "MASS", quietly=TRUE ) ){
stop("This function requires the 'MASS' package!")
}
.supportsTypes( x, c("dag") )
x <- as.dagitty( x )
e <- .edgeAttributes( x, "beta" )
e$a <- as.double(as.character(e$a))
b.not.set <- is.na(e$a)
if( is.null( b.default ) ){
e$a[b.not.set] <- runif(sum(b.not.set),b.lower,b.upper)
} else {
e$a[b.not.set] <- b.default
}
ovars <- names(x)
nV <- length(ovars)
nL <- sum(e$e=="<->")
if( nrow(e) > 0 ){
vars <- paste0("v",ovars)
if( nL > 0 ){
lats <- paste0("l",seq_len(nL))
} else {
lats <- c()
}
Beta <- matrix( 0, nrow=nV+nL, ncol=nV+nL )
rownames(Beta) <- colnames(Beta) <- c(vars,lats)
cL <- 1
for( i in seq_len( nrow(e) ) ){
b <- e$a[i]
if( e$e[i] == "<->" ){
lV <- paste0("l",cL)
lb <- sqrt(abs(b))
Beta[lV,paste0("v",e$v[i])] <- lb
if( b < 0 ){
Beta[lV,paste0("v",e$w[i])] <- -lb
} else {
Beta[lV,paste0("v",e$w[i])] <- lb
}
cL <- cL + 1
} else if( e$e[i] == "->" ){
Beta[paste0("v",e$v[i]),paste0("v",e$w[i])] <- b
}
}
L <- (diag( 1, nV+nL ) - Beta)
Li <- MASS::ginv( L )
if( standardized == TRUE ){
Phi <- MASS::ginv( t(Li)^2 ) %*% rep(eps,nrow(Beta))
Phi <- diag( c(Phi), nV+nL )
} else {
Phi <- diag( eps, nV+nL )
}
Sigma <- t(Li) %*% Phi %*% Li
} else {
Sigma <- diag(1,nV+nL)
}
r <- MASS::mvrnorm( N, rep(0,nV+nL), Sigma )[,1:nV]
colnames(r) <- ovars
r <- as.data.frame(r)
r[,setdiff(ovars,latents(x))]
}
#' Ancestral Relations
#'
#' Retrieve the names of all variables in a given graph that are in the specified
#' ancestral relationship to the input variable \code{v}.
#'
#' @param x the input graph, of any type.
#' @param v name(s) of variable(s).
#'
#' \code{descendants(x,v)} retrieves variables that are are reachable from \code{v} via
#' a directed path.
#'
#' \code{ancestors(x,v)} retrieves variables from which \code{v} is reachable via a
#' directed path.
#'
#' \code{children(x,v)} finds all variables \code{w} connected to \code{v}
#' by an edge \eqn{v} -> \eqn{w}.
#'
#' \code{parents(x,v)} finds all variables \code{w} connected to \code{v}
#' by an edge \eqn{w} -> \eqn{v}.
#'
#' \code{markovBlanket(x,v}) returns \code{x}'s parents, its children, and all other
#' parents of its children. The Markov blanket always renders \code{x} independent
#' of all other nodes in the graph.
#'
#' By convention, \code{descendants(x,v)} and \code{ancestors(x,v)} include
#' \code{v} but \code{children(x,v)} and \code{parents(x,v)} do not.
#'
#' @name AncestralRelations
#'
#' @examples
#' g <- dagitty("graph{ a <-> x -> b ; c -- x <- d }")
#' descendants(g,"x")
#' parents(g,"x")
#' spouses(g,"x")
#'
NULL
#' @rdname AncestralRelations
#' @export
descendants <- function( x, v ){
.kins( x, v, "descendants" )
}
#' @rdname AncestralRelations
#' @export
ancestors <- function( x, v ){
.kins( x, v, "ancestors" )
}
#' @rdname AncestralRelations
#' @export
children <- function( x, v ){
.kins( x, v, "children" )
}
#' @rdname AncestralRelations
#' @export
parents <- function( x, v ){
.kins( x, v, "parents" )
}
#' @rdname AncestralRelations
#' @export
neighbours <- function( x, v ){
.kins( x, v, "neighbours" )
}
#' @rdname AncestralRelations
#' @export
spouses <- function( x, v ){
.kins( x, v, "spouses" )
}
#' @rdname AncestralRelations
#' @export
adjacentNodes <- function( x, v ){
.kins( x, v, "adjacentNodes" )
}
#' @rdname AncestralRelations
#' @export
markovBlanket <- function( x, v ){
setdiff( union( union( parents( x, v ), children( x, v ) ),
parents( x, children( x, v ) ) ), v )
}
#' Orient Edges in PDAG.
#'
#' Orients as many edges as possible in a partially directed acyclic graph (PDAG)
#' by converting induced subgraphs
#' X -> Y -- Z to X -> Y -> Z.
#'
#' @param x the input graph, a PDAG.
#'
#' @examples
#' orientPDAG( "pdag { x -> y -- z }" )
#' @export
#'
orientPDAG <- function( x ){
.supportsTypes( x, "pdag" )
.graphTransformer( x, "cgToRcg" )
}
#' Generating Equivalent Models
#'
#' \code{equivalenceClass(x)} generates a complete partially directed acyclic graph
#' (CPDAG) from an input DAG \code{x}. The CPDAG represents all graphs that are Markov
#' equivalent to \code{x}: undirected
#' edges in the CPDAG can be oriented either way, as long as this does not create a cycle
#' or a new v-structure (a sugraph a -> m <- b, where a and b are not adjacent).
#'
#' \code{equivalentDAGs(x,n)} enumerates at most \code{n} DAGs that are Markov equivalent
#' to \code{x}.
#'
#' @param x the input graph, a DAG.
#' @param n maximal number of returned graphs.
#' @name EquivalentModels
#' @examples
#' # How many equivalent DAGs are there for the sports DAG example?
#' g <- getExample("Shrier")
#' length(equivalentDAGs(g))
#' # Plot all equivalent DAGs
#' par( mfrow=c(2,3) )
#' lapply( equivalentDAGs(g), plot )
#' # How many edges can be reversed without changing the equivalence class?
#' sum(edges(equivalenceClass(g))$e == "--")
NULL
#' @rdname EquivalentModels
#' @export
equivalenceClass <- function( x ){
x <- as.dagitty( x )
.supportsTypes( x, "dag" )
.graphTransformer( x, "dagToCpdag" )
}
#' @rdname EquivalentModels
#' @export
equivalentDAGs <- function( x, n=100 ){
x <- as.dagitty(x)
.supportsTypes( x, "dag" )
xv <- .getJSVar()
r <- NULL
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("_.map(GraphTransformer.markovEquivalentDags(global.",
xv,",",n,"),function(x){return x.toString()})") )
r <- .jsget( xv )
},
error=function(e) stop(e),
finally={.deleteJSVar(xv)})
lapply( r, dagitty )
}
#' Moral Graph
#'
#' Graph obtained from \code{x} by (1) \dQuote{marrying} (inserting an undirected
#' ede between) all nodes that have common children, and then replacing all edges
#' by undirected edges. If \code{x} contains bidirected edges, then all sets of
#' nodes connected by a path containing only bidirected edges are treated like a
#' single node (see Examples).
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#'
#' @examples
#' # returns a complete graph
#' moralize( "dag{ x->m<-y }" )
#' # also returns a complete graph
#' moralize( "dag{ x -> m1 <-> m2 <-> m3 <-> m4 <- y }" )
#'
#' @export
moralize <- function( x ){
.supportsTypes( x, c("dag","mag","pdag") )
.graphTransformer( x, "moralGraph" )
}
#' Back-Door Graph
#'
#' Removes every first edge on a proper causal path from \code{x}.
#' If \code{x} is a MAG or PAG, then only \dQuote{visible} directed
#' edges are removed (Zhang, 2008).
#'
#' @param x the input graph, a DAG, MAG, PDAG, or PAG.
#'
#' @references
#' J. Zhang (2008), Causal Reasoning with Ancestral Graphs.
#' \emph{Journal of Machine Learning Research} 9: 1437-1474.
#'
#' @examples
#' g <- dagitty( "dag { x <-> m <-> y <- x }" )
#' backDoorGraph( g ) # x->y edge is removed
#'
#' g <- dagitty( "mag { x <-> m <-> y <- x }" )
#' backDoorGraph( g ) # x->y edge is not removed
#'
#' g <- dagitty( "mag { x <-> m <-> y <- x <- i }" )
#' backDoorGraph( g ) # x->y edge is removed
#'
#' @export
backDoorGraph <- function( x ){
x <- as.dagitty(x)
.supportsTypes( x, c("dag","mag","pdag","pag") )
.graphTransformer( x, "backDoorGraph" )
}
#' Ancestor Graph
#'
#' Creates the induced subgraph containing only the vertices
#' in \code{v}, their ancestors, and the edges between them. All
#' other vertices and edges are discarded.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param v variable names.
#'
#' @details If the input graph is a MAG or PDAG, then all *possible* ancestors
#' will be returned (see Examples).
#'
#' @examples
#' g <- dagitty("dag{ z <- x -> y }")
#' ancestorGraph( g, "z" )
#'
#' g <- dagitty("pdag{ z -- x -> y }")
#' ancestorGraph( g, "y" ) # includes z
#'
#' @export
ancestorGraph <- function( x, v=NULL ){
x <- as.dagitty(x)
.supportsTypes( x, c("dag","mag","pdag") )
if( is.null(v) ){
v <- c(exposures(x),outcomes(x),adjustedNodes(x))
} else {
v <- as.list(v)
}
xv <- .getJSVar()
xv2 <- .getJSVar()
r <- NULL
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv2, v )
.jsassign( xv2, .jsp("DagittyR.getVertices(global.",xv,",global.",xv2,")") )
.jsassign( xv, .jsp("GraphTransformer.ancestorGraph(global.",xv,",global.",xv2,")") )
r <- .jsgetgraph( xv )
},
error=function(e) stop(e),
finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
r
}
#' Variable Statuses
#'
#' Get or set variables with a given status in a graph. Variables in dagitty graphs can
#' have one of several statuses. Variables with status \emph{exposure} and
#' \emph{outcome} are important when determining causal effects via the functions
#' \code{\link{adjustmentSets}} and \code{\link{instrumentalVariables}}. Variables
#' with status \emph{latent} are assumed
#' to be unobserved variables or latent constructs, which is respected when deriving
#' testable implications of a graph via the functions
#' \code{\link{impliedConditionalIndependencies}} or \code{\link{vanishingTetrads}}.
#'
#' \code{setVariableStatus} first removes the given status from all variables in the graph
#' that had it, and then sets it on the given variables.
#' For instance, if \code{status="exposure"} and \code{value="X"} are given, then
#' \code{X} will be the only exposure in the resulting graph.
#'
#' @param x the input graph, of any type.
#' @param value character vector; names of variables to receive the given status.
#' @param status character, one of "exposure", "outcome" or "latent".
#'
#' @name VariableStatus
#'
#' @examples
#' g <- dagitty("dag{ x<->m<->y<-x }") # m-bias graph
#' exposures(g) <- "x"
#' outcomes(g) <- "y"
#' adjustmentSets(g)
#'
NULL
#' @rdname VariableStatus
#' @export
exposures <- function( x ){
.nodesWithProperty( x, "source" )
}
#' @rdname VariableStatus
#' @export
'exposures<-' <- function( x, value ){
setVariableStatus(x, "exposure", value )
}
#' @rdname VariableStatus
#' @export
outcomes <- function( x ){
.nodesWithProperty( x, "target" )
}
#' @rdname VariableStatus
#' @export
'outcomes<-' <- function( x, value ){
setVariableStatus(x, "outcome", value )
}
#' @rdname VariableStatus
#' @export
latents <- function( x ){
.nodesWithProperty( x, "latentNode" )
}
#' @rdname VariableStatus
#' @export
'latents<-' <- function( x, value ){
setVariableStatus(x, "latent", value )
}
#' @rdname VariableStatus
#' @export
adjustedNodes <- function( x ){
.nodesWithProperty( x, "adjustedNode" )
}
#' @rdname VariableStatus
#' @export
'adjustedNodes<-' <- function( x, value ){
setVariableStatus(x, "adjustedNode", value )
}
#' @rdname VariableStatus
#' @export
setVariableStatus <- function( x, status, value ) {
allowed.statuses <- c("exposure","outcome","latent","adjustedNode")
if( !(status %in% allowed.statuses) ){
stop( "Status must be one of: ", paste(allowed.statuses,collapse=", ") )
}
xv <- .getJSVar()
vv <- .getJSVar()
tryCatch({
.jsassign( xv, as.character(x) )
.jsassign( xv, .jsp("GraphParser.parseGuess(global.",xv,")") )
if( status == "exposure" ){
jsstatname <- "Source"
} else if (status == "outcome" ){
jsstatname <- "Target"
} else if (status == "latent" ){
jsstatname <- "LatentNode"
} else if (status == "adjustedNode" ){
jsstatname <- "AdjustedNode"
}
.jseval( paste0( "global.",xv,".removeAll",jsstatname,"s()" ) )
for( n in value ){
.jsassign( vv, n )
.jseval( paste0( "global.",xv,".add",jsstatname,"(global.",vv,")" ) )
}
r <- .jsget( paste0( xv,".toString()" ) )
},finally={
.deleteJSVar(vv)
.deleteJSVar(xv)
})
structure(r,class="dagitty")
}
#' Names of Variables in Graph
#'
#' Extracts the variable names from an input graph. Useful for iterating
#' over all variables.
#'
#' @param x the input graph, of any type.
#' @export
#' @examples
#' ## A "DAG" with Romanian and Swedish variable names. These can be
#' ## input using quotes to overcome the limitations on unquoted identifiers.
#' g <- dagitty( 'digraph {
#' "coração" [pos="0.297,0.502"]
#' "hjärta" [pos="0.482,0.387"]
#' "coração" -> "hjärta"
#' }' )
#' names( g )
names.dagitty <- function( x ){
ct <- .getJSContext()
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("global.",xv,".vertices.keys()") )
r <- .jsget(xv)},
finally={.deleteJSVar(xv)})
r
}
#' Retrieve Exogenous Variables
#'
#' Returns the names of all variables that have no directed arrow pointing to them.
#' Note that this does not preclude variables connected to bidirected arrows.
#'
#' @param x the input graph, of any type.
#' @export
exogenousVariables <- function( x ){
x <- as.dagitty( x )
e <- edges( x )
setdiff( names(x), as.character( e$w[e$e=="->"] ) )
}
#' Plot Coordinates of Variables in Graph
#'
#' The DAGitty syntax allows specification of plot coordinates for each variable in a
#' graph. This function extracts these plot coordinates from the graph description in a
#' \code{dagitty} object. Note that the coordinate system is undefined, typically one
#' needs to compute the bounding box before plotting the graph.
#'
#' @param x the input graph, of any type.
#' @param value a list with components \code{x} and \code{y},
#' giving relative coordinates for each variable. This format is suitable
#' for \code{\link{xy.coords}}.
#'
#' @examples
#' ## Plot localization of each node in the Shrier example
#' plot( coordinates( getExample("Shrier") ) )
#'
#' ## Define a graph and set coordinates afterwards
#' x <- dagitty('dag{
#' G <-> H <-> I <-> G
#' D <- B -> C -> I <- F <- B <- A
#' H <- E <- C -> G <- D
#' }')
#' coordinates( x ) <-
#' list( x=c(A=1, B=2, D=3, C=3, F=3, E=4, G=5, H=5, I=5),
#' y=c(A=0, B=0, D=1, C=0, F=-1, E=0, G=1, H=0, I=-1) )
#' plot( x )
#'
#' @seealso
#' Function \link{graphLayout} for automtically generating layout coordinates, and function
#' \link{plot.dagitty} for plotting graphs.
#'
#' @export
coordinates <- function( x ){
ct <- .getJSContext()
xv <- .getJSVar()
yv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("global.",xv,".getVertices()") )
.jsassign( yv, .jsp("_.pluck(global.",xv,",'id')") )
labels <- .jsget(yv)
.jsassign( yv, .jsp("_.pluck(global.",xv,",'layout_pos_x')") )
rx <- .jsget(yv)
.jsassign( yv, .jsp("_.pluck(global.",xv,",'layout_pos_y')") )
ry <- .jsget(yv)},
finally={
.deleteJSVar(xv)
.deleteJSVar(yv)
})
names(rx) <- labels
names(ry) <- labels
list( x=rx, y=ry )
}
#' @rdname coordinates
#' @export
'coordinates<-' <- function( x, value ){
xv <- .getJSVar()
xv2 <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
for( n in intersect( names(value$x), names(x) ) ){
.jsassign( xv2, as.character(n) )
.jseval(.jsp("global.",xv,".vertices.get(",xv2,").layout_pos_x=",
as.numeric(value$x[n])))
.jseval(.jsp("global.",xv,".vertices.get(",xv2,").layout_pos_y=",
as.numeric(value$y[n])))
}
.jsassign( xv, .jsp("global.",xv,".toString()") )
r <- .jsget( xv )
},
error=function(e) stop(e),
finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
structure(r,class="dagitty")
}
#' Canonicalize an Ancestral Graph
#'
#' Takes an input ancestral graph (a graph with directed, bidirected and undirected
#' edges) and converts it to a DAG by replacing every bidirected edge x <-> y with a
#' substructure x <- L -> y, where L is a latent variable, and every undirected edge
#' x -- y with a substructure x -> S <- y, where S is a selection variable. This function
#' does not check whether the input is actually an ancestral graph.
#'
#' @param x the input graph, a DAG or MAG.
#' @return A list containing the following components:
#' \itemize{
#' \item{g}{The resulting graph.}
#' \item{L}{Names of newly inserted latent variables.}
#' \item{S}{Names of newly inserted selection variables.}
#' }
#'
#' @examples
#' canonicalize("mag{x<->y--z}") # introduces two new variables
#' @export
canonicalize <- function( x ){
x <- as.dagitty(x)
.supportsTypes(x,c("dag","mag"))
xv <- .getJSVar()
xv2 <- .getJSVar()
r <- NULL
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("GraphTransformer.canonicalDag(global.",xv,")") )
.jsassign( xv2, .jsp("global.",xv,".g.toString()") )
g <- .jsget( xv2 )
.jsassign( xv2, .jsp("_.pluck(",xv,".L,'id')") )
L <- .jsget( xv2 )
.jsassign( xv2, .jsp("_.pluck(",xv,".S,'id')") )
S <- .jsget( xv2 )
r <- list( g=structure(g,class="dagitty"), L=L, S=S )
},
error=function(e) stop(e),
finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
r
}
#' Graph Edges
#'
#' Extracts edge information from the input graph.
#'
#' @param x the input graph, of any type.
#' @return a data frame with the following variables:
#' \itemize{
#' \item{v}{ name of the start node.}
#' \item{w}{ name of the end node. For symmetric edges (bidirected and undirected), the
#' order of start and end node is arbitrary.}
#' \item{e}{ type of edge. Can be one of \code{"->"}, \code{"<->"} and \code{"--"}.}
#' \item{x}{ X coordinate for a control point. If this is not \code{NA}, then the edge
#' is drawn as an \code{\link{xspline}} through the start point, this control point,
#' and the end point. This is especially important for cases where there is more than
#' one edge between two variables (for instance, both a directed and a bidirected edge).}
#' \item{y}{ Y coordinate for a control point.}
#' }
#'
#' @examples
#' ## Which kinds of edges are used in the Shrier example?
#' levels( edges( getExample("Shrier") )$e )
#' @export
edges <- function( x ){
x <- as.dagitty( x )
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("DagittyR.edge2r(global.",xv,")") )
r <- .jsget(xv)
}, finally={.deleteJSVar(xv)})
as.data.frame(r)
}
#' Test for Graph Class
#'
#' A function to check whether an object has class \code{dagitty}.
#'
#' @param x object to be tested.
#'
#' @export
is.dagitty <- function(x) inherits(x,"dagitty")
#' Generate Graph Layout
#'
#' This function generates plot coordinates for each variable in a graph that does not
#' have them already. To this end, the well-known \dQuote{Spring} layout algorithm is
#' used. Note that this is a stochastic algorithm, so the generated layout will be
#' different every time (which also means that you can try several times until you find
#' a decent layout).
#'
#' @param x the input graph, of any type.
#' @param method the layout method; currently, only \code{"spring"} is supported.
#' @return the same graph as \code{x} but with layout coordinates added.
#'
#' @examples
#' ## Generate a layout for the M-bias graph and plot it
#' plot( graphLayout( dagitty( "dag { X <- U1 -> M <- U2 -> Y } " ) ) )
#'
#' @export
graphLayout <- function( x, method="spring" ){
x <- as.dagitty( x )
if( !(method %in% c("spring")) ){
stop("Layout method ",method," not supported!")
}
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jseval( paste0("(new GraphLayouter.Spring(global.",xv,")).layout()") )
.jsassign( xv, .jsp("global.",xv,".toString()") )
r <- .jsget(xv)
}, finally={.deleteJSVar(xv)})
structure( r, class="dagitty" )
}
#' Plot Graph
#'
#' A simple plot method to quickly visualize a graph. This is intended mainly for
#' simple visualization purposes and not as a full-fledged graph drawing
#' function.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param ... not used.
#'
#' @export
plot.dagitty <- function( x, ... ){
x <- as.dagitty( x )
.supportsTypes(x,c("dag","mag","pdag"))
coords <- coordinates( x )
if( any( !is.finite( coords$x ) | !is.finite( coords$y ) ) ){
stop("Please supply plot coordinates for graph! See ?coordinates and ?graphLayout.")
}
labels <- names(coords$x)
par(mar=rep(0,4))
plot.new()
par(new=TRUE)
wx <- sapply( paste0("mm",labels),
function(s) strwidth(s,units="inches") )
wy <- sapply( paste0("\n",labels),
function(s) strheight(s,units="inches") )
ppi.x <- dev.size("in")[1] / (max(coords$x)-min(coords$x))
ppi.y <- dev.size("in")[2] / (max(coords$y)-min(coords$y))
wx <- wx/ppi.x
wy <- wy/ppi.y
xlim <- c(min(coords$x-wx/2),max(coords$x+wx/2))
ylim <- c(-max(coords$y+wy/2),-min(coords$y-wy/2))
plot( NA, xlim=xlim, ylim=ylim, xlab="", ylab="", bty="n",
xaxt="n", yaxt="n" )
wx <- sapply( labels,
function(s) strwidth(paste0("xx",s)) )
wy <- sapply( labels,
function(s) strheight(paste0("\n",s)) )
asp <- par("pin")[1]/diff(par("usr")[1:2]) /
(par("pin")[2]/diff(par("usr")[3:4]))
ex <- edges(x)
ax1 <- rep(0,nrow(ex))
ax2 <- rep(0,nrow(ex))
ay1 <- rep(0,nrow(ex))
ay2 <- rep(0,nrow(ex))
axc <- rep(0,nrow(ex))
ayc <- rep(0,nrow(ex))
acode <- rep(2,nrow(ex))
has.control.point <- rep(FALSE,nrow(ex))
for( i in seq_len(nrow(ex)) ){
if( ex[i,3] == "<->" ){
acode[i] <- 3
has.control.point[i] <- TRUE
}
if( ex[i,3] == "--" ){
acode[i] <- 0
}
l1 <- as.character(ex[i,1]); l2 <- as.character(ex[i,2])
x1 <- coords$x[l1]; y1 <- coords$y[l1]
x2 <- coords$x[l2]; y2 <- coords$y[l2]
if( is.na( ex[i,4] ) || is.na( ex[i,5] ) ){
cp <- .autoControlPoint( x1, y1, x2, y2, asp,
.2*as.integer( acode[i]==3 ) )
} else {
cp <- list(x=ex[i,4],y=ex[i,5])
has.control.point[i] <- TRUE
}
bi1 <- .lineSegBoxIntersect( x1-wx[l1]/2,y1-wy[l1]/2,
x1+wx[l1]/2,y1+wy[l1]/2, x1, y1, cp$x, cp$y )
bi2 <- .lineSegBoxIntersect( x2-wx[l2]/2,y2-wy[l2]/2,
x2+wx[l2]/2,y2+wy[l2]/2, cp$x, cp$y, x2, y2 )
if( length(bi1) == 2 ){
x1 <- bi1$x; y1 <- bi1$y
}
if( length(bi2) == 2 ){
x2 <- bi2$x; y2 <- bi2$y
}
ax1[i] <- x1; ax2[i] <- x2
ay1[i] <- y1; ay2[i] <- y2
axc[i] <- cp$x; ayc[i] <- cp$y
}
directed <- acode==2 & !has.control.point
undirected <- acode==0 & !has.control.point
arrows( ax1[directed], -ay1[directed],
ax2[directed], -ay2[directed], length=0.1, col="gray" )
segments( ax1[undirected], -ay1[undirected],
ax2[undirected], -ay2[undirected], col="black", lwd=2 )
for( i in which( has.control.point ) ){
.arc( ax1[i], -ay1[i],
ax2[i], -ay2[i], axc[i], -ayc[i],
col="gray",
code=acode[i], length=0.1, lwd=2 )
}
text( coords$x, -coords$y[labels], labels )
}
#' Covariate Adjustment Sets
#'
#' Enumerates sets of covariates that (asymptotically) allow unbiased estimation of causal
#' effects from observational data, assuming that the input causal graph is correct.
#'
#' @param x the input graph, a DAG, MAG, PDAG, or PAG.
#' @param exposure name(s) of the exposure variable(s). If not given (default), then the
#' exposure variables are supposed to be defined in the graph itself.
#' @param outcome name(s) of the outcome variable(s), also taken from the graph if
#' not given.
#' @param effect which effect is to be identified. If \code{effect="total"}, then the
#' total effect is to be identified, and the adjustment criterion by Perkovic et
#' al (2015; see also van der Zander et al., 2014),
#' an extension of Pearl's back-door criterion, is used. Otherwise, if
#' \code{effect="direct"}, then the average direct effect is to be identified, and Pearl's
#' single-door criterion is used (Pearl, 2009). In a structural equation model (Gaussian
#' graphical model), direct effects are simply the path coefficients.
#' @param type which type of adjustment set(s) to compute. If \code{type="minimal"},
#' then only minimal sufficient adjustment sets are returned (default). For
#' \code{type="all"}, all valid adjustment sets are returned. For \code{type="canonical"},
#' a single adjustment set is returned that consists of all (possible) ancestors
#' of exposures and outcomes, minus (possible) descendants of nodes on proper causal
#' paths. This canonical adjustment set is always valid if any valid set exists
#' at all.
#'
#' @details
#' If the input graph is a MAG or PAG, then it must not contain any undirected
#' edges (=hidden selection variables).
#'
#' @references
#' J. Pearl (2009), Causality: Models, Reasoning and Inference.
#' Cambridge University Press.
#'
#' B. van der Zander, M. Liskiewicz and J. Textor (2014),
#' Constructing separators and adjustment sets in ancestral graphs.
#' In \emph{Proceedings of UAI 2014.}
#'
#' E. Perkovic, J. Textor, M. Kalisch and M. H. Maathuis (2015), A
#' Complete Generalized Adjustment Criterion. In \emph{Proceedings of UAI
#' 2015.}
#'
#' @examples
#' # The M-bias graph showing that adjustment for
#' # pre-treatment covariates is not always valid
#' g <- dagitty( "dag{ x -> y ; x <-> m <-> y }" )
#' adjustmentSets( g, "x", "y" ) # empty set
#' # Generate data where true effect (=path coefficient) is .5
#' set.seed( 123 ); d <- simulateSEM( g, .5, .5 )
#' confint( lm( y ~ x, d ) )["x",] # includes .5
#' confint( lm( y ~ x + m, d ) )["x",] # does not include .5
#'
#' # Adjustment sets can also sometimes be computed for graphs in which not all
#' # edge directions are known
#' g <- dagitty("pdag { x[e] y[o] a -- {i z b}; {a z i} -> x -> y <- {z b} }")
#' adjustmentSets( g )
#' @export
adjustmentSets <- function( x, exposure=NULL, outcome=NULL,
type=c("minimal","canonical","all"), effect=c("total","direct") ){
effect <- match.arg( effect )
type <- match.arg( type )
if( effect == "direct" && type != "minimal" ){
stop("Only minimal adjustment sets are supported for direct effects!")
}
x <- as.dagitty( x )
.supportsTypes( x, c("dag","mag","pdag","pag") )
if( !is.null( exposure ) ){
exposures(x) <- exposure
}
if( !is.null( outcome ) ){
outcomes(x) <- outcome
}
if( length(exposures(x)) == 0 || length(outcomes(x)) == 0 ){
stop("Both exposure(s) and outcome(s) need to be set!")
}
if( type == "minimal" ){
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
if( effect=="direct" ){
.jsassign( xv, .jsp("GraphAnalyzer.listMsasDirectEffect(global.",xv,")") )
} else {
.jsassign( xv, .jsp("GraphAnalyzer.listMsasTotalEffect(global.",xv,")") )
}
.jsassign( xv, .jsp("DagittyR.adj2r(global.",xv,")"))
r <- structure( .jsget(xv), class="dagitty.sets" )
},finally={.deleteJSVar(xv)})
} else if( type == "all" ){
covariates <- setdiff( names( x ), c( exposures(x), outcomes(x) ) )
subsets <- (expand.grid( rep( list(0:1),length(covariates)) ))
r <- lapply( 1:nrow(subsets), function(i){
Z <- covariates[as.logical(subsets[i,])]
if( isAdjustmentSet( x, Z ) ){
Z
} else {
NA
}
})
non.r <- which(sapply(r,function(x) isTRUE(is.na(x))))
if( length(non.r) > 0 ){
r <- r[-non.r]
}
r <- structure(r,class="dagitty.sets")
} else { # type == "canonical"
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("DagittyR.canonicalAdjustment(global.",xv,")"))
r <- structure(.jsget(xv),class="dagitty.sets")
},finally={.deleteJSVar(xv)})
}
r
}
#' Adjustment Criterion
#'
#' Test whether a set fulfills the adjustment criterion, that means,
#' it removes all confounding bias when estimating a *total* effect.
#' This is an extension of Pearl's
#' Back-door criterion (Shpitser et al, 2010; van der Zander et al,
#' 2014; Perkovic et al, 2015)
#' which is complete in the sense that either a set
#' fulfills this criterion, or it does not remove all confounding bias.
#'
#' @param x the input graph, a DAG, MAG, PDAG, or PAG.
#' @param exposure name(s) of the exposure variable(s). If not given (default), then the
#' exposure variables are supposed to be defined in the graph itself.
#' @param outcome name(s) of the outcome variable(s), also taken from the graph if
#' not given.
#' @param Z vector of variable names.
#'
#' @details
#' If the input graph is a MAG or PAG, then it must not contain any undirected
#' edges (=hidden selection variables).
#'
#' @references
#'
#' E. Perkovic, J. Textor, M. Kalisch and M. H. Maathuis (2015), A
#' Complete Generalized Adjustment Criterion. In \emph{Proceedings of UAI
#' 2015.}
#'
#' I. Shpitser, T. VanderWeele and J. M. Robins (2010), On the
#' validity of covariate adjustment for estimating causal effects. In
#' \emph{Proceedings of UAI 2010.}
#'
#' @export
isAdjustmentSet <- function( x, Z, exposure=NULL, outcome=NULL ){
x <- as.dagitty( x )
.supportsTypes( x, c("dag","mag","pdag","pag") )
xv <- .getJSVar()
Zv <- .getJSVar()
if( !is.null( exposure ) ){
exposures(x) <- exposure
}
if( !is.null( outcome ) ){
outcomes(x) <- outcome
}
if( length(exposures(x)) == 0 || length(outcomes(x)) == 0 ){
stop("Both exposure(s) and outcome(s) need to be set!")
}
tryCatch({
.jsassigngraph( xv, x )
.jsassign( Zv, as.list(Z) )
.jsassign( xv, .jsp("GraphAnalyzer.isAdjustmentSet(",xv,",",Zv,")") )
r <- .jsget(xv)
},finally={
.deleteJSVar(xv)
.deleteJSVar(Zv)
})
r
}
#' List Implied Conditional Independencies
#'
#' Generates a list of conditional independence statements that must hold in every
#' probability distribution compatible with the given model.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param type can be one of "missing.edge" or "basis.set". With the former, one testable
#' implication is returned per missing edge of the graph. With the latter, one testable
#' implication is returned per vertex of the graph that has non-descendants other than
#' its parents. Basis sets can be smaller, but they involve higher-dimensional independencies,
#' whereas missing edge sets involve only bivariate independencies.
#' @param max.results integer. The listing of conditional independencies is stopped once
#' this many results have been found. Use \code{Inf} to generate them all. This applies
#' only when \code{type="missing.edge"}.
#' @examples
#' g <- dagitty( "dag{ x -> m -> y }" )
#' impliedConditionalIndependencies( g ) # one
#' latents( g ) <- c("m")
#' impliedConditionalIndependencies( g ) # none
#' @export
impliedConditionalIndependencies <- function( x, type="missing.edge", max.results=Inf ){
if( ! type %in% c("missing.edge","basis.set") ){
stop("'type' must be one of: missing.edge, basis.set")
}
x <- as.dagitty( x )
.supportsTypes( x, c("dag","mag","pdag") )
xv <- .getJSVar()
tryCatch({
.jsassign( xv, as.character(x) )
.jsassign( xv, .jsp("GraphParser.parseGuess(global.",xv,")") )
if( type == "missing.edge" ){
if( is.finite( max.results ) ){
.jsassign( xv,
.jsp("GraphAnalyzer.listMinimalImplications(global.",xv,",",
as.numeric(max.results),")"))
} else {
.jsassign( xv,
.jsp("GraphAnalyzer.listMinimalImplications(global.",xv,")"))
}
} else {
.jsassign( xv, .jsp("GraphAnalyzer.listBasisImplications(global.",xv,")"))
}
.jsassign( xv, .jsp("DagittyR.imp2r(global.",xv,")") )
r <- structure( lapply( .jsget(xv),
function(x) structure(x,class="dagitty.ci") ), class="dagitty.cis" )
},finally={.deleteJSVar(xv)})
r
}
#' Find Instrumental Variables
#'
#' Generates a list of instrumental variables that can be used to infer the total effect
#' of an exposure on an outcome in the presence of latent confounding, under linearity
#' assumptions.
#'
#' @param x the input graph, a DAG.
#' @param exposure name of the exposure variable. If not given (default), then the
#' exposure variable is supposed to be defined in the graph itself. Only a single
#' exposure variable and a single outcome variable supported.
#' @param outcome name of the outcome variable, also taken from the graph if not given.
#' Only a single outcome variable is supported.
#'
#' @references
#' B. van der Zander, J. Textor and M. Liskiewicz (2015),
#' Efficiently Finding Conditional Instruments for Causal Inference.
#' In \emph{Proceedings of the 24th International Joint Conference on
#' Artificial Intelligence (IJCAI 2015)}, pp. 3243-3249. AAAI Press, 2015.
#'
#' @examples
#' # The classic IV model
#' instrumentalVariables( "dag{ i->x->y; x<->y }", "x", "y" )
#' # A conditional instrumental variable
#' instrumentalVariables( "dag{ i->x->y; x<->y ; y<-z->i }", "x", "y" )
#' @export
instrumentalVariables <- function( x, exposure=NULL, outcome=NULL ){
x <- as.dagitty( x )
.supportsTypes( x, "dag" )
if( !is.null( exposure ) ){
exposures(x) <- exposure
}
if( !is.null( outcome ) ){
outcomes(x) <- outcome
}
if( length(exposures(x)) != 1 || length(outcomes(x)) != 1 ){
stop("Both exposure(s) and outcome(s) need to be set!")
}
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("GraphAnalyzer.conditionalInstruments(global.",xv,")") )
.jsassign( xv, .jsp("DagittyR.iv2r(global.",xv,")") )
r <- structure( .jsget(xv), class="dagitty.ivs" )
}, finally={.deleteJSVar(xv)})
r
}
#' List Implied Vanishing Tetrads
#'
#' Interpret the given graph as a structural equation model and list all the
#' vanishing tetrads that it implies.
#'
#' @param x the input graph, a DAG.
#' @param type restrict output to one level of Kenny's tetrad typology.
#' Possible values are "within" (homogeneity within constructs; all four
#' variables have the same parents), "between" (homogeneity between constructs;
#' two pairs of variables each sharing one parent)
#' and "epistemic" (consistency of epistemic correlations; three variables have
#' the same parent). By default, all tetrads are listed.
#' @return a data frame with four columns, where each row of the form
#' i,j,k,l means that the tetrad Cov(i,j)Cov(k,l) - Cov(i,k)Cov(j,l) vanishes
#' (is equal to 0) according to the model.
#'
#' @examples
#' # Specify two-factor model with 4 indicators each
#' g <- dagitty("dag{{x1 x2 x3 x4} <- x <-> y -> {y1 y2 y3 y4}}")
#' latents(g) <- c("x","y")
#'
#' # Check how many tetrads are implied
#' nrow(vanishingTetrads(g))
#' # Check how these distribute across the typology
#' nrow(vanishingTetrads(g,"within"))
#' nrow(vanishingTetrads(g,"between"))
#' nrow(vanishingTetrads(g,"epistemic"))
#'
#' @references
#' Kenny, D. A. (1979), Correlation and Causality. Wiley, New York.
#'
#' @export
vanishingTetrads <- function( x, type=NA ){
x <- as.dagitty( x )
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
if( is.character( type ) ){
.jsassign( xv, .jsp("GraphAnalyzer.vanishingTetrads(global.",
xv,",undefined,'",type,"')") )
} else {
.jsassign( xv, .jsp("GraphAnalyzer.vanishingTetrads(global.",
xv,")") )
}
r <- .jsget(xv)
}, finally={.deleteJSVar(xv)})
r
}
#' Convert Lavaan Model to DAGitty Graph
#'
#' The \code{lavaan} package is a popular package for structural equation
#' modeling. To provide interoperability with lavaan, this function
#' converts models specified in lavaan syntax to dagitty graphs.
#'
#' @param x data frame, lavaan parameter table such as returned by
#' \code{\link[lavaan]{lavaanify}}.
#' @param ... Not used.
#'
#' @examples
#' if( require(lavaan) ){
#' mdl <- lavaanify("
#' X ~ C1 + C3
#' M ~ X + C3
#' Y ~ X + M + C3 + C5
#' C1 ~ C2
#' C3 ~ C2 + C4
#' C5 ~ C4
#' C1 ~~ C2 \n C1 ~~ C3 \n C1 ~~ C4 \n C1 ~~ C5
#' C2 ~~ C3 \n C2 ~~ C4 \n C2 ~~ C5
#' C3 ~~ C4 \n C3 ~~ C5",fixed.x=FALSE)
#' plot( graphLayout( lavaanToGraph( mdl ) ) )
#' }
#' @export
lavaanToGraph <- function( x, ... ){
latents <- c()
arrows <- c()
for( i in seq_len( nrow(x) ) ){
if( x$op[i] == "=~" ){
latents <- union(latents,x$lhs[i])
arrows <- c(arrows,paste(x$lhs[i]," -> ",x$rhs[i]))
}
if( x$op[i] == "~" ){
arrows <- c(arrows,paste(x$lhs[i]," <- ",x$rhs[i]))
}
if( x$op[i] == "~~" && (x$lhs[i] != x$rhs[i]) ){
arrows <- c(arrows,paste(x$lhs[i]," <-> ",x$rhs[i]))
}
}
if( length(latents) > 0 ){
latents <- paste(latents,' [latent]',collapse="\n")
} else {
latents <- ""
}
dagitty( paste("dag { ",latents,"\n",
paste(arrows,collapse="\n")," } ",collapse="\n") )
}
#' @export
toString.dagitty <- function( x, format="dagitty", ... ){
x <- as.dagitty( x )
if( !format %in% c("dagitty","tikz","lavaan","dagitty.old") ){
stop( "Unsupported export format: ", format )
}
r <- NULL
if( format == "dagitty" ){
r <- as.character( x )
} else if( format == "dagitty.old" ){
xv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv, .jsp("global.",xv,".oldToString()") )
r <- as.character( .jsget(xv) )
}, error=function(e){
stop( e )
},finally={.deleteJSVar(xv)})
} else if( format %in% c("lavaan","tikz") ){
xv <- .getJSVar()
tryCatch({
.jsassign( xv, as.character(x) )
.jsassign( xv, .jsp("GraphSerializer.to",
toupper(substring(format, 1,1)), substring(format, 2),
"(GraphParser.parseGuess(global.",xv,"))") )
r <- as.character( .jsget(xv) )
}, error=function(e){
stop( e )
},finally={.deleteJSVar(xv)})
}
r
}
#' Parse DAGitty Graph
#'
#' Constructs a \code{dagitty} graph object from a textual description.
#'
#' @param x character, string describing a graphical model in dagitty syntax.
#' @param layout logical, whether to automatically generate layout coordinates for each
#' variable (see \code{\link{graphLayout}})
#' @details
#' The textual syntax for DAGitty graph is based on the dot language of the
#' graphviz software (\url{http://www.graphviz.org/content/dot-language}). This is a
#' fairly intuitive syntax -- use the examples below and in the other functions to
#' get you started. An important difference to graphviz is that the DAGitty language
#' supports several types of graphs, which have different semantics. However, many users
#' will mainly focus on DAGs.
#'
#' A DAGitty graph description has the following form:
#'
#' \code{[graph type] '{' [statements] '}'}
#'
#' where \code{[graph type]} is one of 'dag', 'mag', 'pdag', or 'pag' and \code{[statements]}
#' is a list of variables statements and edge statements, which may (optionally) be
#' separated by semicolons. Whitespace, including newlines, has no semantic role.
#'
#' Variable statments look like
#'
#' \code{[variable id] '[' [properties] ']'}
#'
#' For example, the statement
#'
#' \code{x [exposure,pos="1,0"]}
#'
#' declares a variable with ID x that is an exposure variable and has a layout position
#' of 1,0.
#'
#' The edge statement
#'
#' \code{x -> y}
#'
#' declares a directed edge from variable x to variable y. Explicit variable statements
#' are not required for the variables involved in edge statements, unless attributes
#' such as position or exposure/outcome status need to be set.
#'
#' DAGs (directed acyclic graphs) can contain the following edges: \code{->}, \code{<->}.
#' Bidirected edges in DAGs are simply shorthands for substructures \code{<- U ->},
#' where U is an unobserved variable.
#'
#' MAGs (maximal ancestral graphs) can contain the following edges: \code{->},
#' \code{<->}, \code{--}.
#' The bidirected and directed edges of MAGs can represent latent confounders, and
#' the undirected edges represent latent selection variables.
#' For details, see Richardson and Spirtes (2002).
#'
#' PDAGs (partially directed acyclic graphs) can contain the following edges: \code{->},
#' \code{<->}, \code{--}.
#' The bidirected edges mean the same thing as in DAGs. The undirected edges represent
#' edges whose direction is not known. Thus, PDAGs are used to represent equivalence
#' classes of DAGs (see also the function \code{\link{equivalenceClass}}).
#'
#' PAGs (partial ancestral graphs) are to MAGs what PDAGs are to DAGs: they represent
#' equivalence classes of MAGs. MAGs can contain the following edges: \code{@-@},
#' \code{->}, \code{@->}, \code{--}, \code{@--}
#' (the @ symbols are written as circle marks in most of the literature). For
#' details on PAGs, see Zhang et al (2008). For now, only a few DAGitty functions
#' support PAGs (for instance, \code{\link{adjustmentSets}}.
#'
#' The DAGitty parser does not perform semantic validation. That is,
#' it will not check whether a DAG is actually acyclic, or whether all chain components
#' in a PAG are actually chordal. This is not done because it can be computationally
#' rather expensive.
#'
#' @references
#' Richardson, Thomas; Spirtes, Peter (2002), Ancestral graph Markov models.
#' \emph{The Annals of Statistics} 30(4): 962-1030.
#'
#' J. Zhang (2008), Causal Reasoning with Ancestral Graphs.
#' \emph{Journal of Machine Learning Research} 9: 1437-1474.
#'
#' B. van der Zander and M. Liskiewicz (2016),
#' Separators and Adjustment Sets in Markov Equivalent DAGs.
#' In \emph{Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence (AAAI'16)},
#' Phoenix, Arizona, USA.
#' @examples
#' # Specify a simple DAG containing one path
#' g <- dagitty("dag{
#' a -> b ;
#' b -> c ;
#' d -> c
#' }")
#' # Newlines and semicolons are optional
#' g <- dagitty("dag{
#' a -> b b -> c c -> d
#' }")
#' # Paths can be specified in one go; the semicolon below is
#' # optional
#' g <- dagitty("dag{
#' a -> b ->c ; c -> d
#' }")
#' # Edges can be written in reverse notation
#' g <- dagitty("dag{
#' a -> b -> c <- d
#' }")
#' # Spaces are optional as well
#' g <- dagitty("dag{a->b->c<-d}")
#' # Variable attributes can be set in square brackets
#' # Example: DAG with one exposure, one outcome, and one unobserved variable
#' g <- dagitty("dag{
#' x -> y ; x <- z -> y
#' x [exposure]
#' y [outcome]
#' z [unobserved]
#' }")
#' # The same graph as above
#' g <- dagitty("dag{x[e]y[o]z[u]x<-z->y<-x}")
#' # A two-factor latent variable model
#' g <- dagitty("dag {
#' X <-> Y
#' X -> a X -> b X -> c X -> d
#' Y -> a Y -> b Y -> c Y -> d
#' }")
#' # Curly braces can be used to "group" variables and
#' # specify edges to whole groups of variables
#' # The same two-factor model
#' g <- dagitty("dag{ {X<->Y} -> {a b c d} }")
#' # A MAG
#' g <- dagitty("mag{ a -- x -> y <-> z }")
#' # A PDAG
#' g <- dagitty("pdag{ x -- y -- z }")
#' # A PAG
#' g <- dagitty("pag{ x @-@ y @-@ z }")
#' @export
dagitty <- function(x, layout=FALSE){
if(!is.character(x)){
stop("Expecting a string to build dagitty graph!")
}
xv <- .getJSVar()
tryCatch({
.jsassign( xv, as.character(x) )
.jsassign( xv, .jsp("GraphParser.parseGuess(global.",xv,").toString()") )
r <- structure( .jsget(xv), class="dagitty" )
}, error=function(e){
stop( e )
},finally={.deleteJSVar(xv)})
r <- structure( r, class="dagitty" )
if( layout ){
r <- graphLayout(r)
}
r
}
#' Load Graph from dagitty.net
#'
#' Downloads a graph that has been built and stored online using the dagitty.net GUI.
#' Users who store graphs online will receive a unique URL for their graph, which
#' can be fed into this function to continue working with the graph in R.
#'
#' @param x dagitty model URL.
#' @export
downloadGraph <- function(x="dagitty.net/mz-Tuw9"){
if( !requireNamespace( "base64enc", quietly=TRUE ) ){
stop("This function requires the package 'base64enc'!")
}
id <- gsub( "dagitty\\.net\\/m(.*)$", "\\1", x )
r <- base64enc::base64decode(scan(paste0("http://dagitty.net/dags/load.php?id=",id),"character"))
if( base64enc::checkUTF8(r) ){
dagitty( rawToChar( r ) )
} else {
NULL
}
}
#' Test Graph against Data
#'
#' Derives testable implications from the given graphical model and tests them against
#' the given dataset.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param tests optional list of the precise tests to perform. If not given, the list
#' of tests is automatically derived from the input graph. Can be used to restrict
#' testing to only a certain subset of tests (for instance, to test only those conditional
#' independencies for which the conditioning set is of a reasonably low dimension, such
#' as shown in the example).
#' @param data matrix or data frame containing the data.
#' @param sample.cov the sample covariance matrix; ignored if \code{data} is supplied.
#' Either \code{data} or \code{sample.cov} and \code{sample.nobs} must be supplied.
#' @param sample.nobs number of observations; ignored if \code{data} is supplied.
#' @param type character indicating which kind of local
#' test to perform. Supported values are \code{"cis"} (linear conditional independence),
#' \code{"cis.loess"} (conditional independence using loess regression),
#' \code{"tetrads"} and \code{"tetrads.type"}, where "type" is one of the items of the
#' tetrad typology, e.g. \code{"tetrads.within"} (see \code{\link{vanishingTetrads}}).
#' Tetrad testing is only implemented for DAGs.
#' @param R how many bootstrap replicates for estimating confidence
#' intervals. If \code{NULL}, then confidence intervals are based on normal
#' approximation. For tetrads, the normal approximation is only valid in
#' large samples even if the data are normally distributed.
#' @param conf.level determines the size of confidence intervals for test
#' statistics.
#' @param loess.pars list of parameter to be passed on to \code{\link[stats]{loess}}
#' (for \code{type="cis.loess"}), for example the smoothing range.
#'
#' @details Tetrad implications can only be derived if a Gaussian model (i.e., a linear
#' structural equation model) is postulated. Conditional independence implications (CI)
#' do not require this assumption. However, both Tetrad and CI implications are tested
#' parametrically: for Tetrads, Wishart's confidence interval formula is used, whereas
#' for CIs, a Z test of zero conditional covariance (if the covariance
#' matrix is given) or a test of residual independence after linear regression
#' (it the raw data is given) is performed.
#' Both tetrad and CI tests also support bootstrapping instead of estimating parametric
#' confidence intervals.
#'
#' @examples
#' # Simulate full mediation model with measurement error of M1
#' set.seed(123)
#' d <- simulateSEM("dag{X->{U1 M2}->Y U1->M1}",.6,.6)
#'
#' # Postulate and test full mediation model without measurement error
#' plotLocalTestResults(localTests( "dag{ X -> {M1 M2} -> Y }", d, "cis" ))
#'
#' # Simulate data from example SEM
#' g <- getExample("Polzer")
#' d <- simulateSEM(g,.1,.1)
#'
#' # Compute independencies with at most 3 conditioning variables
#' imp <- Filter(function(x) length(x$Z)<4, impliedConditionalIndependencies(g))
#' plotLocalTestResults(localTests( g, d, "cis.loess", R=100, tests=imp, loess.pars=list(span=0.6) ))
#'
#' @export
localTests <- function(x, data=NULL,
type=c("cis","cis.loess","tetrads","tetrads.within","tetrads.between","tetrads.epistemic"),
tests=NULL,
sample.cov=NULL,sample.nobs=NULL,
conf.level=.95,R=NULL,loess.pars=NULL){
x <- as.dagitty(x)
type <- match.arg(type)
if( type=="cis" ){
.supportsTypes(x,c("dag","pdag","mag"))
} else {
.supportsTypes(x,c("dag"))
}
if( !is.null(sample.cov) && is.null(sample.nobs) ){
stop("Please provide sample size (sample.nobs)!")
}
if( !is.null(R) && is.null(data) ){
stop("Bootstrapping requires raw data!")
}
if( is.null(R) && type=="cis.loess" ){
stop("Non-parametric conditional independence testing requires bootstrapping! Please provide R argument.")
}
if( is.null(data) && is.null(sample.cov) ){
stop("Please provide either data or sample covariance matrix!")
}
w <- (1-conf.level)/2
if( type %in% c("tetrads","tetrads.within","tetrads.between","tetrads.epistemic") ){
if( is.null( tests ) ){
if( type == "tetrads" ){
tests <- vanishingTetrads( x )
} else {
tests <- vanishingTetrads( x, strsplit(type,"\\.")[[1]][2] )
}
}
if( length(tests) == 0 ){
return(data.frame())
}
if( is.null( sample.cov ) ){
sample.cov <- cov(data)
}
if( is.null( sample.nobs ) ){
sample.nobs <- nrow(data)
}
tetrad.values <- .tetradsFromCov(sample.cov,tests)
tetrad.sample.sds <- sapply(seq_len(nrow(tests)),
function(i) .tetrad.sem(tests[i,],sample.cov,sample.nobs))
r <- data.frame(
row.names=apply( tests, 1,
function(x) paste(x,collapse=",") ),
estimate=tetrad.values
)
if( !is.null(R) ){
requireNamespace("boot",quietly=TRUE)
bo <- boot::boot( data,
function(data,i) .tetradsFromData(data,tests,i), R )
r <- cbind( r, t(apply( bo$t, 2,
function(x) c(sd(x), quantile(x,c((1-conf.level)/2,1-(1-conf.level)/2))) )) )
colnames(r) <- c("estimate","std.error",
paste0(100*w,"%"),paste0(100*(1-w),"%"))
} else {
std.errors <- tetrad.sample.sds
p.values <- 2*pnorm(abs(tetrad.values/tetrad.sample.sds),
lower.tail=FALSE)
conf <- cbind( std.errors, p.values, tetrad.values+qnorm(w)*tetrad.sample.sds,
tetrad.values+qnorm(1-w)*tetrad.sample.sds )
r <- cbind( r, conf )
colnames(r) <- c("estimate","std.error","p.value",
paste0(100*w,"%"),paste0(100*(1-w),"%"))
}
} else if( type %in% c("cis","cis.loess") ){
if( is.null(tests) ){
tests <- impliedConditionalIndependencies( x )
}
if( length(tests) == 0 ){
return(data.frame())
}
row.names <- sapply(tests,as.character)
if( !is.null(R) ){
if( type == "cis" ){
f <- function(i) .ci.test.lm.perm(data,i,conf.level,R)
} else {
f <- function(i) .ci.test.loess.perm(data,i,conf.level,R,loess.pars)
}
r <- as.data.frame(
row.names=row.names,
t(sapply( tests, f ))
)
colnames(r) <- c("estimate","std.error",
paste0(100*w,"%"),paste0(100*(1-w),"%"))
} else {
if( !is.null(data) ){
r <- as.data.frame(
row.names=row.names,
t(sapply( tests, function(i)
.ri.test(data,i,conf.level) ))
)
} else {
r <- as.data.frame(
row.names=row.names,
t(sapply( tests, function(i)
.ci.test.covmat(sample.cov,sample.nobs,i,conf.level) ))
)
}
colnames(r) <- c("estimate","std.error","p.value",
paste0(100*w,"%"),paste0(100*(1-w),"%"))
}
}
return(r)
}
#' Plot Results of Local Tests
#'
#' Generates a summary plot of the results of local tests
#' (see \link{localTests}). For each test, a test statistic and
#' the confidence interval are shown.
#'
#' @param x data frame; results of the local tests as returned by
#' \link{localTests}.
#' @param xlab X axis label.
#' @param xlim numerical vector with 2 elements; range of X axis.
#' @param axis.pars arguments to be passed on to \code{\link{axis}}
#' when generating the Y axis for the plot.
#' @param sort.by.statistic logical. Sort the rows of \code{x} by
#' the absolute value of the test statistic before plotting.
#' @param n plot only the n tests for which the absolute value of
#' the test statistics diverges most from 0.
#' @param ... further arguments to be passed on to \code{\link{plot}}.
#'
#' @examples
#' d <- simulateSEM("dag{X->{U1 M2}->Y U1->M1}",.6,.6)
#' par(mar=c(2,8,1,1)) # so we can see the test names
#' plotLocalTestResults(localTests( "dag{ X -> {M1 M2} -> Y }", d, "cis" ))
#'
#' @export
plotLocalTestResults <- function(x,xlab="test statistic (95% CI)",
xlim=range(x[,c(ncol(x)-1,ncol(x))]),sort.by.statistic=TRUE,
n=Inf,axis.pars=list(las=1),...){
x <- x[order(abs(x[1]),decreasing=TRUE),]
if( is.finite(n) && n > 0 && n < nrow(x) ){
x <- x[1:n,]
}
y <- seq_len(nrow(x))
plot( x[,1], y,xlab=xlab,xlim=xlim, yaxt="n", ylab="", ... )
do.call( axis, c( list( 2, at=y, labels=rownames(x)), axis.pars ) )
segments( x[,ncol(x)-1], y, x[,ncol(x)], y )
#segments( seq_len(nrow(x))+.1, x[,1]-2*x[,2],
# y1=x[,1]+2*x[,2], col=2 )
abline( v=0 )
}
#' Show Paths
#'
#' Returns a list with two compontents: \code{path} gives the actual
#' paths, and \code{open} shows whether each path is open (d-connected)
#' or closed (d-separated).
#'
#' @param x the input graph, a DAG, PDAG, or MAG.
#' @param from name(s) of first variable(s).
#' @param to name(s) of last variable(s).
#' @param Z names of variables to condition on for determining open
#' paths.
#' @param limit maximum amount of paths to show. In general, the number of paths grows
#' exponentially with the number of variables in the graph, such that path inspection
#' is not useful except for the most simple models.
#' @param directed logical; should only directed (i.e., causal) paths
#' be shown?
#'
#' @examples
#' sum( paths(backDoorGraph(getExample("Shrier")))$open ) # Any open Back-Door paths?
#'
#' @export
paths <- function(x,from=exposures(x),to=outcomes(x),Z=list(),limit=100,directed=FALSE){
x <- as.dagitty(x)
.supportsTypes(x,c("dag","mag","pdag"))
xv <- .getJSVar()
xv2 <- .getJSVar()
exposures(x) <- from
outcomes(x) <- to
if( length(exposures(x)) == 0 || length(outcomes(x)) == 0 ){
stop("Both start end end node(s) need to be set!")
}
tryCatch({
.jsassigngraph( xv, x )
.jsassign( xv2, as.list(Z) )
.jsassign( xv, .jsp("DagittyR.paths2r(GraphAnalyzer.listPaths(global.",
xv,",",tolower(directed),",",limit,"),",xv2,",",
xv,")" ) )
r <- .jsget(xv)
},finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
r
}
#' d-Separation
#'
#' A set Z d-separates a path p if (1) Z contains a non-collider
#' on p, e.g. x->m->y with \code{Z=c("m")}; or (2) some collider on p is not
#' on Z, e.g. x->m<-y with \code{Z=c()}.
#'
#' @param x the input graph, a DAG, PDAG, or MAG.
#' @param X vector of variable names.
#' @param Y vector of variable names.
#' @param Z vector of variable names.
#'
#' \code{dseparated(x,X,Y,Z)} checks if all paths between X and Y are
#' d-separated by Z.
#'
#' \code{dconnected(x,X,Y,Z)} checks if at least one path between X and Y
#' is not d-separated by Z.
#'
#' @details
#' The functions also work for mixed graphs with directed, undirected,
#' and bidirected edges. The definition of a collider in such graphs
#' is: a node where two arrowheads collide, e.g. x<->m<-y but not
#' x->m--y.
#'
#' @examples
#' dconnected( "dag{x->m->y}", "x", "y", c() ) # TRUE
#' dconnected( "dag{x->m->y}", "x", "y", c("m") ) # FALSE
#' dseparated( "dag{x->m->y}", "x", "y", c() ) # TRUE
#' dseparated( "dag{x->m->y}", "x", "y", c("m") ) # FALSE
#'
#' @export
dconnected <- function(x,X,Y=list(),Z=list()){
x <- as.dagitty(x)
.supportsTypes(x,c("dag","pdag","mag"))
xv <- .getJSVar()
Xv <- .getJSVar()
Yv <- .getJSVar()
Zv <- .getJSVar()
tryCatch({
.jsassigngraph( xv, x )
.jsassign( Xv, as.list(X) )
.jsassign( Yv, as.list(Y) )
.jsassign( Zv, as.list(Z) )
.jsassign( xv, .jsp("DagittyR.dconnected(",xv,",",Xv,",",Yv,",",Zv,")") )
r <- .jsget(xv)
},finally={
.deleteJSVar(xv)
.deleteJSVar(Xv)
.deleteJSVar(Yv)
.deleteJSVar(Zv)
})
r
}
#' @rdname dconnected
#' @export
dseparated <- function(x,X,Y=list(),Z=list()){
if( length(Y) > 0 ){
!dconnected(x,X,Y,Z)
} else {
setdiff(names(x),dconnected(x,X,Y,Z))
}
}
#' Generate DAG at Random
#'
#' Generates a random DAG with N variables called x1,...,xN. For each
#' pair of variables xi,xj with i<j, an edge i->j will be present with
#' probability p.
#'
#' @param N desired number of variables.
#' @param p connectivity parameter, a number between 0 and 1.
#' @export
randomDAG <- function( N, p ){
N <- as.integer(N)
if( N < 1 ){
stop("N must be positive!")
}
p <- as.double(p)
if( p < 0 || p > 1 ){
stop("p is not a probability!")
}
xv <- .getJSVar()
pv <- .getJSVar()
tryCatch({
.jsassign( pv, p )
.jsassign( xv, as.list(paste0("x",seq_len(N))) )
.jsassign( xv, .jsp("GraphGenerator.randomDAG(",xv,",",pv,").toString()") )
r <- .jsget(xv)
},finally={
.deleteJSVar(xv)
.deleteJSVar(pv)
})
r
}
#' @export
print.dagitty.ivs <- function( x, prefix="", ... ){
for( i in x ){
cat( prefix, i$I )
if( length( i$Z > 0 ) ){
cat( " | ", paste(i$Z,collapse=", ") )
}
cat( "\n" )
}
}
#' @export
print.dagitty.sets <- function( x, prefix="", ... ){
for( i in x ){
if( length(i) == 0 ){
cat( prefix, "{}\n")
} else {
cat( prefix, "{",paste(i,collapse=", "), "}\n" )
}
}
}
#' @export
as.character.dagitty.ci <- function( x, ... ){
r <- paste0( x$X, " _||_ ", paste(x$Y,collapse=", ") )
if( length( x$Z > 0 ) ){
r <- paste0( r, " | ", paste(x$Z,collapse=", ") )
}
r
}
#' @export
print.dagitty.ci <- function( x, ... ){
cat( as.character( x ),"\n" )
}
#' @export
print.dagitty.cis <- function( x, ... ){
for( i in seq_along(x) ){
cat( as.character( x[[i]] ) )
cat("\n")
}
}
#' @export
as.list.dagitty.cis <- function( x, ... ) structure( x, class="list" )
#' @export
`[.dagitty.cis` <- function(x,y) structure(as.list(x)[y],class="dagitty.cis")
#' @export
as.list.dagitty.sets <- function( x, ... ) structure( x, class="list" )
#' @export
`[.dagitty.sets` <- function(x,y) structure(as.list(x)[y],class="dagitty.sets")
#' @export
as.list.dagitty.ivs <- function( x, ... ) structure( x, class="list" )
#' @export
`[.dagitty.ivs` <- function(x,y) structure(as.list(x)[y],class="dagitty.ivs")
#' Convert to DAGitty object
#'
#' Converts its argument to a DAGitty object, if possible.
#'
#' @param x an object.
#' @param ... further arguments passed on to methods.
#' @export
as.dagitty <- function( x, ... ) UseMethod("as.dagitty")
#' @export
as.dagitty.character <- function( x, ... ) dagitty( x )
#' @export
as.dagitty.default <- function( x, ... ){
if( class(x) == "dagitty" ){
x
} else {
stop("Cannot coerce object to class 'dagitty': ",x)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.