SEMID: Identifiability of Linear Structural Equation Models

Documented in ancestralID ancestralIdentifyStep createAncestralIdentifier getAncestors getDescendants getMaxFlow getMixedCompForNode getParents getSiblings graphID.ancestralID htr

#' Create an ancestral identification function.
#'
#' A helper function for \code{\link{ancestralIdentifyStep}}, creates an identifier function
#' based on its given parameters. This created identifier function will
#' identify the directed edges from 'targets' to 'node.'
#'
#' @inheritParams createHtcIdentifier
#' @param ancestralSubset an ancestral subset of the graph containing \code{node}.
#' @param cComponent a list corresponding to the connected component containing \code{node}
#'        in the subgraph induced by \code{ancestralSubset}. See
#'        \code{\link{tianDecompose}} for how such connected component lists are formed.
#'
#' @return an identification function
createAncestralIdentifier <- function(idFunc, sources, targets, node, htrSources,
    ancestralSubset, cComponent) {
    # Necessary 'redundant' assignments
    idFunc <- idFunc
    sources <- sources
    targets <- targets
    node <- node
    htrSources <- htrSources
    ancestralSubset <- sort(ancestralSubset)
    cComponent <- cComponent
    return(function(Sigma) {
        m <- nrow(Sigma)
        identifiedParams <- idFunc(Sigma)
        Lambda <- identifiedParams$Lambda

        topOrder <- cComponent$topOrder

        afterAnc <- function(x) {
            which(x == ancestralSubset)
        }
        topOrderAfterAnc <- sapply(topOrder, afterAnc)
        sourcesAfterAnc <- sapply(sources, afterAnc)
        targetsAfterAnc <- sapply(targets, afterAnc)
        htrSourcesAfterAnc <- sapply(htrSources, afterAnc)
        internalAfterAnc <- sapply(cComponent$internal, afterAnc)
        incomingAfterAnc <- sapply(cComponent$incoming, afterAnc)
        nodeAfterAnc <- afterAnc(node)

        SigmaAnc <- Sigma[ancestralSubset, ancestralSubset]
        SigmaAncTian <- tianSigmaForComponent(SigmaAnc, internalAfterAnc, incomingAfterAnc,
            topOrderAfterAnc)

        afterAncTian <- function(x) {
            which(x == topOrderAfterAnc)
        }
        sourcesAfterAncTian <- sapply(sourcesAfterAnc, afterAncTian)
        targetsAfterAncTian <- sapply(targetsAfterAnc, afterAncTian)
        htrSourcesAfterAncTian <- sapply(htrSourcesAfterAnc, afterAncTian)
        nodeAfterAncTian <- afterAncTian(nodeAfterAnc)

        LambdaAfterAncTian <- Lambda[ancestralSubset, ancestralSubset, drop = F]
        LambdaAfterAncTian <- LambdaAfterAncTian[topOrderAfterAnc, topOrderAfterAnc,
            drop = F]

        SigmaMinus <- SigmaAncTian
        for (source in htrSourcesAfterAncTian) {
            SigmaMinus[source, ] <- SigmaAncTian[source, ] - t(LambdaAfterAncTian[,
                source, drop = F]) %*% SigmaAncTian
        }

        if (abs(det(SigmaMinus[sourcesAfterAncTian, targetsAfterAncTian, drop = F])) <
            10^-10) {
            stop("In identification, found near-singular system. Is the input matrix generic?")
        }

        sols <- solve(SigmaMinus[sourcesAfterAncTian, targetsAfterAncTian, drop = F],
            SigmaMinus[sourcesAfterAncTian, nodeAfterAncTian, drop = F])

        undoAncTian <- function(x) {
            ancestralSubset[topOrderAfterAnc[x]]
        }
        Lambda[undoAncTian(targetsAfterAncTian), node] <- sols

        return(list(Lambda = Lambda, Omega = identifiedParams$Omega))
    })
}

#' Perform one iteration of ancestral identification.
#'
#' A function that does one step through all the nodes in a mixed graph
#' and tries to determine if directed edge coefficients are generically
#' identifiable by leveraging decomposition by ancestral subsets. See
#' Algorithm 1 of Drton and Weihs (2015); this version of the algorithm
#' is somewhat different from Drton and Weihs (2015) in that it also works
#' on cyclic graphs.
#'
#' @inheritParams htcIdentifyStep
#'
#' @return a list with four components:
#' \describe{
#'   \item{\code{identifiedEdges}}{a matrix rx2 matrix where r is the number
#'   of edges that where identified by this function call and
#'   \code{identifiedEdges[i,1] -> identifiedEdges[i,2]} was the ith edge
#'   identified}
#'   \item{\code{unsolvedParents}}{as the input argument but updated with
#'   any newly identified edges}
#'   \item{\code{solvedParents}}{as the input argument but updated with
#'   any newly identified edges}
#'   \item{\code{identifier}}{as the input argument but updated with
#'   any newly identified edges}
#' }
#'
#' @export
#'
#' @references
#' {Drton}, M. and {Weihs}, L. (2015) Generic Identifiability of Linear
#' Structural Equation Models by Ancestor Decomposition. arXiv 1504.02992
ancestralIdentifyStep <- function(mixedGraph, unsolvedParents, solvedParents, identifier) {
    identifiedEdges <- c()
    m <- mixedGraph$numNodes()
    ancestralComps <- rep(list(list()), m)
    solvedNodes <- which(sapply(unsolvedParents, FUN = function(x) {
        length(x) == 0
    }))
    for (i in 1:m) {
        unsolved <- unsolvedParents[[i]]
        if (length(unsolved) != 0) {

            if (length(ancestralComps[[i]]) == 0) {
                nodeAncestors <- mixedGraph$ancestors(i)
                ancGraph <- mixedGraph$inducedSubgraph(nodeAncestors)
                tianComp <- ancGraph$tianComponent(i)
                tianComp$mixedGraph <- MixedGraph(tianComp$L, tianComp$O, vertexNums = tianComp$topOrder)
                ancestralComps[[i]] <- tianComp
                ancestralComps[[i]]$ancestors <- nodeAncestors
            }

            nodeParents <- ancGraph$parents(i)

            # Using the first ancestor graph
            ancGraph <- ancestralComps[[i]]$mixedGraph
            htrFromNode <- ancGraph$htrFrom(i)
            allowedNodes <- setdiff(solvedNodes, ancGraph$siblings(i))
            allowedNodes <- union(setdiff(ancGraph$nodes(), htrFromNode), allowedNodes)
            allowedNodes <- intersect(ancGraph$nodes(), allowedNodes)
            if (length(allowedNodes) >= length(nodeParents)) {
                halfTrekSystemResult <- ancGraph$getHalfTrekSystem(allowedNodes,
                  nodeParents)
                if (halfTrekSystemResult$systemExists) {
                  identifiedEdges <- c(identifiedEdges, as.integer(rbind(nodeParents,
                    i)))
                  activeFrom <- halfTrekSystemResult$activeFrom
                  identifier <- createAncestralIdentifier(identifier, activeFrom,
                    nodeParents, i, intersect(activeFrom, htrFromNode), ancestralComps[[i]]$ancestors,
                    ancestralComps[[i]])
                  solvedParents[[i]] <- nodeParents
                  unsolvedParents[[i]] <- integer(0)
                  solvedNodes <- c(i, solvedNodes)
                }
                next
            }

            # Using the second ancestor graph
            nodeAncestors <- mixedGraph$ancestors(i)
            nodeAncSibs <- union(nodeAncestors, mixedGraph$siblings(nodeAncestors))
            allowedAncSibs <- intersect(nodeAncSibs, setdiff(union(solvedNodes, setdiff(1:m,
                mixedGraph$htrFrom(i))), c(i, mixedGraph$siblings(i))))
            ancGraph <- mixedGraph$inducedSubgraph(mixedGraph$ancestors(c(i, allowedAncSibs)))
            tianComp <- ancGraph$tianComponent(i)
            allowedNodes <- union(intersect(allowedAncSibs, tianComp$internal), tianComp$incoming)

            htrFromNode <- ancGraph$htrFrom(i)
            allowedNodes <- setdiff(solvedNodes, ancGraph$siblings(i))
            allowedNodes <- union(setdiff(ancGraph$nodes(), htrFromNode), allowedNodes)
            allowedNodes <- intersect(ancGraph$nodes(), allowedNodes)

            tianAncGraph <- MixedGraph(tianComp$L, tianComp$O, tianComp$topOrder)
            if (length(allowedNodes) >= length(nodeParents)) {
                halfTrekSystemResult <- tianAncGraph$getHalfTrekSystem(allowedNodes,
                  nodeParents)
                if (halfTrekSystemResult$systemExists) {
                  identifiedEdges <- c(identifiedEdges, as.integer(rbind(nodeParents,
                    i)))
                  activeFrom <- halfTrekSystemResult$activeFrom
                  identifier <- createAncestralIdentifier(identifier, activeFrom,
                    nodeParents, i, intersect(activeFrom, htrFromNode), ancGraph$nodes(),
                    tianComp)
                  solvedParents[[i]] <- nodeParents
                  unsolvedParents[[i]] <- integer(0)
                  solvedNodes <- c(i, solvedNodes)
                }
            }
        }
    }
    return(list(identifiedEdges = identifiedEdges, unsolvedParents = unsolvedParents,
        solvedParents = solvedParents, identifier = identifier))
}

#' Determines which edges in a mixed graph are ancestralID-identifiable
#'
#' Uses the an identification criterion of Drton and Weihs (2015); this version
#' of the algorithm is somewhat different from Drton and Weihs (2015) in that it
#' also works on cyclic graphs. The original version of the algorithm can be
#' found in the function \code{\link{graphID.ancestralID}}.
#'
#' @export
#'
#' @inheritParams generalGenericID
#' @inheritParams semID
#' @inheritParams ancestralIdentifyStep
#'
#' @return see the return of \code{\link{generalGenericID}}.
ancestralID <- function(mixedGraph, tianDecompose = T) {
    result <- generalGenericID(mixedGraph, list(ancestralIdentifyStep), tianDecompose = tianDecompose)
    result$call <- match.call()
    return(result)
}

#' Get getAncestors of nodes in a graph.
#'
#' Get the getAncestors of a collection of nodes in a graph g, the getAncestors DO
#' include the the nodes themselves.
#'
#' @param g the graph (as an igraph).
#' @param nodes the nodes in the graph of which to get the getAncestors.
#'
#' @return a sorted vector of all ancestor nodes.
getAncestors <- function(g, nodes) {
    if (vcount(g) == 0 || length(nodes) == 0) {
        return(numeric(0))
    }
    as.integer(sort(graph.bfs(g, nodes, mode = "in", unreachable = F)$order, na.last = NA))
    # sort(unique(unlist(neighborhood(g, vcount(g), nodes=nodes, mode='in'))))
}

#' Get getParents of nodes in a graph.
#'
#' Get the getParents of a collection of nodes in a graph g, the getParents DO include
#' the input nodes themselves.
#'
#' @param nodes the nodes in the graph of which to get the getParents.
#' @inheritParams getAncestors
#'
#' @return a sorted vector of all parent nodes.
getParents <- function(g, nodes) {
    if (vcount(g) == 0 || length(nodes) == 0) {
        return(numeric(0))
    }
    sort(unique(unlist(neighborhood(g, 1, nodes = nodes, mode = "in"))))
}

#' Get getSiblings of nodes in a graph.
#'
#' Get the getSiblings of a collection of nodes in a graph g, the getSiblings DO
#' include the input nodes themselves.
#'
#' @param nodes the nodes in the graph of which to get the getSiblings.
#' @inheritParams getAncestors
#'
#' @return a sorted vector of all getSiblings of nodes.
getSiblings <- function(g, nodes) {
    if (vcount(g) == 0 || length(nodes) == 0) {
        return(numeric(0))
    }
    sort(unique(unlist(neighborhood(g, 1, nodes = nodes, mode = "all"))))
}

#' Get descendants of nodes in a graph.
#'
#' Gets the descendants of a collection of nodes in a graph (all nodes that can
#' be reached by following directed edges from those nodes). Descendants DO
#' include the nodes themselves.
#'
#' @param nodes the nodes in the graph of which to get the descendants.
#' @inheritParams getAncestors
#'
#' @return a sorted vector of all descendants of nodes.
getDescendants <- function(g, nodes) {
    if (vcount(g) == 0 || length(nodes) == 0) {
        return(numeric(0))
    }
    as.integer(sort(graph.bfs(g, nodes, mode = "out", unreachable = F)$order,
        na.last = NA))
    # sort(unique(unlist(neighborhood(g, vcount(g), nodes=nodes, mode='out'))))
}

#' Get all HTR nodes from a set of nodes in a graph.
#'
#' Gets all vertices in a graph that are half-trek reachable from a set of
#' nodes.
#' WARNING: Often the half-trek reachable nodes from a vertex v are defined to
#' not include the vertex v or its getSiblings. We DO NOT follow this convention,
#' the returned set will include input nodes and their getSiblings.
#'
#' @inheritParams getMixedCompForNode
#' @param nodes the nodes in the graph of which to get the HTR nodes.
#'
#' @return a sorted list of all half-trek reachable nodes.
htr <- function(dG, bG, nodes) {
    if (!is.directed(dG) || is.directed(bG) || vcount(dG) != vcount(bG)) {
        stop("dG is undirected or bG is directed.")
    }
    if (vcount(dG) == 0 || length(nodes) == 0) {
        return(numeric(0))
    }
    return(getDescendants(dG, getSiblings(bG, nodes)))
}

#' Get the mixed component of a node in a mixed subgraph.
#'
#' For an input mixed graph H and set of nodes A, let GA be the subgraph of
#' H on the nodes A. This function returns the mixed component of GA containing
#' a specified node.
#'
#' @param dG a directed graph representing the directed part of the mixed graph.
#' @param bG an undirected graph representing the undirected part of the mixed
#'        graph.
#' @param subNodes an ancestral set of nodes in the mixed graph, this set should
#'        include the node for which the mixed component sould be found.
#' @param node the node for which the mixed component is found.
#' @return a list with two named elements:
#'          biNodes - the nodes of the mixed graph in the biDirected component
#'                    containing nodeName w.r.t the ancestral set of nodes
#'          inNodes - the nodes in the graph which are not part of biNodes
#'                    but which are a parent of some node in biNodes.
getMixedCompForNode <- function(dG, bG, subNodes, node) {
    VdG <- V(dG)
    VbG <- V(bG)
    m <- vcount(dG)
    if (is.null(VdG$names) || is.null(VbG$names) || any(VdG$names != 1:m) || any(VbG$names !=
        1:m)) {
        stop(paste("Input graphs to getMixedCompForNode must have vertices named",
            "1:m in order."))
    }

    ## This is a workaround of a graph.bfs bug in igraph
    ## versions 1.0.1 and before
    restricted = subNodes
    if (utils::packageVersion("igraph") <= "1.0.1") {
      restricted = restricted - 1
    }

    bidirectedComp <- as.integer(sort(graph.bfs(bG, root = node,
                                                restricted = restricted,
                                                mode = "total",
                                                unreachable = F)$order))

    bidirectedComp =
      as.integer(sort(graph.bfs(bG, root = node, restricted = restricted,
                                mode = "total", unreachable = F)$order))


    incomingNodes <- intersect(setdiff(getParents(dG, bidirectedComp), bidirectedComp),
        subNodes)
    return(list(biNodes = bidirectedComp, inNodes = incomingNodes))
}


#' Size of largest HT system Y satisfying the HTC for a node v except perhaps
#' having |getParents(v)| < |Y|.
#'
#' For an input mixed graph H, constructs the Gflow graph as described in Foygel
#' et al. (2012) for a subgraph G of H. A max flow algorithm is then run on
#' Gflow to determine the largest half-trek system in G to a particular node's
#' getParents given a set of allowed nodes. Here G should consist of a bidirected
#' part and nodes which are not in the bidirected part but are a parent of some
#' node in the bidirected part. G should contain the node for which to compute
#' the max flow.
#'
#' @param allowedNodes the set of allowed nodes.
#' @param biNodes the set of nodes in the subgraph G which are part of the
#'        bidirected part.
#' @param inNodes the nodes of the subgraph G which are not in the bidirected
#'        part but are a parent of some node in the bidirected component.
#' @param node the node (as an integer) for which the maxflow the largest half
#' trek system
#' @inheritParams graphID
#'
#' @return See title.
#'
#' @references
#' Foygel, R., Draisma, J., and Drton, M.  (2012) Half-trek criterion for
#' generic identifiability of linear structural equation models.
#' \emph{Ann. Statist.} 40(3): 1682-1713.
getMaxFlow <- function(L, O, allowedNodes, biNodes, inNodes, node) {
    if (!(node %in% biNodes) || any(O[biNodes, inNodes] != 0)) {
        stop(paste("When getting max flow either some in-nodes were connected to",
            "some bi-nodes or node was not in biNodes."))
    }
    if (length(intersect(allowedNodes, c(node, which(O[node, ] == 1)))) != 0) {
        stop("Allowed nodes contained getSiblings of input node or the node itself.")
    }
    allowedNodes <- union(allowedNodes, inNodes)
    m <- length(biNodes) + length(inNodes)

    if (m == 1) {
        return(0)
    }

    oldNumToNewNum <- numeric(m)
    oldNumToNewNum[biNodes] <- 1:length(biNodes)
    oldNumToNewNum[inNodes] <- (length(biNodes) + 1):m

    nodesToUse <- c(biNodes, inNodes)
    allowedNodes <- intersect(allowedNodes, nodesToUse)
    L[c(inNodes, biNodes), inNodes] <- 0
    O[inNodes, inNodes] <- 0
    L <- L[nodesToUse, nodesToUse]
    O <- O[nodesToUse, nodesToUse]

    # 1 & 2 = source & target 2 + {1,...,m} = L(i) for i=1,...,m 2 + m + {1,...,m} =
    # R(i)-in for i=1,...,m 2 + 2*m + {1,...,m} = R(i)-out for i=1,...,m

    Cap.matrix <- matrix(0, 2 + 3 * m, 2 + 3 * m)

    for (i in 1:m) {
        # edge from L(i) to R(i)-in, and to R(j)-in for all getSiblings j of i
        Cap.matrix[2 + i, 2 + m + c(i, which(O[i, ] == 1))] <- 1
        # edge from R(i)-in to R(i)-out
        Cap.matrix[2 + m + i, 2 + 2 * m + i] <- 1
        # edge from R(i)-out to R(j)-in for all directed edges i->j
        Cap.matrix[2 + 2 * m + i, 2 + m + which(L[i, ] == 1)] <- 1
    }

    allowedNodes <- oldNumToNewNum[allowedNodes]
    node <- oldNumToNewNum[node]
    Cap.matrix[1, 2 + allowedNodes] <- 1
    Cap.matrix[2 + 2 * m + which(L[, node] == 1), 2] <- 1

    return(igraph::graph.maxflow(igraph::graph.adjacency(Cap.matrix), source = 1,
        target = 2)$value)
}

#' Determine generic identifiability of an acyclic mixed graph using ancestral
#' decomposition.
#'
#' For an input, acyclic, mixed graph attempts to determine if the graph is
#' generically identifiable using decomposition by ancestral subsets. See
#' algorithm 1 of Drton and Weihs (2015).
#'
#' @export
#'
#' @inheritParams graphID
#'
#' @return The vector of nodes that could be determined to be generically
#' identifiable using the above algorithm.
#'
#' @references
#' {Drton}, M. and {Weihs}, L. (2015) Generic Identifiability of Linear
#' Structural Equation Models by Ancestor Decomposition. arXiv 1504.02992
graphID.ancestralID <- function(L, O) {
    m <- nrow(L)
    validateMatrices(L, O)
    O <- 1 * ((O + t(O)) != 0)

    dG <- igraph::graph.adjacency(L)
    newOrder <- as.integer(topological.sort(dG))
    L <- L[newOrder, newOrder]
    O <- O[newOrder, newOrder]

    dG <- igraph::graph.adjacency(L)
    bG <- igraph::graph.adjacency(O)
    V(dG)$names <- 1:m
    V(bG)$names <- 1:m

    # Generates a list where, for each node v, we have a vector corresponding to all
    # the nodes that could ever be in a half-trek system for v
    halfTrekSources <- vector("list", length = m)
    for (i in 1:m) {
        halfTrekSources[[i]] <- getSiblings(bG, getAncestors(dG, i))
    }

    # A matrix determining which nodes are half-trek reachable from each node
    Dependence.matrix <- O + diag(m)
    for (i in 1:m) {
        Dependence.matrix <- ((Dependence.matrix + Dependence.matrix %*% L) > 0)
    }

    Solved.nodes <- rep(0, m)
    Solved.nodes[which(colSums(L) == 0)] <- 1  # nodes with no getParents
    change <- 1
    count <- 1
    while (change == 1) {
        change <- 0

        Unsolved.nodes <- which(Solved.nodes == 0)
        for (i in Unsolved.nodes) {
            # A <- (Solved Nodes 'union' nodes not htr from i) \ ({i} 'union' sibs(i))
            A <- setdiff(c(which(Solved.nodes > 0), which(Dependence.matrix[i, ] ==
                0)), c(i, which(O[i, ] == 1)))
            # A <- A intersect (nodes that can ever be in a HT system for i)
            A <- intersect(A, halfTrekSources[[i]])

            mixedCompList <- getMixedCompForNode(dG, bG, getAncestors(dG, c(i, A)),
                i)
            flow <- getMaxFlow(L, O, A, mixedCompList$biNodes, mixedCompList$inNodes,
                i)
            if (flow == sum(L[, i])) {
                change <- 1
                count <- count + 1
                Solved.nodes[i] <- count
                next
            }

            mixedCompList <- getMixedCompForNode(dG, bG, getAncestors(dG, i), i)
            A <- intersect(A, unlist(mixedCompList))
            flow <- getMaxFlow(L, O, A, mixedCompList$biNodes, mixedCompList$inNodes,
                i)
            if (flow == sum(L[, i])) {
                change <- 1
                count <- count + 1
                Solved.nodes[i] <- count
                next
            }
        }
    }

    if (all(Solved.nodes == 0)) {
        Solved.nodes <- NULL
    } else {
        Solved.nodes <- order(Solved.nodes)[(1 + sum(Solved.nodes == 0)):m]
    }

    return(newOrder[Solved.nodes])
}
Lucaweihs/SEMID documentation built on July 22, 2023, 7:49 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Lucaweihs/SEMID
Identifiability of Linear Structural Equation Models

R/ancestralID.R
In Lucaweihs/SEMID: Identifiability of Linear Structural Equation Models

Defines functions graphID.ancestralID getMaxFlow getMixedCompForNode htr getDescendants getSiblings getParents getAncestors ancestralID ancestralIdentifyStep createAncestralIdentifier

Documented in ancestralID ancestralIdentifyStep createAncestralIdentifier getAncestors getDescendants getMaxFlow getMixedCompForNode getParents getSiblings graphID.ancestralID htr

R Package Documentation

Browse R Packages

We want your feedback!

Lucaweihs/SEMID Identifiability of Linear Structural Equation Models

R/ancestralID.R In Lucaweihs/SEMID: Identifiability of Linear Structural Equation Models

Defines functions graphID.ancestralID getMaxFlow getMixedCompForNode htr getDescendants getSiblings getParents getAncestors ancestralID ancestralIdentifyStep createAncestralIdentifier

Documented in ancestralID ancestralIdentifyStep createAncestralIdentifier getAncestors getDescendants getMaxFlow getMixedCompForNode getParents getSiblings graphID.ancestralID htr

R Package Documentation

Browse R Packages

We want your feedback!

Lucaweihs/SEMID
Identifiability of Linear Structural Equation Models

R/ancestralID.R
In Lucaweihs/SEMID: Identifiability of Linear Structural Equation Models