R/conversion-helpers.R
In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis

## Copyright 2013, 2014, 2015 Ramon Diaz-Uriarte

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.


## Posets, restriction tables, etc.


## This is all too complicated. Have here just the minimal set we need,
## because we will soon be changing formats.



## When we go poset -> rT or
##  adjMat -> rT

## we have there all there is to be, or the poset follows the usual
## (messy) rules. In particular, CBN does understand that OK. And a linear
## poset, for example, places all nodes there. And CBN returns all nodes.


## Problem is generating a poset from an adjmat that is the output of OT,
## since that adjmat need not contain all nodes, as OT sometimes drops
## some of them. So we need to preserve names AND when we write the poset,
## we need to specify, as we always do, the correct number of columns.
## Thus, we use the special intAdjMatToPosetPreserveNames.
## Change name! OTtoPoset


## In newer, future, versions of OncoSimulR all this will be cleared out
##    by having posets that are strict. This means:

##  - if we pass things to CBN, we will clean that to the messy format in
##  write.poset.

## And we will relax adjMats, to allow for non-integer names? Or pass that
## as additional labels, so we can interchange with posets without ambiguity?

## adjMatToPoset is not really used for anything at all in the code now.

## Remember from adjmat we go to rT directly.

## Now, posetToGraph also includes checking strict adjMat, except if
## called from plotPoset, where arbitrary labels can be used.





checkProperMinimalAdjMat <- function(x,
                                     rootNames = c("0", "root", "Root"),
                                     root = TRUE,
                                     orderedNames = TRUE) {

    ## We go from poset to rt through adjmat, but the adjmat has no root,
    ## as the rT has no root. But this must have ordered names, as any
    ## poset uses only integers from 1 to whatever.

    ## Any adjmat should fulfill what follows, with the option of not
    ## checking for root and for orderedNames
    
    if(is.null(colnames(x)) && is.null(rownames(x)))
        stop("column and/or row names are null")
    if(!identical(colnames(x), rownames(x)))
        stop("colnames and rownames not identical")
    if(!is.integer(x))
        stop("Non-integer values")
    if( !all(x %in% c(0L, 1L) ))
        stop("Values not in {0L, 1L}")
    posRoot <- which(colnames(x) %in% rootNames)
    cs <- colSums(x)
    if(root) {
        ## only applies to rooted adj mat, as a matrix without root where
        ## all descend from root will have not a single 1
        if(any( (cs + rowSums(x)) < 1))
            stop("You have nodes that have no incoming and no outgoing connections")
    }
    if(length(posRoot) > 0) {
        if(length(posRoot) > 1)
            stop("Ambiguous location of root")
        if( posRoot != 1)
            stop("Root must be in first row and column")
        if(cs[1] > 0)
            stop("Nothing can have an edge to Root")
        cs <- cs[-1]
        if(any(cs < 1))
            stop(paste("In trees with Root,",
                       " there must be at least one incoming edge to every node (except Root)"))
        if(!root)
            stop("You said there is no root, but there is one")
    } else {
        if(root)
            stop("No Root, and you said one should be present")
    }

    if(orderedNames) {
        if(length(posRoot) == 1) {
            ncx <- ncol(x) - 1
            scn <- sort(as.integer(colnames(x)[-1]))
        }
        else {
            ncx <- ncol(x)
            scn <- sort(as.integer(colnames(x)))
        }
        if(!identical(scn, seq.int(ncx)))
            stop(paste("Either non-integer column names,",
                       " or non-successive integers, ",
                       " or not sorted integer names") )
    }
}


checkProperFullAdjMat <- function(x,
                              rootNames = c("0", "root", "Root")) {
    checkProperMinimalAdjMat(x, rootNames = rootNames,
                             root = TRUE, orderedNames = TRUE)
}


checkProperOTAdjMat <- function(x) {
    ## oncotree ALWAYS returns things that have a rootName that says "Root"
    checkProperMinimalAdjMat(x, rootNames = "Root", root = TRUE,
                             orderedNames = FALSE)
}



poset.to.restrictTable <- function(x) {
    x1 <- posetToGraph(x, names = 1:max(x), addroot = FALSE, type = "adjmat")
    adjmat.to.restrictTable(x1, root = FALSE,
                            rootNames = c("0", "root", "Root"))
}


adjmat.to.restrictTable <- function(x, root = FALSE,
                                    rootNames = c("0", "root", "Root")) {

    checkProperMinimalAdjMat(x, rootNames = rootNames,
                             root = root,
                             orderedNames = TRUE)
    if(root)
        x <- x[-1, -1, drop = FALSE]
    ## ## we have the zero
    ## if( any(colnames(x) %in% c("0", "root", "Root")) & !root)
    ##     warning("Looks like the matrix has a root but you specified root = FALSE")

    ## if(!identical(colnames(x), rownames(x)))
    ##     stop("colnames and rownames not identical")
    ## if(root) {
    ##     posRoot <- which(colnames(x) %in% rootNames)
    ##     if(!length(posRoot))
    ##         stop("No column with the root name")
    ##     if(length(posRoot) > 1)
    ##         stop("Ambiguous location of root")
    ##     x <- x[-posRoot, -posRoot]
    ## }

    ## if(typeof(x) != "integer")
    ##     warning("This is not an _integer_ adjacency matrix")
    ## if( !all(x %in% c(0, 1) ))
    ##     stop("Values not in [0, 1]")

    ## if(!is.null(colnames(x))) {
    ##     ## FIXME: this makes sense with numeric labels for columns, but
    ##     ## not ow.
    ##     oi <- order(as.numeric(colnames(x)))
    ##     if(any(oi != (1:ncol(x)))) {
    ##         warning("Reordering adjacency matrix")
    ##         x <- x[oi, oi]
    ##     }
    ## }
    
    num.deps <- colSums(x)
    max.n.deps <- max(num.deps)
    rt <- matrix(-9L, nrow = nrow(x),
                 ncol = max.n.deps + 2)
    for(i in 1:ncol(x)) {
        if( num.deps[ i ])
            rt[i , 1:(2 + num.deps[ i ])] <- c(i, num.deps[i ],
                                               which(x[, i ] != 0))
        else
            rt[i , 1:2] <- c(i , 0L)
    }
    class(rt) <- "restrictionTable"
    return(rt)
}




## this preserves the names in the adj mat. For OT -> poset.
## intAdjMatToPosetPreserveNames <- function(x, dropRoot = TRUE) {

OTtoPoset <- function(x) {
    checkProperOTAdjMat(x)

    ## ## root must always be dropped, as we are creating a poset
    ## dropRoot <- TRUE
    ## if(!dropRoot)
    ##     warning("Are you sure you do not want dropRoot?")
    ## if(dropRoot) {
    ##     ncx <- ncol(x)
    ##     x <- x[-1, -1]
    ##     ## y <- (which(x == 1L, arr.ind = TRUE) )
    ##     ## if(nrow(y) == 0) ## all nodes descend from 0
    ##     ##     y <- cbind(0L, ncx - 1L)
    ##     namesInts <- type.convert(colnames(x), as.is = TRUE)
        
    ## } else {
    ##     ## y <- (which(x == 1L, arr.ind = TRUE) )
    ##     namesInts <- c(0L, type.convert(colnames(x)[-1], as.is = TRUE))
    ## }

    ncx <- ncol(x)
    x <- x[-1, -1, drop = FALSE]
    ## y <- (which(x == 1L, arr.ind = TRUE) )
    ## if(nrow(y) == 0) ## all nodes descend from 0
    ##     y <- cbind(0L, ncx - 1L)
    namesInts <- type.convert(colnames(x), as.is = TRUE)
    
    if(!is.integer(namesInts))
        stop("cannot convert to poset adj mat with non-int colnames")
    y <- (which(x == 1L, arr.ind = TRUE) )
    if(nrow(y) == 0) ## all nodes descend from 0
        y <- cbind(0L, ncx - 1L)

    p2 <- cbind(namesInts[ y[, 1] ], namesInts[ y[, 2] ])
    storage.mode(p2) <- "integer"
    if(ncol(p2) == 1) ## the hack for when all from root
        p2 <- matrix(nrow = 0, ncol = 2)
    return(p2)
}    

## the next is just a convenience
## sortAdjMat <- function(am) {
##     cn <- colnames(am)
##     rootpos <- grep("^Root$", cn) 
##     if(length(rootpos) != 1)
##         stop("No root in adj mat, or multiple Roots")
##     cn <- c("Root", sort(colnames(am)[-rootpos]))
##     return(am[cn, cn])
## }


## No longer used
## sortAdjMat <- function(am) {
##     ## If column names, except Root, are integers, sort as integers. O.w.,
##     ## general lexicog. sort.
##     cn <- colnames(am)
##     rootpos <- grep("^Root$", cn) 
##     if(length(rootpos) != 1)
##         stop("No root in adj mat, or multiple Roots")
##     cn0 <- colnames(am)[-rootpos]
##     namesInts <- type.convert(cn0, as.is = TRUE)
##     if(is.integer(namesInts)) {
##         cn <- c("Root", sort(namesInts))
##     } else {
##         cn <- c("Root", sort(cn0))
##     }
##     return(am[cn, cn])
## }




convertRestrictTable <- function(x) {
    ## Only used to put the restrictTable in the format that the C++ code
    ## uses.
    t.restrictTable <- matrix(as.integer(x),
                              ncol = nrow(x), byrow = TRUE)

    t.restrictTable[-2, ] <- t.restrictTable[-2, ] - 1
    return(t.restrictTable)
}


posetToGraph <- function(x, names,
                         addroot = FALSE,
                         type = "graphNEL",
                         strictAdjMat = TRUE) {

    ## Using addroot = FALSE returns adjacency matrices without root,
    ## which is not neat.
    
    ## But this is a complicated mess that can lead to confusion. And
    ## cannot easily handle posets with names of nodes. So I fix all that.
    
    ## Intermediate nodes, if no ancestor or descendant, need not
    ## be in the poset.
    ## Any node with index largest than any node with ancestor or descendant
    ## needs to be in the file.

    ## E.g., rbind(c(1,2), c(4, 5)) will get three as a child-less and
    ## parent-less node.  But to place 6 you need c(6, NA)

    
    ## We could use something like in run.oncotree,
    ## as a poset also is a set of parent-child,
    ## and we would then need to add the root connections
    ## as is done below in no.ancestor.

    ## But we do not for now. Note we show lonely nodes, which oncotrees
    ## do not.  wait: when using root, we do not have "lonely nodes"
    ## anymore.  But that is irrelevant for metrics based on transitive
    ## closure. Not for Diff, etc.

    ## In fact, this is all OK, but is confussing, because I can
    ## have two kinds of posets: ones that are full, with NAs, etc, if
    ## needed. Others that are not, but are fixed by the code here. But if
    ## using the later, the user needs to make sure that the last node is
    ## in the poset. This can be used as a shortcut trick, but in the docs
    ## I do not do it, as it is bad practice.

    if(!is.integer(x))
        stop("poset contains non-integers")
    
    m <- length(names) 
    m1 <- matrix(0L, nrow = m, ncol = m)
    colnames(m1) <- names
    rownames(m1) <- names
    if(is.null(dim(x)) ) {
        if(length(x) != 1)
            stop("If poset is not a matrix, it must be a vector of length 1")
    } else { ## a matrix so a non-null poset
        ## a debugging check. 
        if(nrow(x) > 0) {
            if(addroot) {
                if(max(x, na.rm = TRUE) != (m - 1))
                    stop("\n in poset, max(x) != (m - 1)")
            } else { ## this was missing!
                if(max(x, na.rm = TRUE) != m)
                    stop("\n in poset, max(x) != m")
            }
        }
        if(nrow(x) > 0) {
            if(addroot)
                m1[x + 1] <- 1L
            else
                m1[x] <- 1L ## this will remove all entries with a 0
                            ## index. So posets where explicit the dep. on
                            ## 0.
        }
        if((length(names) > 1) & addroot) {
            no.ancestor <- which(apply(m1, 2, function(x) all(x == 0)))
            no.ancestor <- no.ancestor[-1]
            m1[cbind(1, no.ancestor)] <- 1L
        } ## o.w. do nothing
    }
    if(addroot)
        m1[1, 1] <- 0L
    ## yes, we must skip this check when plotting a poset (because we can
    ## have arbitrary non-integer names )
    if(strictAdjMat)
        checkProperMinimalAdjMat(m1, root = addroot)
    
    if(type == "adjmat") return(m1)
    else if (type == "graphNEL") return(as(m1, "graphNEL"))
    ## does not show the labels
    ## else if (type == "igraph") return(graph.adjacency(m1))
}

Any scripts or data that you put into this service are public.

OncoSimulR documentation built on Nov. 8, 2020, 8:31 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

OncoSimulR
Forward Genetic Simulation of Cancer Progression with Epistasis

R/conversion-helpers.R
In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis

Defines functions posetToGraph convertRestrictTable OTtoPoset adjmat.to.restrictTable poset.to.restrictTable checkProperOTAdjMat checkProperFullAdjMat checkProperMinimalAdjMat

Try the OncoSimulR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

OncoSimulR Forward Genetic Simulation of Cancer Progression with Epistasis

R/conversion-helpers.R In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis

Defines functions posetToGraph convertRestrictTable OTtoPoset adjmat.to.restrictTable poset.to.restrictTable checkProperOTAdjMat checkProperFullAdjMat checkProperMinimalAdjMat

Try the OncoSimulR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

OncoSimulR
Forward Genetic Simulation of Cancer Progression with Epistasis

R/conversion-helpers.R
In OncoSimulR: Forward Genetic Simulation of Cancer Progression with Epistasis