R/community_graph.R

Defines functions prec2cov cov2prec make_precision_mat graph2prec make_graph scale_free erdos_renyi hub cluster band block edge_count eigGraph graphReport covReport

Documented in band block cluster cov2prec erdos_renyi graph2prec hub make_graph make_precision_mat prec2cov scale_free

#' Precision matrix (inverse covariance) to a covariance matrix
#'
#' @param Precision symmetric precision matrix
#' @param tol tolerance to define a zero eigenvalue (ie - is Prec positive definite)
#' @importFrom MASS ginv
#' @export
prec2cov <- function(Precision, tol=1e-4) {
    eigval <- eigen(Precision)$values
    if (any(eigval < tol)) {
        warning("Warning: Precision matrix not invertible, trying generalized inverse instead") 
        ginv(Precision)
    } else {
        solve(Precision)
    }
}

#' Covariance matrix to its matrix inverse (Precision matrix)
#'
#' @param Cov symmetric covariance matrix (can be correlation also)
#' @param tol tolerance to define a zero eigenvalue (ie - is Prec positive definite)
#' @importFrom MASS ginv
#' @export
cov2prec <- function(Cov, tol=1e-4) {
    Precision <- tryCatch(solve(Cov), error = function(e) { 
              warning("Warning: Precision matrix not invertible, trying generalized inverse instead") 
              ginv(Cov) })
    Precision[which(Precision == 0)] <- tol
}


#' Wrapper function for generating a Precision matrix
#'
#' @param method graph generation method
#' @param D graph dimension (number of OTUs)
#' @param u some parameter that controls Prec values
#' @param v some other parameter that controls Prec values
#' @export
make_precision_mat <- function(method, D, u, v, ...) {
    if (v <=0 || v >= 1) stop ("v must be between 0 and 1")
    Graph <- make_graph(method, D, ...)
    Precision <- graph2prec(Graph, u=u, v=v)
    return(list(Graph=Graph, Precision=Precision))
}

#' Convert a symmetric graph (extension of R matrix class)
#' 
#'  Has internal rules for converting various graph topologies into the associated
#' adjancency and, therefore, precision matrix
#'
#' @param u some parameter that controls Prec values
#' @param v some other parameter that controls Prec values
#' @export
graph2prec <- function(Graph, u=0.1, v=0.3, method) {

    if (class(Graph) != 'graph') stop('input is not a graph')
    n <- ncol(Graph)

    if (missing(method)) {
        method <- attr(Graph, "graph")
        if (is.null(method)) stop("Please supply graph gen method")
    }

    A <- Graph * v #adjacency matrix, multiply by arbitrary constant
    eigVals <- sort(eigen(A)$values)
    lam_min <- eigVals[1]
    
    
    if (method %in% c("scale_free", "hub")) {
        # Continuous scales
        D   <- diag(seq(1.5,1,length.out=n))
        Phi <- D %*%(A + (abs(lam_min)+u+0.1)*diag(n))%*%D
    } else if (method %in% c( "erdos_renyi", "cluster", "band")) {
        # two underlying scales
        D   <- diag(c(rep(1.5, floor(n/2)), rep(1, ceiling(n/2))))
        Phi <- Phi <- D %*%(A + (abs(lam_min)+u+0.1)*diag(n))%*%D
    } else if (method == "block") {
        # random Gaussian graph:
        temp <- matrix(0, n, n)
        temp[upper.tri(temp)] <- 1.5*sqrt(1/n)*rnorm(n*(n-1)/2)
        temp <- temp %*% t(temp)
        
        D <-diag(c(rep(1.5, floor(n/2)), rep(0.5, ceiling(n/2))))
        Phi = temp * Graph + D
    }
    return(Phi)
}


#' Procedure to generate graph topologies for Gaussian Graphical Models
#' @param method Type of graph to make
#' @param D Number of nodes/OTUs (Graph dimension)
#' @param e Number of edges (preferably sparse, must be at least 1/2 D)
#' @param enforce add/remove edges to enforce graph has e edges
#' @param ... additional options to graph method
#' @export
make_graph <- function(method, D, e, enforce=TRUE, ...) {
    if (e < round(D/2)) stop('Number of edges e must be bigger than 1/2 D')
    if (e>((D-1)*D)/2) stop('Number of edges e must smaller than D(D-1)/2')
    
    method <- switch(method, cluster = "cluster", erdos_renyi = "erdos_renyi", 
                       hub = "hub", scale_free = "scale_free", 
                       block = "block", band = "band", 
                       stop(paste("Error: graph method ", method, "not supported")))
    graphgen <- get(method)
    Graph    <- graphgen(D, e=e, ...)
    attr(Graph, "graph") <- method
    
    ## enforce edge number
    if (enforce) {
        nedges <- edge_count(Graph)
        diffE  <- nedges - e
        ind2sub <- function(m, ind) {
            r <- ((ind-1) %% m) + 1
            c <- floor((ind-1) / m) + 1
            return(c(r,c))
        }
        
        if (diffE > 0) {
            oneInds <- which(Graph == 1)
            oneInds <- oneInds[sample(1:length(oneInds), abs(diffE))]
            for (i in oneInds) {
                gind <- ind2sub(D, i)
                Graph[gind[1], gind[2]] <- Graph[gind[2], gind[1]] <- 0
            }
        } else if (diffE < 0) {
            diag(Graph) <- 1   # fill diag with dummy 1s
            zeroInds <- which(Graph == 0)
            zeroInds <- zeroInds[sample(1:length(zeroInds), abs(diffE))]
            for (i in zeroInds) {
                gind <- ind2sub(D, i)
                Graph[gind[1], gind[2]] <- Graph[gind[2], gind[1]] <- 1
            }
            diag(Graph) <- 0
        }
    }
    return(structure(Graph, class='graph'))
}

#' keywords internal
scale_free <- function(D, e, pfun) {
# Make a scale free graph
# Args:
#   D -   > The number of nodes
#   pfun -> override default probability of getting an edge

    if (e < D) stop("Too few edges to generate a scale-free graph, e>=D")

    #initialize D by D zero matrix
    Graph <- matrix(0, D, D)
    K_mat <- matrix(0, D)   # keep track of the number of degrees in the graph
    
    # pick first two nodes at random
    nodes <- sample(1:D, 2)
    #connect nodes
    Graph[nodes[1], nodes[2]] <- 1
    Graph[nodes[2], nodes[1]] <- 1
    # Add degree to nodes
    K_mat[nodes[1]] <- K_mat[nodes[1]] + 1
    K_mat[nodes[2]] <- K_mat[nodes[2]] + 1

    if (missing(pfun)) {
        pfun <- function(K_mat, i) {
            (K_mat[i] / sum(K_mat))
        }
    }

    newnodes <- setdiff(1:D, nodes)
    existnodes <- nodes
    while (length(newnodes) != 0) {  # For each node
        n1 <- na.exclude(newnodes)[1]
        for (n2 in existnodes)  { # and a node already in the graph
            p <- pfun(K_mat, n2)
            conn <- sample(c(TRUE, FALSE), 1, p=c(p, 1-p))
            if (conn) {
                 #connect nodes
                Graph[n1, n2] <- 1
                Graph[n2, n1] <- 1
                 # Add degree to nodes
                K_mat[n1] <- K_mat[n1] + 1
                K_mat[n2] <- K_mat[n2] + 1
                # add connected nodes to list, remove from new
                existnodes <- c(existnodes, n1)
                newnodes   <- setdiff(newnodes, n1)
                break
            } else {
                # move n1 to back of the list
                if (length(newnodes > 1)) newnodes <- newnodes[c(2:length(newnodes),1)]
            }
        }
    }
    return(Graph)
}

#' keywords internal
erdos_renyi <- function(D, e, p=e/(D*(D-1)/2)) {
# Make a random graph:
# Args:
#   D -> number of nodes
#   e -> Number of edges
#   p -> probability of edge
    if (missing(e)) stop("Must supply either number of edges (e)")
    Gtemp <- matrix(runif(D^2, 0,1), D, D)
    Gtemp <- ifelse(Gtemp < p, 1, 0)
    Gtemp[lower.tri(Gtemp, diag=TRUE)] <- 0
    Graph <- Gtemp + t(Gtemp)
    return(Graph)
}

#' keywords internal
hub <- function(D, e, numHubs=ceiling(D/20)) {
# Make hub graph
# Args:
# D   -> number of nodes
# e   -> number of edges, only for compatability
# numHubs -> number of hubs (divide nodes into this many groups)

    Graph   <- matrix(0, D, D)
    # partiion nodes into groups
    groupid  <- factor(sample(1:numHubs, D, replace=TRUE))
    groupind <- split(1:D, groupid)

    for (ind in groupind) {
        hub <- sample(ind, 1)   # pick 1 hub
        nodes <- setdiff(ind, hub)
        Graph[hub, nodes] <- 1
        Graph[nodes, hub] <- 1
    }
    attr(Graph, "g") <- numHubs
    return(Graph)
}

#' keywords internal
cluster <- function(D, e, numHubs=floor((D/15)+(e/D))-1) {
# Make a cluster graph (groups of random graphs)
# Args:
# D   -> number of nodes
# numHubs -> number of hubs (divide nodes into this many groups)
# d    -> number of edges
    Graph   <- matrix(0, D, D)
    # partiion nodes into groups
    groupid  <- factor(sample(1:numHubs, D, replace=TRUE))
    groupind <- split(1:D, groupid)
    eHub     <- ceiling(e/numHubs)
    for (ind in groupind) {
        nHub <- length(ind)
        p    <- eHub/(nHub*(nHub-1)/2)
        Graph[ind,ind] <- erdos_renyi(length(ind), e=eHub, p=p)
    }
    attr(Graph, "g") <- numHubs
    return(Graph)
}

#' keywords internal
band <- function(D, e) {
# Make a banded graph 
# Args:
#   D    -> number of nodes
#   e    -> target number of edges

    bestFit <- FALSE
    k <- 1
    Graph   <- matrix(0, D, D)
    zero    <- matrix(0, D, D)
    # add off diagonals until e is exhausted
    while (!bestFit) {
        off1    <- matrix(0, D, D)
        diag(off1[-(1:k),]) <- rep(1, D-k)
        off2    <- matrix(0, D, D)
        diag(off2[,-(1:k)]) <- rep(1, D-k)
        tempG <- Graph + off1 + off2
        if (sum(tempG)>(2*e)) bestFit = TRUE
        else Graph <- tempG
        k <- k + 1
    }
    return(Graph)
}

#' keywords internal
block <- function(D, e, numHubs) {  #blocksize=20, p=D/((D/blocksize)*(blocksize*(blocksize)/2)), u=NULL, v=NULL) {
# Make precision matrix for block graph (note the difference from other functions)
# Args:
#   D    -> Number of nodes
#   e    -> target number of edges
#   numHubs -> number of hubs, calculate best number if missing

    if (missing(numHubs)) {
        bestFit <- Inf
        for (numHubs in 2:(D/2)) {
            nHubs   <- round(D/numHubs)
            currFit <- abs(nHubs*(nHubs-1)/2*numHubs - e)
            if (currFit < bestFit) {
                bestNumHubs <- numHubs
                bestNHubs   <- nHubs
                bestFit     <- currFit
            }
        }
        numHubs <- bestNumHubs
    }

    Graph <- matrix(0, D, D)
   # partition nodes into blocks
    blockid  <- factor(sample(1:numHubs, D, replace=TRUE))
    blockind <- split(1:D, blockid)
  #  hubs <- round(seq.int(1,D+1, length.out=numHubs+1))
    for (ind in blockind) {
        nHubs <- length(ind)
        Ghub  <- matrix(1, nHubs, nHubs)
        Ghub[seq(1, nHubs^2, nHubs+1)] <- 0
        Graph[ind,ind] <- Ghub
    }
    return(Graph)
}


#' @keywords internal
edge_count <- function(Graph) {
    # return number of edges in a [square matrix] graph
    length(which(Graph[upper.tri(Graph)] != 0))
}

#' @keywords internal
eigGraph <- function(G, tol=1e-12) {
    D <- diag(rowSums(G))
    L <- D-G
    eigsG   <- eigen(L)$values
    specGap <- eigsG[which(eigsG > tol)]
    nCC <- sum(eigsG < tol)
    return(list(specGap=specGap, nCC=nCC, eigsG=eigsG))
}

#' @keywords internal
graphReport <- function(Graph) {
    nedges   <- edge_count(Graph)
    eigGraph <- eigGraph(Graph)
    return(c(list(NumEdges=nedges), eigGraph))
}

#' @keywords internal
covReport <- function(Cov, Prec) {
    condCov <- kappa(Cov)
    condPrec <- kappa(Prec)
    # Stats over correlations
    corr  <- cov2cor(Cov)
    corr <- corr[corr <0.999]
    corr <- corr[corr != 0]
    minC  <- min(corr)
    meanC <- mean(corr)
    medianC <- median(corr)
    maxC  <- max(corr)
    corrStats <- t(data.frame(minC, meanC, medianC, maxC))
    return(list(condCov=condCov, condInvCov=condPrec, corrStats=corrStats))
}
zdk123/codmeR documentation built on May 4, 2019, 10:14 p.m.