R/network-methods.R

Defines functions make_network

Documented in make_network

################################################################################
#' Make microbiome network (igraph)
#'
#' A specialized function for creating a network representation of microbiomes,
#' sample-wise or taxa-wise,
#' based on a user-defined ecological distance and (potentially arbitrary) threshold.
#' The graph is ultimately represented using the 
#' \code{igraph}-package.
#'
#' @usage make_network(physeq, type="samples", distance="jaccard", max.dist = 0.4, 
#'     keep.isolates=FALSE, ...)
#'
#' @param physeq (Required). Default \code{NULL}. 
#'  A \code{\link{phyloseq-class}} object,
#'  or \code{\link{otu_table-class}} object,
#'  on which \code{g} is based. \code{phyloseq-class} recommended.
#'
#' @param type (Optional). Default \code{"samples"}.
#'  Whether the network should be samples or taxa/OTUs.
#'  Supported arguments are \code{"samples"}, \code{"taxa"},
#'  where \code{"taxa"} indicates using the OTUs/taxaindices,
#'  whether they actually represent species or some other taxonomic rank.
#'
#'  NOTE: not all distance methods are supported if \code{"taxa"}
#'  selected for type. For example, the UniFrac distance and DPCoA
#'  cannot be calculated for taxa-wise distances, because they use
#'  a taxa-wise tree as part of their calculation between samples, and
#'  there is no transpose-equivalent for this tree.
#'
#' @param distance (Optional). Default \code{"jaccard"}.
#'  Any supported argument to the \code{method} parameter of the 
#'  \code{\link{distance}} function is supported here.
#'  Some distance methods, like \code{"unifrac"}, may take 
#'  a non-trivial amount of time to calculate, in which case
#'  you probably want to calculate the distance matrix separately,
#'  save, and then provide it as the argument to \code{distance} instead.
#'  See below for alternatives).
#'
#'  Alternatively, if you have already calculated the sample-wise distance
#'  object, the resulting \code{dist}-class object
#'  can be provided as \code{distance} instead (see examples).
#' 
#'  A third alternative is to provide a function that takes 
#'  a sample-by-taxa matrix (typical vegan orientation)
#'  and returns a sample-wise distance
#'  matrix. 
#' 
#' @param max.dist (Optional). Default \code{0.4}. 
#'  The maximum ecological distance (as defined by \code{distance})
#'  allowed between two samples to still consider them ``connected'' 
#'  by an edge in the graphical model.
#' 
#' @param keep.isolates (Optional). Default \code{FALSE}. Logical.
#'  Whether to keep isolates (un-connected samples, not microbial isolates)
#'  in the graphical model that is returned. Default results in isolates
#'  being removed from the object.
#'
#' @param ... (Optional). Additional parameters passed on to \code{\link{distance}}.
#'
#' @return A \code{igraph}-class object. 
#' 
#' @seealso 
#'  \code{\link{plot_network}}
#'
#' @importFrom igraph graph.adjacency
#' @importFrom igraph V
#' @importFrom igraph delete.vertices
#' @importFrom igraph degree
#' @importFrom igraph vcount
#'
#' @export
#'
#' @examples
#' # # Example plots with Enterotype Dataset
#' data(enterotype)
#' ig <- make_network(enterotype, max.dist=0.3)
#' plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.3, label=NULL)
#' # 
#' ig1 <- make_network(enterotype, max.dist=0.2)
#' plot_network(ig1, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.3, label=NULL)
#' # 
#' # # Three methods of choosing/providing distance/distance-method
#' # Provide method name available to distance() function
#' ig <- make_network(enterotype, max.dist=0.3, distance="jaccard")
#' # Provide distance object, already computed
#' jaccdist <- distance(enterotype, "jaccard")
#' ih <- make_network(enterotype, max.dist=0.3, distance=jaccdist)
#' # Provide "custom" function.
#' ii <- make_network(enterotype, max.dist=0.3, distance=function(x){vegan::vegdist(x, "jaccard")})
#' # The have equal results:		
#' all.equal(ig, ih)
#' all.equal(ig, ii)
#' # 
#' # Try out making a trivial "network" of the 3-sample esophagus data,
#' # with weighted-UniFrac as distance
#' data(esophagus)
#' ij <- make_network(esophagus, "samples", "unifrac", weighted=TRUE)
make_network <- function(physeq, type="samples", distance="jaccard", max.dist = 0.4, 
	keep.isolates=FALSE, ...){

	if( type %in% c("taxa", "species", "OTUs", "otus", "otu")){
    # Calculate or asign taxa-wise distance matrix
	  if( class(distance) == "dist" ){ 
	    # If distance a distance object, use it rather than re-calculate
	    obj.dist <- distance
	    if( attributes(obj.dist)$Size != ntaxa(physeq) ){
	      stop("ntaxa(physeq) does not match size of dist object in distance")
	    }
	    if( !setequal(attributes(obj.dist)$Labels, taxa_names(physeq)) ){
	      stop("taxa_names does not exactly match dist-indices")
	    }
	  } else if( class(distance) == "character" ){ 
	    # If character string, pass on to distance(), assume supported
	    obj.dist <- distance(physeq, method=distance, type=type, ...)
	    # Else, assume a custom function and attempt to calculate.
	  } else { 	
	    # Enforce orientation for taxa-wise distances
	    if( !taxa_are_rows(physeq) ){ physeq <- t(physeq) }
	    # Calculate distances
	    obj.dist <- distance(as(otu_table(physeq), "matrix"))	    
	  }
	    # coerce distance-matrix back into vanilla matrix, Taxa Distance Matrix, TaDiMa
	    TaDiMa  <- as.matrix(obj.dist)
	    # Add Inf to the diagonal to avoid self-connecting edges (inefficient)
	    TaDiMa <- TaDiMa + diag(Inf, ntaxa(physeq), ntaxa(physeq))
	    # Convert distance matrix to coincidence matrix, CoMa, using max.dist
		CoMa <- TaDiMa < max.dist   
	} else if( type == "samples" ){
    # Calculate or asign sample-wise distance matrix
	  if( class(distance) == "dist" ){ # If argument is already a distance matrix.
	    # If distance a distance object, use it rather than re-calculate
	    obj.dist <- distance
	    if( attributes(obj.dist)$Size != nsamples(physeq) ){
	      stop("nsamples(physeq) does not match size of dist object in distance")
	    }
	    if( !setequal(attributes(obj.dist)$Labels, sample_names(physeq)) ){
	      stop("sample_names does not exactly match dist-indices")
	    }
	    # If character string, pass on to distance(), assume supported    	
	  } else if( class(distance) == "character" ){
	    # Else, assume a custom function and attempt to calculate. 
	    obj.dist <- distance(physeq, method=distance, type=type, ...)
	  } else { 
	    # Enforce orientation for sample-wise distances
	    if(taxa_are_rows(physeq)){ physeq <- t(physeq) }
	    # Calculate distances
	    obj.dist <- distance(as(otu_table(physeq), "matrix"))	    
	  }
	  # coerce distance-matrix back into vanilla matrix, Sample Distance Matrix, SaDiMa
	  SaDiMa  <- as.matrix(obj.dist)
	  # Add Inf to the diagonal to avoid self-connecting edges (inefficient)
	  SaDiMa <- SaDiMa + diag(Inf, nsamples(physeq), nsamples(physeq))
	  # Convert distance matrix to coincidence matrix, CoMa, using max.dist
	  CoMa <- SaDiMa < max.dist  
	} else {
		stop("type argument must be one of \n (1) samples \n or \n (2) taxa")
	}
	# Calculate the igraph-formatted network
	ig <- graph.adjacency(CoMa, mode="lower")
	if( !keep.isolates ){
	  # If not-keeping isolates, remove them
	  isolates <- V(ig)[degree(ig) == 0]
	  ig = delete.vertices(ig, V(ig)[degree(ig) == 0])
	}
	if( vcount(ig) < 2 ){
	  # Report a warning if the graph is empty
	  warning("The graph you created has too few vertices. Consider changing `max.dist` argument, and check your data.")
	}      
	return(ig)  
}
################################################################################
joey711/phyloseq documentation built on Nov. 4, 2022, 1:16 a.m.