Nothing
#' Conduct Network analysis based on tax summary object
#'
#' @param taxobj tax summary objects computed by \code{\link{tax_summary}}.
#' @param taxlevel taxonomy levels used for analysis. Must be one of c("Domain","Phylum","Class","Order","Family","Genus","Species","Base")
#' @param reads Logical,default:FALSE. Taxonomy abundance type used in analysis.FALSE for relative abundance, TRUE for absolute abundance.
#' @param n Numeric. Number of sample size indicating kept asv/otu/gene/taxa appearing. Recommended to set more than half of total sample size.
#' @param threshold Numeric.Threshold of absolute correlation value (r value for pearson method and rho value for spearman method).
#' @param rel_threshold Numeric.Threshold of relative abundance included in the network analysis.Default:0
#' @param method Character, default: "spearman". A character indicating which correlation coefficient method to be computed. One of "pearson" or "spearman"
#' @param display Logical, default:TRUE. If display a preview plot of network based on igraph. FALSE for the first attempt is recommended in case of too many vertices and edges.
#' @return One list contains nodes information table, adjacency column table, adjacency matrix and 'igraph' object.
#' @export
#'
#' @details
#' 1. We had optimized the correlation algorithm to achieve a faster running speed. It takes less than 2 minute to calculate dataframe correlation and p value which more than 400 samples and 10000 OTUs for computer with dual Core i5 processor.
#' However, too many vertices(>2000) or links(>10000) may slow the statistical process and visualization,so we recommend that in your first attempt,set `display` paramter as `F` to have a preview.
#' Then you can adjust your n/threshold/method paramter to generate a suitable visualization network
#'
#' 2. We display a preview plot so as to adjusting your network. Generally a global figure (like we show in examples) with less than 1000 vertices and 5000 edges/links
#' is recommended. Further more,we recommend you to output the statistics and adjacency table and use software like cytoscape or gephi for better visualization.
#'
#' @note
#' 1. Replicates should be at least 5,more than 8 is recommend.
#'
#' 2. In case of too many edges/links or not a global network plot, you can stop the process immediately to provent wasting too much time.
#'
#' @import igraph
#' @importFrom Hmisc rcorr
#' @importFrom dplyr left_join
#' @importFrom fdrtool fdrtool
#' @examples
#' {
#' ### Data preparation ###
#' data("Two_group")
#' set.seed(999)
#' ## Analysis
#' network_results <- network_analysis(
#' taxobj = Two_group,
#' taxlevel = "Genus",
#' n = 10,
#' threshold = 0.8
#' )
#'
#' # Nodes information table
#' network_nodes <- network_results$Nodes_info
#' head(network_nodes)
#'
#' # Adjacency table
#' network_adjacency <- network_results$Adjacency_column_table
#' head(network_adjacency)
#'
#' # Complete adjacency matrix
#' network_matrix <- network_results$Adjacency_matrix
#' print(network_matrix[1:10, 1:10])
#'
#' # igraph object
#' igraph_object <- network_results$Igraph_object
#' network_stat(igraph_object) # In case you want to see statistics again
#' # or do other analysis based on igraph.
#' }
network_analysis<-function(taxobj,taxlevel,reads=FALSE,n,threshold,rel_threshold=0,method="spearman",display=TRUE){
if(!taxlevel %in% c("Domain","Kingdom","Phylum","Class","Order","Family","Genus","Species","Base")){
stop("Illegal 'taxlevel',please choose among c('Domain','Kingdom','Phylum','Class','Order','Family','Genus','Species','Base'")
}
if(reads==FALSE){
input0=eval(parse(text=paste0("taxobj","$",taxlevel,"_percent")))
}else{
input0=eval(parse(text=paste0("taxobj","$",taxlevel)))
}
taxonomy=eval(parse(text=paste0("taxobj","$",taxlevel,"_taxonomy")))
input1=data.frame(input0[,-1],row.names =taxonomy[,1])
if(reads==FALSE){
if((which(rowMeans(input1)>rel_threshold) %>% length())==0){
stop("'rel_threshold' was too high!Please set lower!" )
}
input1=input1[rowMeans(input1)>rel_threshold,]
}else{
if((which(rowMeans(sweep(input1,colSums(input1),"/",MARGIN=2))>rel_threshold) %>% length())==0){
stop("'rel_threshold' was too high!Please set lower!" )
}
input1=input1[rowMeans(sweep(input1,colSums(input1),"/",MARGIN=2))>rel_threshold,]
}
zero_count=function(input){length(which(input==0)) %>% return()}
zerocount=apply(input1,1,zero_count)
input1=input1[which(zerocount<=(ncol(input1)-n)),]
input1=input1[which(rowSums(input1)>0),]
##network analysis
corr=rcorr(as.matrix(t(input1)),type=method)
cor.p=corr$P;cor.p[is.na(cor.p)]<- 0
fdr=fdrtool(as.numeric(cor.p), statistic="pvalue",plot=FALSE,verbose = FALSE) ##Global fdr correlation##
cor.r=corr$r
cor.q=matrix(fdr$qval,ncol=ncol(cor.p),nrow=nrow(cor.p));cor.q[is.nan(cor.q)]<- 0
cor.r[cor.q>0.05|abs(cor.r)<threshold] = 0 ##fliter via threshold##
cor.r[is.na(cor.r)]<-0 ##NA变0##
cor.r[which(cor.r>0.9999)]<-0 ###对角线的1变成0##
cor.r1=cor.r;cor.r1[which(cor.r1!=0)]<-1 ##无方向矩阵用于igraph##
cutoff=which(rowSums(cor.r1)== 0)
if(length(cutoff)==0){cor.r=cor.r;cor.r1=cor.r1}else{
cor.r=cor.r[-c(cutoff),-c(cutoff)];cor.r1=cor.r1[-c(cutoff),-c(cutoff)]}
cor.r1[which(cor.r>0)]<-1;cor.r1[which(cor.r<0)]<- -1;
adj_matrix=cor.r1
cor.r1[lower.tri(cor.r1,diag = TRUE)]<-NA
###creat adjacency###
source<-rownames(cor.r1)
adjacency<- data.frame(source,cor.r1) %>%
gather("target","value",-source)
adjacency<- adjacency[adjacency$value %in% c(1,-1),]
adjacency$value=as.numeric(adjacency$value)
igraph1<-graph_from_data_frame(adjacency,directed = FALSE)##全局输出##
network_stat(igraph1)
if(length(E(igraph1))>10000){warning("\n too many edges/links!Better STOP the process")}
if(isTRUE(display)){
plot(igraph1,main="Co-occurrence network",vertex.frame.color=NA,vertex.label=NA,edge.width=1,
vertex.size=5,edge.lty=1,edge.curved=TRUE,margin=c(0,0,0,0))}
V(igraph1)$degree=igraph::degree(igraph1)
V(igraph1)$modularity <- membership(cluster_fast_greedy(igraph1))%>%as.numeric()
cor.tempr=cor.r[order(rownames(cor.r),decreasing = FALSE),];cor.tempr=cor.tempr[,order(colnames(cor.tempr),decreasing = FALSE)]
#calculate zipi
communities <- cluster_fast_greedy(igraph1)
comm_membership <- igraph::membership(communities)
node_degree <- igraph::degree(igraph1)
module_mean <- tapply(node_degree, comm_membership, mean)
module_sd <- tapply(node_degree, comm_membership, sd)
Zi <- ifelse(module_sd[comm_membership] != 0,
(node_degree - module_mean[comm_membership]) / module_sd[comm_membership],0)
module_connections <- matrix(0, nrow=length(V(igraph1)), ncol=max(comm_membership))
for (v in V(igraph1)) {
neighbors_of_v <- neighbors(igraph1, v)
modules_of_neighbors <- comm_membership[neighbors_of_v]
module_connections[v, ] <- table(factor(modules_of_neighbors, levels=1:max(comm_membership)))
}
Pi <- 1 - rowSums((module_connections / node_degree)^2)
zi_pi8 <- data.frame(
name = V(igraph1)$name,
Zi = Zi,
Pi = Pi
)
#merging
output=data.frame(nodes_id = V(igraph1)$name, node_degree = V(igraph1)$degree, node_betw=betweenness(igraph1),
node_evcent=evcent(igraph1,scale = FALSE)$vector,Clustering_coefficient=transitivity(igraph1,type="local"),
No.module = V(igraph1)$modularity,Zi=zi_pi8$Zi,Pi=zi_pi8$Pi)
output=left_join(output,data.frame(nodes_id=taxonomy[,1],taxonomy)) %>% suppressMessages()
output=output[,-c(9)]
Groupfile=eval(parse(text=paste0("taxobj","$","Groupfile")))
config=list(input0,taxonomy,Groupfile,taxlevel,n,threshold,taxobj$configuration)
names(config)=c("input_data","input_taxonomy","Groupfile","taxlevel","n","threshold","taxobj_configuration")
outlist=c(list(output),list(adjacency),list(adj_matrix),list(igraph1),list(config))
names(outlist)=c("Nodes_info","Adjacency_column_table","Adjacency_matrix","Igraph_object","config")
return(outlist)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.