knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dpi = 300 )
PCGII
is an R package developed for Information-incorporated Gene Network Construction with FDR Control. PCGII stands for Partial Correlation Graphs with Information Incorporation.
To start using PCGII, prepare your gene expression data and your prior knowledge about gene-gene direct associations in the target network. Two methods were implemented in the package, PCGII for the case when you have prior knowledge and CLEVEL for the case when you do not have prior knowledge. You can select one of two methods to construct the gene network. Incorporating false information could lead to an inflation of the empirical FDR. As a result, we strongly recommend incorporating only high-confidence information in real data applications.
# devtools::install_github("HaoWang47/PCGII") library(PCGII) library(corpcor) library(glmnet) library(igraph) library(Matrix) library(mvtnorm) library(tidyverse) set.seed(010120000)
Three methods of generating a network structure were included in PCGII, including Scale Free Network Structure, Random Connection Network Structure, and Block Diagonal Network Structure. In this section, we will illustrate what are expected during data preparation. We will start with a network with 100 nodes.
## Simulate precision matrix as your true network structure N_nodes = 100 N_samples = 30 Omega = make_sf_precision_mat(e = 1, p = N_nodes)
## Display the true network nodenames = 1:N_nodes links = which(lower.tri(Omega) & Omega!=0, arr.ind = TRUE) dim(links) # display number of connections, two columns correspond to the connected nodes my_net <- graph_from_data_frame(d=links, vertices=nodenames, directed=F) Ecolrs=c("gray50") Vcolrs=c("gold") plot(my_net, edge.arrow.size=.5, edge.color=Ecolrs, vertex.frame.color="#ffffff", vertex.label.color="black", vertex.size=3, layout=layout.auto(my_net))
plot(my_net, edge.arrow.size=.5, edge.color=Ecolrs, vertex.frame.color="#ffffff", vertex.label.color="black", vertex.size=3, layout=layout.circle(my_net))
## Simulate Normal data Sigma = solve(Omega) mu = exp(qnorm(seq(from = 0.01, to = 0.99, length.out = N_nodes), mean = 2, sd=1)) norm_data = mvtnorm::rmvnorm(n = N_samples, mean = mu, sigma = Sigma) ## Convert simulated normal data to expression count data norm_data[norm_data<0] = 0 Exp_data = round(norm_data) head(Exp_data[,1:10])
For illustration, we will randomly select a subset of true connections and use them as prior knowledge. It should be carefully considered if such prior connections are undirected (i.e. A-B OR A->B & B->A) in practice. Under the PCGII framework, it is recommended to use undirected connections as prior information.
prior_links = links[sample(1:nrow(links), .2*nrow(links)),] types = rep(1, 99) types[prior_links] = 2 # 1 for unknown true connections, 2 for known prior connections undirected_prior_links = undirected_prior(prior_links) prior_net = graph_from_data_frame(d=cbind(links,types), vertices=nodenames, directed=F) Ecolrs=c("grey90", "grey10") # dark grey shows known prior connections E(prior_net)$color = Ecolrs[E(prior_net)$types] Vcolrs=c("gold") plot(prior_net, edge.arrow.size=.5, vertex.frame.color="#ffffff", vertex.label.color="black", vertex.size=3, layout=layout.circle(my_net))
fixed_lamdba = sqrt(2*log(N_nodes/sqrt(N_samples))/N_samples) PCGII.out = PCGII(df = Exp_data, prior = undirected_prior_links, lambda = fixed_lamdba) PCGII.inf = inference(PCGII.out, alpha = .1)
PCGII_links = which(lower.tri(PCGII.inf) & PCGII.inf!=0, arr.ind = TRUE) dim(PCGII_links) # display number of connections detected by PCGII PCGII_net <- graph_from_data_frame(d=PCGII_links, vertices=nodenames, directed=F) Ecolrs=c("blue") Vcolrs=c("gold") plot(PCGII_net, edge.arrow.size=.5, edge.color=Ecolrs, vertex.frame.color="#ffffff", vertex.label.color="black", vertex.size=3, layout=layout.circle(PCGII_net))
CLEVEL.out = clevel(df = Exp_data, lambda = fixed_lamdba) CLEVEL.inf = inference(CLEVEL.out, alpha = .1)
CLEVEL_links = which(lower.tri(CLEVEL.inf) & CLEVEL.inf!=0, arr.ind = TRUE) dim(CLEVEL_links) # display number of connections detected by PCGII CLEVEL_net <- graph_from_data_frame(d=CLEVEL_links, vertices=nodenames, directed=F) Ecolrs=c("blue") Vcolrs=c("gold") plot(CLEVEL_net, edge.arrow.size=.5, edge.color=Ecolrs, vertex.frame.color="#ffffff", vertex.label.color="black", vertex.size=3, layout=layout.circle(CLEVEL_net))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.