inst/doc/nethet.R

## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()

## ----echo=FALSE, include=FALSE, warning=FALSE------------------------------
# Load package
library(nethet)

set.seed(1)

## ----echo=TRUE, include=TRUE-----------------------------------------------
# Specify number of simulated samples and dimensionality
n = 100
p = 25

# Specify number of components of the mixture model and mixture probabilities
n.comp = 4

mix.prob = c(0.1, 0.4, 0.3, 0.2)

# Specify sparsity in [0,1], indicating fraction of off-diagonal zero entries.
s = 0.9


# Generate networks with random means and covariances. Means will be drawn from 
# a standard Gaussian distribution, non-zero covariance values from a 
# Beta(1,1) distribution.
sim.result = sim_mix_networks(n, p, n.comp, s, mix.prob)

## ----echo=TRUE, include=TRUE, fig.width=5, fig.height=3, fig.align='center'----
print(table(sim.result$comp)/n)

component = as.factor(sim.result$comp)

library('ggplot2')
qplot(x=sim.result$data[,1], y=sim.result$data[,2], 
      colour=component) + 
  xlab('Dimension 1') +
  ylab('Dimension 2')
  

## ----echo=TRUE, include=TRUE, fig.width=5, fig.height=3, fig.align='center'----
# Generate new dataset with the same covariances, but different means
sim.result.new = sim_mix_networks(n, p, n.comp, s, mix.prob, Sig=sim.result$Sig)

component = as.factor(sim.result.new$comp)

qplot(x=sim.result.new$data[,1], y=sim.result.new$data[,2], 
      colour=component) + 
  xlab('Dimension 1') +
  ylab('Dimension 2')

## ----echo=TRUE, include=TRUE, fig.width=7, fig.height=3, fig.align='center'----
## Sample size and number of nodes
n <- 40
p <- 10

## Specify sparse inverse covariance matrices,
## with number of edges in common equal to ~ 0.8*p
gen.net <- generate_2networks(p,graph='random',n.nz=rep(p,2),
                              n.nz.common=ceiling(p*0.8))

invcov1 <- gen.net[[1]]
invcov2 <- gen.net[[2]]

plot_2networks(invcov1,invcov2,label.pos=0,label.cex=0.7)

## ----echo=TRUE, include=TRUE-----------------------------------------------
set.seed(10)

n = 100
p = 25

# Generate networks with random means and covariances. 
sim.result = sim_mix_networks(n, p, n.comp, s, mix.prob)

test.data = sim.result$data
test.labels = sim.result$comp

# Reconstruct networks for each component
networks = het_cv_glasso(data=test.data, grouping=test.labels)

## ----echo=TRUE, include=TRUE, fig.width=5, fig.height=3, fig.align='center'----
# Component labels for covariance values
components = as.factor(rep(1:n.comp, each=p^2))

qplot(x=c(networks$Sig), y=c(sim.result$Sig),
			colour=components) + 
  xlab('Reconstructed Covariances') +
  ylab('True Covariances')
  

## ----eval=TRUE, echo=TRUE, include=TRUE, warning=TRUE, results="asis"------
## Set seed
set.seed(1)
 
## Sample size and number of nodes
p <- 30
 
## Specify sparse inverse covariance matrices
gen.net <- generate_2networks(p,graph='random',n.nz=rep(p,2),                               
                             n.nz.common=ceiling(p*0.8))
invcov1 <- gen.net[[1]]
invcov2 <- gen.net[[2]] 

## Get corresponding correlation matrices
cor1 <- cov2cor(solve(invcov1))
cor2 <- cov2cor(solve(invcov2))

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis"-----------------
## Generate data under null hypothesis
library(mvtnorm) # To generate multivariate Gaussian random samples

## Sample size
n <- 70

x1 <- rmvnorm(n,mean = rep(0,dim(cor1)[1]), sigma = cor1)
x2 <- rmvnorm(n,mean = rep(0,dim(cor1)[1]), sigma = cor1)

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis"-----------------
## Run diffnet (under null hypothesis)
dn.null <- diffnet_multisplit(x1,x2,b.splits=1,verbose=FALSE)

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis"-----------------

## Generate data under alternative hypothesis (datasets have different networks)
x1 <- rmvnorm(n,mean = rep(0,dim(cor1)[1]), sigma = cor1)
x2 <- rmvnorm(n,mean = rep(0,dim(cor1)[2]), sigma = cor2)

## Run diffnet (under alternative)
dn.altn <- diffnet_multisplit(x1,x2,b.splits=1,verbose=FALSE)

## ----eval=TRUE, echo=TRUE, include=TRUE, warning=TRUE, fig.width=3.5, fig.height=3.5, fig.align='center'----
## Typically we would choose a larger number of splits
# Use parallel library (only available under Unix) for computational efficiency
if(.Platform$OS.type == "unix") {
	dn.altn <- diffnet_multisplit(x1,x2,b.splits=50,verbose=FALSE,mc.flag=TRUE)
} else {
  dn.altn <- diffnet_multisplit(x1,x2,b.splits=25,verbose=FALSE,mc.flag=FALSE)
}

par(cex=0.7)
plot(dn.altn, cex=0.5) # histogram over 50 p-values
cat('p-value:',dn.altn$medagg.pval,'\n') # median aggregated p-value

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis", fig.width=5, fig.height=2.5, fig.align='center'----
## Generate new networks
set.seed(1)
p <- 9 # network with p nodes
n <- 40
hub.net <- generate_2networks(p,graph='hub',n.hub=3,n.hub.diff=1)#generate hub networks
invcov1 <- hub.net[[1]]
invcov2 <- hub.net[[2]]
plot_2networks(invcov1,invcov2,label.pos=0,label.cex=0.7,
               main=c('network 1', 'network 2'),cex.main=0.7)

## Generate data
library('mvtnorm')
x1 <- rmvnorm(n,mean = rep(0,p), sigma = cov2cor(solve(invcov1)))
x2 <- rmvnorm(n,mean = rep(0,p), sigma = cov2cor(solve(invcov2)))

## ----eval=TRUE, echo=TRUE, include=TRUE, warning=TRUE, results="asis"------
## Identify groups with 'gene-sets'
gene.names <- paste('G',1:p,sep='')
gsets <- split(gene.names,rep(1:3,each=3))

## ----eval=TRUE, echo=TRUE, include=TRUE, warning=TRUE, results="asis"------
## Run GGM-GSA
fit.ggmgsa <- ggmgsa_multisplit(x1,x2,b.splits=1,gsets,gene.names,verbose=FALSE)

library(xtable)
print(xtable(summary(fit.ggmgsa),digits=6))

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis"-----------------
## Number of predictors and sample size
p <- 100
n <- 80

## Predictor matrices
x1 <- matrix(rnorm(n*p),n,p)
x2 <- matrix(rnorm(n*p),n,p)

## Active-sets and regression coefficients
act1 <- sample(1:p,5)
act2 <- c(act1[1:3],sample(setdiff(1:p,act1),2))
beta1 <- beta2 <- rep(0,p)
beta1[act1] <- 0.7
beta2[act2] <- 0.7

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis", fig.align='center', fig.width=3.5,fig.height=3.5----
## Response vectors under null-hypothesis
y1 <- x1%*%as.matrix(beta1)+rnorm(n,sd=1)
y2 <- x2%*%as.matrix(beta1)+rnorm(n,sd=1)

## Differential regression; b.splits=10
fit.null <- diffregr_multisplit(y1,y2,x1,x2,b.splits=10)
par(cex=0.7)
plot(fit.null,cex=0.5) # histogram of p-values from b.split data splits
cat('p-value: ',fit.null$medagg.pval,'\n') # median aggregated p-value

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis",fig.align='center',fig.width=3.5,fig.height=3.5----
## Response vectors under alternative-hypothesis
y1 <- x1%*%as.matrix(beta1)+rnorm(n,sd=1)
y2 <- x2%*%as.matrix(beta2)+rnorm(n,sd=1)

## Differential regression (asymptotic p-values)
fit.alt <- diffregr_multisplit(y1,y2,x1,x2,b.splits=10)
par(cex=0.7)
plot(fit.alt)
cat('p-value: ',fit.alt$medagg.pval,'\n')

## ----echo=TRUE, include=TRUE, warning=TRUE, results="asis",fig.align='center',fig.width=3.5,fig.height=3.5----
## Differential regression (permutation-based p-values; 100 permutations)
fit.alt.perm <- diffregr_multisplit(y1,y2,x1,x2,b.splits=5,n.perm=100)

## ----echo=TRUE, include=TRUE-----------------------------------------------
# Generate networks with random means and covariances. 
n = 1000
p = 10
s = 0.9
n.comp = 3

# Create different mean vectors
Mu = matrix(0,p,n.comp)

# Define non-zero means in each group (non-overlapping)
nonzero.mean = split(sample(1:p),rep(1:n.comp,length=p))

# Set non-zero means to fixed value
for(k in 1:n.comp){
  Mu[nonzero.mean[[k]],k] = -2/sqrt(ceiling(p/n.comp))
}

# Generate data
sim.result = sim_mix_networks(n, p, n.comp, s, Mu=Mu)

## ----echo=TRUE, include=TRUE-----------------------------------------------
# Run mixglasso
mixglasso.result = mixglasso(sim.result$data, n.comp=3)

# Calculate adjusted rand index to judge how accurate the clustering is
# Values > 0.7 indicate good agreement.
library(mclust, quietly=TRUE)
adj.rand = adjustedRandIndex(mixglasso.result$comp, sim.result$comp)
cat('Adjusted Rand Index', round(adj.rand, digits=2), '\n')

## ----echo=FALSE, include=TRUE, cache=FALSE, results='asis'-----------------
crosstab <- function(class1,class2){
  tab <- matrix(NA,length(levels(class1)),length(levels(class2)))
  colnames(tab) <- levels(class2)
  rownames(tab) <- levels(class1)
  for (i in levels(class1)){
    tab[i,] <- table(class2[which(class1==i)])
  }
  return(tab)
}

# Relabel true groupings to avoid confusion
sim.grouping = c('A', 'B', 'C')[sim.result$comp]

cross.table = crosstab(factor(mixglasso.result$comp), factor(sim.grouping))

# Generate table
library(xtable)
latex.table = xtable(cross.table, caption=
										 	paste('Cross-tabulation of mixglasso clusters (rows) with',
                    'true group assignments (columns).'),
										 label='table:crosstab')

print(latex.table)

## ----echo=TRUE, include=TRUE, fig.width=5, fig.height=3, fig.align='center'----

# Run mixglasso over a range of numbers of components
mixglasso.result = mixglasso(sim.result$data, n.comp=1:6)

# Repeat with lambda=0 and lambda=Inf for comparison
mixglasso.result.0 = mixglasso(sim.result$data, n.comp=1:6, lambda=0)
mixglasso.result.Inf = mixglasso(sim.result$data, n.comp=1:6, lambda=Inf)

# Aggregate BIC results for plotting
BIC.vals = c(mixglasso.result$bic, mixglasso.result.0$bic,
						 mixglasso.result.Inf$bic)

lambda.labels = rep(c('Default', 'Lambda = 0', 'Lambda = Inf'), each=6)

# Plot to verify that minimum BIC value corresponds with true 
library(ggplot2)
plotting.frame <- data.frame(BIC=BIC.vals, Num.Comps=rep(1:6, 3), Lambda=lambda.labels)

p <- ggplot(plotting.frame) + 
	geom_line(aes(x=Num.Comps, y=BIC, colour=Lambda)) + 
	geom_vline(xintercept=3, linetype='dotted')

print(p)

## ----echo=TRUE, include=TRUE, fig.width=3, fig.height=3, fig.align='center'----

# Retrieve best clustering and networks by BIC
mixglasso.clustering = mixglasso.result$models[[mixglasso.result$bic.opt]]

# Plot networks, omitting edges with absolute partial correlation < 0.5 in 
# every group.
# NOTE: Not displayed.
# plot(mixglasso.clustering, p.corrs.thresh=0.5)

## ----echo=TRUE, include=TRUE, fig.width=5, fig.height=3, fig.align='center'----

# Plot edges, omitting those with absolute partial correlation < 0.5 in every 
# group.
g = dot_plot(mixglasso.clustering, p.corrs.thresh=0.5, dot.size.range=c(1,5))

## ----echo=TRUE, include=TRUE, fig.width=7, fig.height=7, fig.align='center'----

# Specify edges
node.pairs = rbind(c(9,10), c(2,5),c(4,9))

# Create scatter plots of specified edges
g = scatter_plot(mixglasso.clustering, data=sim.result$data,
						node.pairs=node.pairs, cex=0.5)

## ----echo=TRUE, include=TRUE, eval=FALSE-----------------------------------
#  
#  # Save network in CSV format, omitting edges with absolute partial correlation
#  # less than 0.25.
#  #export_network(mixglasso.clustering, file='nethet_network.csv',
#  #							 p.corrs.thresh=0.25)

## ----echo=TRUE, include=TRUE, eval=FALSE-----------------------------------
#  
#  # Save network in CSV format suitable for Cytoscape import
#  #export_network(mixglasso.clustering, file='nethet_network.csv',
#  #							 p.corrs.thresh=0.25, quote=FALSE)

## ----echo=TRUE, include=TRUE, eval=TRUE------------------------------------
sessionInfo()

Try the nethet package in your browser

Any scripts or data that you put into this service are public.

nethet documentation built on Nov. 8, 2020, 6:54 p.m.