inst/doc/SpectralTAD.R

## ----set-options, echo=FALSE, cache=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
options(width = 400)

## ---- eval = FALSE, message=FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # if (!requireNamespace("BiocManager", quietly=TRUE))
#  #     install.packages("BiocManager")
#  # BiocManager::install("SpectralTAD")
#  devtools::install_github("cresswellkg/SpectralTAD")
#  library(SpectralTAD)

## ---- echo=FALSE, message=FALSE, warning=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(SpectralTAD)

## ---- echo = FALSE, warning = FALSE, message = FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
data("rao_chr20_25_rep")
rao_chr20_25_rep = HiCcompare::sparse2full(rao_chr20_25_rep)
row.names(rao_chr20_25_rep) = colnames(rao_chr20_25_rep) = format(as.numeric(row.names(rao_chr20_25_rep)), scientific = FALSE)
rao_chr20_25_rep[1:5, 1:5]

## ---- echo = FALSE, warning = FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
row.names(rao_chr20_25_rep) = NULL
sub_mat = cbind.data.frame("chr19", as.numeric(colnames(rao_chr20_25_rep)), as.numeric(colnames(rao_chr20_25_rep))+25000, rao_chr20_25_rep)[1:10, 1:10]
colnames(sub_mat) = NULL

sub_mat

## ---- echo = FALSE, warning = FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
head(HiCcompare::full2sparse(rao_chr20_25_rep), 5)

## ---- eval = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  #Read in data
#  cool_mat = read.table("Rao.GM12878.50kb.txt")
#  #Convert to sparse 3-column matrix using cooler2sparse from HiCcompare
#  sparse_mats = HiCcompare::cooler2sparse(cool_mat)
#  #Remove empty matrices if necessary
#  #sparse_mats = sparse_mats$cis[sapply(sparse_mats, nrow) != 0]
#  #Run SpectralTAD
#  spec_tads = lapply(names(sparse_mats), function(x) {
#    SpectralTAD(sparse_mats[[x]], chr = x)
#  })
#  

## ---- eval = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  #Read in both files
#  mat = read.table("amyg_100000.matrix")
#  bed = read.table("amyg_100000_abs.bed")
#  #Convert to modified bed format
#  sparse_mats = HiCcompare::hicpro2bedpe(mat,bed)
#  #Remove empty matrices if necessary
#  #sparse_mats$cis = sparse_mats$cis[sapply(sparse_mats, nrow) != 0]
#  #Go through all matrices
#  sparse_tads = lapply(sparse_mats$cis, function(x) {
#    #Pull out chromosome
#    chr = x[,1][1]
#    #Subset to make three column matrix
#    x = x[,c(2,5,7)]
#    #Run SpectralTAD
#    SpectralTAD(x, chr=chr)
#  })

## ---- message = FALSE, warning = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#Get the rao contact matrix built into the package
data("rao_chr20_25_rep")
head(rao_chr20_25_rep)
#We see that this is a sparse 3-column contact matrix
#Running the algorithm with resolution specified
results = SpectralTAD(rao_chr20_25_rep, chr = "chr20", resolution = 25000, qual_filter = FALSE, z_clust = FALSE)
#Printing the top 5 TADs
head(results$Level_1, 5)
#Repeating without specifying resolution
no_res = SpectralTAD(rao_chr20_25_rep, chr = "chr20", qual_filter = FALSE, z_clust = FALSE)
#We can see below that resolution can be estimated automatically if necessary
identical(results, no_res)

## ---- message = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#Running SpectralTAD with silhouette score filtering
qual_filt = SpectralTAD(rao_chr20_25_rep, chr = "chr20", qual_filter = TRUE, z_clust = FALSE, resolution = 25000)
#Showing quality filtered results
head(qual_filt$Level_1,5)
#Quality filtering generally has different dimensions
dim(qual_filt$Level_1)
dim(results$Level_1)

## ---- message = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
z_filt = SpectralTAD(rao_chr20_25_rep, chr = "chr20", qual_filter = FALSE, z_clust = TRUE, resolution = 25000)
head(z_filt$Level_1, 5)
dim(z_filt$Level_1)

## ---- message = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#Running SpectralTAD with 3 levels and no quality filtering
spec_hier = SpectralTAD(rao_chr20_25_rep, chr = "chr20", resolution = 25000, qual_filter = FALSE, levels = 3)
#Level 1 remains unchanged
head(spec_hier$Level_1,5)
#Level 2 contains the sub-TADs for level 1
head(spec_hier$Level_2,5)
#Level 3 contains even finer sub-TADs for level 1 and level 2
head(spec_hier$Level_3,5)


## ---- eval = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  #Creating replicates of our HiC data for demonstration
#  cont_list = replicate(3,rao_chr20_25_rep, simplify = FALSE)
#  #Creating a vector of chromosomes
#  chr_over = c("chr20", "chr20", "chr20")
#  #Creating a list of labels
#  labels = c("Replicate 1", "Replicate 2", "Replicate 3")
#  SpectralTAD_Par(cont_list = cont_list, chr_over = chr_over, labels = labels, cores = 3)
#  

## ---- message = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(microbenchmark)
#Converting to nxn
n_n = HiCcompare::sparse2full(rao_chr20_25_rep)
#Converting to nxn+3
n_n_3 = cbind.data.frame("chr20", as.numeric(colnames(n_n)), as.numeric(colnames(n_n))+25000, n_n)
#Defining each function
sparse = SpectralTAD(cont_mat = rao_chr20_25_rep, chr = "chr20", qual_filter = FALSE)
n_by_n = SpectralTAD(cont_mat = n_n, chr = "chr20", qual_filter = FALSE)
n_by_n_3 =SpectralTAD(cont_mat = n_n_3, chr = "chr20", qual_filter = FALSE)

#Benchmarking different parameters
microbenchmark(sparse = SpectralTAD(cont_mat = rao_chr20_25_rep, chr = "chr20", qual_filter = FALSE),
n_by_n = SpectralTAD(cont_mat = n_n, chr = "chr20", qual_filter = FALSE),
n_by_n_3 =SpectralTAD(cont_mat = n_n_3, chr = "chr20", qual_filter = FALSE), unit = "s", times = 3)

## ---- message = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
microbenchmark(quality_filter = SpectralTAD(cont_mat = n_n, chr = "chr20", qual_filter = TRUE, z_clust = FALSE), no_filter = SpectralTAD(cont_mat = n_n, chr = "chr20", qual_filter = FALSE, z_clust = FALSE), z_clust = SpectralTAD(cont_mat = n_n, chr = "chr20", qual_filter = FALSE, z_clust = TRUE), times = 3, unit = "s")

## ---- eval = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  #Get contact matrix
#  data("rao_chr20_25_rep")
#  head(rao_chr20_25_rep)
#  #Run spectral TAD with output format "hicexplorer" or "bed" and specify the path
#  spec_hier = SpectralTAD(rao_chr20_25_rep, chr = "chr20", resolution = 25000, qual_filter = FALSE, levels = 3, out_format = "hicexplorer", out_path = "chr20.bed")
#  

## ---- eval = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  #Get contact matrix
#  data("rao_chr20_25_rep")
#  head(rao_chr20_25_rep)
#  #Run spectral TAD with output format "hicexplorer" or "bed" and specify the path
#  spec_hier = SpectralTAD(rao_chr20_25_rep, chr = "chr20", resolution = 25000, qual_filter = FALSE, levels = 3, out_format = "juicebox", out_path = "chr20.bedpe")

Try the SpectralTAD package in your browser

Any scripts or data that you put into this service are public.

SpectralTAD documentation built on Nov. 8, 2020, 8:29 p.m.