Nothing
## ----set-options, echo=FALSE, cache=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
options(width = 400)
knitr::opts_chunk$set(warnings = FALSE, message = FALSE)
## ---- eval = FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# # if (!requireNamespace("BiocManager", quietly=TRUE))
# # install.packages("BiocManager")
# # BiocManager::install("preciseTAD")
# devtools::install_github("dozmorovlab/preciseTAD")
# # Alternatively
# # devtools::install_github("stilianoudakis/preciseTAD")
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(knitr)
library(e1071)
library(preciseTAD)
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# arrowhead -c 1 \ #chromosome to call TADs on
# -r 5000 \ #HiC data resolution
# ~/GSE63525_GM12878_insitu_primary.hic \ #location of the .HIC file
# ~/arrowhead_output #location to store the output
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
data("arrowhead_gm12878_5kb")
head(arrowhead_gm12878_5kb)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
bounds <- extractBoundaries(domains.mat = arrowhead_gm12878_5kb, preprocess = FALSE, CHR = c("CHR1", "CHR22"), resolution = 5000)
# View unique boundaries
bounds
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# path <- "./path/to/BEDfiles"
# tfbsList <- bedToGRangesList(filepath=path, bedList=NULL, bedNames=NULL, pattern = "*.bed", signal=4)
data("tfbsList")
names(tfbsList)
tfbsList
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
set.seed(123)
tadData <- createTADdata(bounds.GR = bounds,
resolution = 5000,
genomicElements.GR = tfbsList,
featureType = "distance",
resampling = "rus",
trainCHR = "CHR1",
predictCHR = "CHR22")
# View subset of training data
tadData[[1]][1:5,1:4]
# Check it is balanced
table(tadData[[1]]$y)
# View subset of testing data
tadData[[2]][1:5,1:4]
## ----results='hide'-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
set.seed(123)
rfe_res <- TADrfe(trainData = tadData[[1]],
tuneParams = list(ntree = 500, nodesize = 1),
cvFolds = 5,
cvMetric = "Accuracy",
verbose = TRUE)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# View RFE performances
rfe_res[[1]]
# View the variable importance among top n features across each CV fold
head(rfe_res[[2]])
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Restrict the data matrix to include only SMC3, RAD21, CTCF, and ZNF143
tfbsList_filt <- tfbsList[names(tfbsList) %in% c("Gm12878-Ctcf-Broad",
"Gm12878-Rad21-Haib",
"Gm12878-Smc3-Sydh",
"Gm12878-Znf143-Sydh")]
set.seed(123)
tadData <- createTADdata(bounds.GR = bounds,
resolution = 5000,
genomicElements.GR = tfbsList_filt,
featureType = "distance",
resampling = "rus",
trainCHR = "CHR1",
predictCHR = "CHR22")
# Run RF
set.seed(123)
tadModel <- TADrandomForest(trainData = tadData[[1]],
testData = tadData[[2]],
tuneParams = list(mtry = 2,
ntree = 500,
nodesize = 1),
cvFolds = 3,
cvMetric = "Accuracy",
verbose = FALSE,
model = TRUE,
importances = TRUE,
impMeasure = "MDA",
performances = TRUE)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# View model performances
performances <- tadModel[[3]]
performances$Performance <- round(performances$Performance, digits = 2)
rownames(performances) <- performances$Metric
kable(t(performances), caption = "List of model performances when validating an RF built on CHR1 on CHR22 test data.")
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
svmModel <- svm(y ~ ., data = tadData[[1]], kernel = "radial", cost = 1, gamma = 0.5)
svmPreds <- predict(svmModel, tadData[[2]][, -1], positive = "Yes")
# View confusion matrix
table(svmPreds, tadData[[2]][, 1])
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Run preciseTAD
set.seed(123)
pt <- preciseTAD(genomicElements.GR = tfbsList_filt,
featureType = "distance",
CHR = "CHR22",
chromCoords = list(25500000, 27500000),
tadModel = tadModel[[1]],
threshold = 1.0,
verbose = FALSE,
parallel = NULL,
DBSCAN_params = list(10000, 3),
flank = 5000)
# What is getting returned? PTBRs - precide TAD boundary regions, and PTBPs - points
names(pt)
# View the results
pt
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# # Transform
# pt_juice <- juicer_func(pt[[2]])
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# filepath = "~/path/to/store/ptbps"
# write.table(pt_juice,
# file.path(filepath, "pt_juice.bed"),
# quote = FALSE,
# col.names = FALSE,
# row.names = FALSE,
# sep = "\t")
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# # Read in RF model built on chr1-chr21
# tadModel <- readRDS("~/Downloads/CHR22_GM12878_5kb_Peakachu.rds")
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# # Establishing Hela-specific genomic annotations
# # Reading in CTCF, RAD21, SMC3, and ZNF143 from repository and storing as list
# ctcf <- as.data.frame(read_delim("http://raw.githubusercontent.com/stilianoudakis/preciseTAD_supplementary/master/data/bed/hela/ctcf.bed", delim = "\t", col_names = FALSE))
# rad21 <- as.data.frame(read_delim("http://raw.githubusercontent.com/stilianoudakis/preciseTAD_supplementary/master/data/bed/hela/rad21.bed", delim = "\t", col_names = FALSE))
# smc3 <- as.data.frame(read_delim("http://raw.githubusercontent.com/stilianoudakis/preciseTAD_supplementary/master/data/bed/hela/smc3.bed", delim = "\t", col_names = FALSE))
# znf143 <- as.data.frame(read_delim("http://raw.githubusercontent.com/stilianoudakis/preciseTAD_supplementary/master/data/bed/hela/znf143.bed", delim = "\t", col_names = FALSE))
# helaList <- list(ctcf, rad21, smc3, znf143)
#
# hela.GR <- bedToGRangesList(bedList=helaList, bedNames=c("Ctcf", "Rad21", "Smc3", "Znf143"), signal=5)
## ---- eval=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# # Run preciseTAD
# set.seed(123)
# pt <- preciseTAD(genomicElements.GR = hela.GR,
# featureType = "distance",
# CHR = "CHR22",
# chromCoords = list(25500000, 27500000),
# tadModel = tadModel[[1]],
# threshold = 1.0,
# verbose = FALSE,
# parallel = NULL,
# DBSCAN_params = list(10000, 3),
# flank = 5000)
#
# pt
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.