Nothing
## ----include = FALSE----------------------------------------------------------
library(rmarkdown)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----echo = T, results = 'hide',message=FALSE---------------------------------
# Load the comrades-OO Library
#install.packages("comradesOO")
library(comradesOO)
## ----echo = T, results = 'hide',message=FALSE---------------------------------
# Here are the other libraries on which comradesOO relies
library(seqinr)
library(GenomicRanges)
library(ggplot2)
library(reshape2)
library(MASS)
library(ggplot2)
library(doParallel)
library(igraph)
library(R4RNA)
library(RColorBrewer)
library(heatmap3)
library(mixtools)
library(TopDom)
library(tidyverse)
library(RRNA)
library(ggrepel)
## -----------------------------------------------------------------------------
# Set up the sample table
sampleTableRow1 = c(system.file("extdata",
"s1.txt",
package="comradesOO"), "s", "1", "s1")
sampleTableRow2 = c(system.file("extdata",
"c1.txt",
package="comradesOO"), "c", "1", "c1")
sampleTable2 = rbind.data.frame(sampleTableRow1, sampleTableRow2)
# add the column names
colnames(sampleTable2) = c("file", "group", "sample", "sampleName")
sampleTable2
## -----------------------------------------------------------------------------
rna = c("ENSG000000XXXXX_NR003286-2_RN18S1_rRNA")
## -----------------------------------------------------------------------------
path18SFata <- system.file("extdata",
"18S.fasta",
package="comradesOO")
rnaRefs = list()
rnaRefs[[rna]] = read.fasta(path18SFata)
## -----------------------------------------------------------------------------
path18SFata <- system.file("extdata",
"ribovision18S.txt",
package="comradesOO")
known18S = read.table(path18SFata,
header = F)
## -----------------------------------------------------------------------------
pathShape <- system.file("extdata",
"reactivities.txt",
package="comradesOO")
shape = read.table(pathShape,
header = F)
## -----------------------------------------------------------------------------
# load the object
cds = comradesDataSet(rnas = rna,
sampleTable = sampleTable2)
## -----------------------------------------------------------------------------
# Check status of instance
cds
## -----------------------------------------------------------------------------
# Returns the size of the RNA
rnaSize(cds)
## -----------------------------------------------------------------------------
# Returns the sample table
sampleTable(cds)
## -----------------------------------------------------------------------------
# Returns indexes of the samples in the control and not control groups
group(cds)
## -----------------------------------------------------------------------------
# Get the sample names of the instance
sampleNames(cds)
## ----eval=FALSE,echo=TRUE-----------------------------------------------------
# # Return the hybFiles slot
# hybFiles(cds)
## ----eval=FALSE, echo=TRUE----------------------------------------------------
# # Return the matrixList slot
# matrixList(cds)
## -----------------------------------------------------------------------------
data = getData(x = cds, # The object
data = "hybFiles", # The Type of data to return
type = "original")[[1]] # The stage of the analysis for the return data
head(data)
## -----------------------------------------------------------------------------
# Returns the RNAs with highest number of assigned reads
# regardless of wether it is an Inter or Intra - RNA interaction.
topTranscripts(cds, # The comradesDataSet instance
2) # The number of entried to return
## -----------------------------------------------------------------------------
# Returns the RNAs that interact with the RNA of interest
topInteracters(cds, # The comradesDataSet instance
1) # The number of entries to return
## -----------------------------------------------------------------------------
# Returns the Interacions with the highest number of assigned reads
topInteractions(cds, # The comradesDataSet instance
2) # The number of entries to return
## -----------------------------------------------------------------------------
features = featureInfo(cds) # The comradesDataSet instance
# Counts for features at the transcript level
features$transcript
# Counts for features at the family level (last field with "_" delimited IDs)
features$family
## -----------------------------------------------------------------------------
# Cluster the reads
clusteredCds = clusterComrades(cds = cds, # The comradesDataSet instance
cores = 1, # The number of cores
stepCount = 2, # The number of steps in the random walk
clusterCutoff = 1) # The minimum number of reads for a cluster to be considered
## -----------------------------------------------------------------------------
# Check status of instance
clusteredCds
## -----------------------------------------------------------------------------
# Returns the number of clusters in each sample
clusterNumbers(clusteredCds)
## -----------------------------------------------------------------------------
# Returns the number reads in clusters
readNumbers( clusteredCds)
## -----------------------------------------------------------------------------
getData(clusteredCds, # The object
"clusterTableList", # The Type of data to return
"original")[[1]] # The stage of the analysis for the return data
## ----eval=FALSE---------------------------------------------------------------
# getData(clusteredCds, # The object
# "clusterGrangesList", # The Type of data to return
# "original")[[1]] # The stage of the analysis for the return data
## -----------------------------------------------------------------------------
# Trim the Clusters
trimmedClusters = trimClusters(clusteredCds = clusteredCds, # The comradesDataSet instance
trimFactor = 1, # The cutoff for cluster trimming (see above)
clusterCutoff = 0) # The minimum number of reads for a cluster to be considered
## -----------------------------------------------------------------------------
# Check status of instance
trimmedClusters
## -----------------------------------------------------------------------------
# Returns the number of clusters in each sample
clusterNumbers(trimmedClusters)
## -----------------------------------------------------------------------------
# Returns the number reads in clusters
readNumbers( trimmedClusters)
## ----error=FALSE,eval = FALSE, results=FALSE,message=FALSE--------------------
# # Fold the RNA in part of whole
# foldedCds = foldComrades(trimmedClusters,
# rnaRefs = rnaRefs,
# start = 1600,
# end = 1869,
# shape = 0,
# ensembl = 20,
# constraintNumber = 30,
# evCutoff = 5)
## ----eval = FALSE-------------------------------------------------------------
# # Check status of instance
# foldedCds
## -----------------------------------------------------------------------------
# Plot heatmaps for each sample
plotMatrices(cds = cds, # The comradesDataSet instance
type = "original", # The "analysis stage"
directory = 0, # The directory for output (0 for standard out)
a = 1, # Start coord for x-axis
b = rnaSize(cds), # End coord for x-axis
c = 1, # Start coord for y-axis
d = rnaSize(cds), # End coord for y-axis
h = 5) # The hight of the image (if saved)
## -----------------------------------------------------------------------------
trimmedClusters
# Plot heatmaps for all samples combined and all controls combined
plotMatricesAverage(cds = trimmedClusters, # The comradesDataSet instance
type = "trimmedClusters", # The "analysis stage"
directory = 0, # The directory for output (0 for standard out)
a = 1, # Start coord for x-axis
b = rnaSize(cds), # End coord for x-axis
c = 1, # Start coord for y-axis
d = rnaSize(cds), # End coord for y-axis
h = 5) # The hight of the image (if saved)
## ----eval = FALSE-------------------------------------------------------------
# domainDF = data.frame()
# for(j in c(20,30,40,50,60,70)){
# #for(i in which(sampleTable(cds)$group == "s")){
#
# timeMats = as.matrix(getData(x = cds,
# data = "matrixList",
# type = "noHost")[[1]])
#
# timeMats = timeMats/ (sum(timeMats)/1000000)
# tmp = tempfile()
# write.table(timeMats, file = tmp,quote = F,row.names = F, col.names = F)
#
# tdData2 = readHiC(
# file = tmp,
# chr = "rna18s",
# binSize = 10,
# debug = getOption("TopDom.debug", FALSE)
# )
#
# tdData = TopDom(
# tdData2 ,
# window.size = j,
# outFile = NULL,
# statFilter = TRUE,
# debug = getOption("TopDom.debug", FALSE)
# )
#
# td = tdData$domain
# td$sample = sampleTable(cds)$sampleName[1]
# td$window = j
# domainDF = rbind.data.frame(td, domainDF)
#
# }
#
#
#
# ggplot(domainDF) +
# geom_segment(aes(x = from.coord/10,
# xend = to.coord/10, y = as.factor(sub("s","",sample)),
# yend = (as.factor(sub("s","",sample)) ), colour = tag),
# size = 20, alpha = 0.8) +
# facet_grid(window~.)+
# theme_bw()
#
## ----eval = FALSE-------------------------------------------------------------
# plotEnsemblePCA(foldedCds,
# labels = T, # plot labels for structures
# split = T) # split samples over different facets (T/f)
#
## ----eval = FALSE-------------------------------------------------------------
# plotComparisonArc(foldedCds = foldedCds,
# s1 = "c1", # The sample of the 1st structure
# s2 = "s1", # The sample of the 2nd structure
# n1 = 13, # The number of the 1st structure
# n2 = 16) # The number of the 2nd structure
## ----eval = F-----------------------------------------------------------------
# plotStructure(foldedCds = foldedCds,
# rnaRefs = rnaRefs,
# s = "s1", # The sample of the structure
# n = 1) # The number of the structure
## -----------------------------------------------------------------------------
getInteractions(cds,
"ENSG00000XXXXXX_NR003287-2_RN28S1_rRNA") %>%
mutate(sample =sub("\\d$","",sample) )%>%
group_by(rna,Position,sample)%>%
summarise(sum = sum(depth)) %>%
ggplot()+
geom_area(aes(x = Position,
y = sum,
fill = sample),
stat = "identity")+
facet_grid(sample~.) +
theme_bw()
## -----------------------------------------------------------------------------
getReverseInteractions(cds,
rna) %>%
mutate(sample =sub("\\d$","",sample) )%>%
group_by(rna,Position,sample)%>%
summarise(sum = sum(depth)) %>%
ggplot()+
geom_area(aes(x = Position,
y = sum,
fill = sample),
stat = "identity")+
facet_grid(sample~.)+
theme_bw()
## -----------------------------------------------------------------------------
expansionSize = 5
knownMat = matrix(0, nrow = rnaSize(cds), ncol = rnaSize(cds))
for(i in 1:nrow(known18S)){
knownMat[ (known18S$V1[i]-expansionSize):(known18S$V1[i]+expansionSize),
(known18S$V2[i]-expansionSize):(known18S$V2[i]+expansionSize)] =
knownMat[(known18S$V1[i]-expansionSize):(known18S$V1[i]+expansionSize),
(known18S$V2[i]-expansionSize):(known18S$V2[i]+expansionSize)] +1
}
knownMat = knownMat + t(knownMat)
## -----------------------------------------------------------------------------
# use compare known to gett he known and not know clusters
knowClusteredCds = compareKnown(trimmedClusters, # The comradesDataSet instance
knownMat, # A contact matrix of know interactions
"trimmedClusters") # The analysis stage of clustering to compare
knowClusteredCds
## -----------------------------------------------------------------------------
# Plot heatmaps for all samples combined and all controls combined
plotMatricesAverage(cds = knowClusteredCds, # The comradesDataSet instance
type = "KnownAndNovel", # The "analysis stage"
directory = 0, # The directory for output (0 for standard out)
a = 1, # Start coord for x-axis
b = rnaSize(cds), # End coord for x-axis
c = 1, # Start coord for y-axis
d = rnaSize(cds), # End coord for y-axis
h = 5) # The hight of the image (if saved)
## -----------------------------------------------------------------------------
# Get the number of clusters for each analysis Stage
clusterNumbers(knowClusteredCds)
## -----------------------------------------------------------------------------
# Get the number of reads in each cluster for each analysis stage
readNumbers(knowClusteredCds)
## ----eval = FALSE-------------------------------------------------------------
# head(compareKnownStructures(foldedCds,
# known18S)) # the comarison set
## ----eval = FALSE-------------------------------------------------------------
# ggplot(compareKnownStructures(foldedCds, known18S)) +
# geom_hline(yintercept = c(0.5,0.25,0.75,0,1),
# colour = "grey",
# alpha = 0.2)+
# geom_vline(xintercept = c(0.5,0.25,0.75,0,1),
# colour = "grey",
# alpha = 0.2)+
# geom_point(aes(x = sensitivity,
# y = precision,
# size = as.numeric(as.character(unlist(foldedCds@dgs))),
# colour = str_sub(structureID,
# start = 1 ,
# end = 2))) +
# xlim(0,1)+
# ylim(0,1)+
# theme_classic()
#
## ----eval = FALSE-------------------------------------------------------------
# # make an example dataset
# cds = makeExampleComradesDataSet()
# cds
## ----eval = FALSE-------------------------------------------------------------
# # Have a look at the reads for each sample
# plotMatricesAverage(cds = cds,
# type = "original",
# directory = 0,
# a = 1,
# b = rnaSize(cds),
# c = 1,
# d = rnaSize(cds),
# h = 5)
#
## ----eval = FALSE-------------------------------------------------------------
# topInteracters(cds,2)
## ----eval = FALSE-------------------------------------------------------------
# featureInfo(cds)
## ----eval = FALSE-------------------------------------------------------------
# clusteredCds = clusterComrades(cds = cds,
# cores = 1,
# stepCount = 2,
# clusterCutoff = 0)
#
#
#
#
#
#
# trimmedClusters = trimClusters(clusteredCds = clusteredCds,
# trimFactor = 1,
# clusterCutoff = 0)
## ----eval = FALSE-------------------------------------------------------------
# plotMatricesAverage(cds = clusteredCds,
# type = "originalClusters",
# directory = 0,
# a = 1,
# b = rnaSize(cds),
# c = 1,
# d = rnaSize(cds),
# h = 5)
## ----eval = FALSE-------------------------------------------------------------
# plotMatricesAverage(cds = trimmedClusters,
# type = "trimmedClusters",
# directory = 0,
# a = 1,
# b = rnaSize(cds),
# c = 1,
# d = rnaSize(cds),
# h = 5)
#
## ----eval = FALSE-------------------------------------------------------------
#
# fasta = paste(c(rep('A',25),
# rep('T',25),
# rep('A',10),
# rep('T',23)),collapse = "")
#
# header = '>transcript1'
#
#
# fastaFile = tempfile()
# writeLines(paste(header,fasta,sep = "\n"),con = fastaFile)
#
#
# rnaRefs = list()
# rnaRefs[[rnas(cds)]] = read.fasta(fastaFile)
# rnaRefs
#
#
#
# foldedCds = foldComrades(trimmedClusters,
# rnaRefs = rnaRefs,
# start = 1,
# end = 83,
# shape = 0,
# ensembl = 5,
# constraintNumber = 1,
# evCutoff = 1)
#
## ----eval = FALSE-------------------------------------------------------------
# # make an example table of "know" interactions
# file = data.frame(V1 = c(6),
# V2 = c(80))
#
# compareKnownStructures(foldedCds,file)
#
## ----eval = FALSE-------------------------------------------------------------
#
# plotEnsemblePCA(foldedCds, labels = T,split = T)
## ----eval = FALSE-------------------------------------------------------------
# plotComparisonArc(foldedCds = foldedCds,"s1","c1",2,3)
## ----eval = FALSE-------------------------------------------------------------
# plotStructure(foldedCds = foldedCds,"c1",1)
#
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.