## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
fig.width = 6,
fig.height = 5.5,
collapse = TRUE,
warning = FALSE,
comment = "#>"
)
## ----load_data----------------------------------------------------------------
library(movAPA)
# load demo peaks generated by scAPAtrap,
# which contains a list(peaks.meta, peaks.count)
data("demo_peaks")
peaks=demo_peaks
names(peaks)
head(peaks$peaks.meta)
head(peaks$peaks.count[, 1:10])
## ----create_PACds-------------------------------------------------------------
PACds=createPACdataset(counts=peaks$peaks.count, anno=peaks$peaks.meta)
PACds
rm(peaks)
## -----------------------------------------------------------------------------
summary(PACds)
## ----data_filtering-----------------------------------------------------------
PACds=subsetPACds(PACds, totPACtag = 10, minExprConds = 10, verbose=TRUE)
summary(PACds)
## ----load_bsgenome, warning=FALSE, message=FALSE------------------------------
library(BSgenome.Hsapiens.UCSC.hg38, quietly = TRUE, verbose = FALSE)
bsgenome<-BSgenome.Hsapiens.UCSC.hg38
head(GenomeInfoDb::seqnames(bsgenome))
head(unique(PACds@anno$chr))
## ----remove_IP----------------------------------------------------------------
PACdsIP=removePACdsIP(PACds, bsgenome, returnBoth=TRUE,
up=-10, dn=10, conA=6, sepA=7)
length(PACdsIP$real)
length(PACdsIP$ip)
# summary of IPs and real PAs
summary(PACdsIP$real)
summary(PACdsIP$ip)
## -----------------------------------------------------------------------------
# use real PA for further analysis
PACds=PACdsIP$real
## ----load_humanRefPA, eval=FALSE----------------------------------------------
# annodb=read.table("Human_hg38.PolyA_DB.bed", sep="\t", header = FALSE)
# head(annodb)
# annodb=annodb[,c(1,2,6)]
# colnames(annodb)=c("chr","coord","strand")
# annodb=readPACds(annodb, colDataFile = NULL)
## ----ovpIP, eval=FALSE--------------------------------------------------------
# PACdsIP$ip@anno$UPA_start=PACdsIP$ip@anno$start
# PACdsIP$ip@anno$UPA_end=PACdsIP$ip@anno$end
#
# PACdsIP$real@anno$UPA_start=PACdsIP$real@anno$start
# PACdsIP$real@anno$UPA_end=PACdsIP$real@anno$end
#
# ## find overlapping IPs
# IP.ovp=findOvpPACds(qryPACds=PACdsIP$ip, sbjPACds=annodb,
# d=50,
# qryMode='region', sbjMode='point',
# returnNonOvp=TRUE)
## ----combine_PA, eval=FALSE---------------------------------------------------
# anno=rbind(PACdsIP$real@anno, IP.ovp$ovp@anno)
# count=rbind(PACdsIP$real@counts, IP.ovp$ovp@counts)
# PACds=createPACdataset(counts=count, anno=anno)
# summary(PACds)
## ----annotatePAC, warning=FALSE, message=FALSE--------------------------------
library(TxDb.Hsapiens.UCSC.hg38.knownGene, quietly = TRUE, verbose = FALSE)
txdb=TxDb.Hsapiens.UCSC.hg38.knownGene
# annotate PAs
PACds=annotatePAC(PACds, txdb)
# after annotation, the gene and ftr information are present in PACds@anno.
summary(PACds)
## ----testExt3UTR--------------------------------------------------------------
testExt3UTR(PACds, seq(1000, 10000, by=1000))
## ----utr_len, warning=FALSE---------------------------------------------------
utrid=which(PACds@anno$ftr=='3UTR')
utrs=unique(PACds@anno[utrid, c('ftr_end','ftr_start')])
summary(utrs$ftr_end-utrs$ftr_start+1)
## ----extend3UTR, warning=FALSE------------------------------------------------
# extend 3UTR by 1000bp
PACds=ext3UTRPACds(PACds, 1000)
# after 3UTR extension
summary(PACds)
## ----utr_filter, warning=FALSE------------------------------------------------
PACds=PACds[PACds@anno$ftr=='3UTR']
summary(PACds)
## ----base_comp----------------------------------------------------------------
faFiles=faFromPACds(PACds, bsgenome, what='updn', fapre='updn',
up=-300, dn=100, byGrp='ftr')
## ----fig.dim=c(6,4)-----------------------------------------------------------
plotATCGforFAfile(faFiles, ofreq=FALSE, opdf=FALSE,
refPos=301, mergePlots = TRUE)
## -----------------------------------------------------------------------------
v=getVarGrams('mm')
priority=c(1,2,rep(3, length(v)-2))
## ----polyA_signal-------------------------------------------------------------
PACdsMM=annotateByPAS(PACds, bsgenome, grams=v,
priority=priority,
from=-50, to=-1, label='mm')
table(PACdsMM@anno$mm_gram)
## percent of PA without PAS
noPAS=sum(is.na(PACdsMM@anno$mm_gram))
noPAS/length(PACdsMM)
## ----seqlogo, message=FALSE, warning=FALSE------------------------------------
pas=PACdsMM@anno$mm_gram[!is.na(PACdsMM@anno$mm_gram)]
plotSeqLogo(pas)
## ----PACdsList----------------------------------------------------------------
## the two PACds for merging
PACdsList=list(ds1=PACds, ds2=PACds)
## ----merge_samples------------------------------------------------------------
pacds.merge=mergePACds(PACdsList, d=24, by='coord')
# summary of PACds#1
summary(PACds)
# summary of the merged PACds
summary(pacds.merge)
head(pacds.merge@counts[, 1:10])
head(pacds.merge@anno)
## ----load_RefPA---------------------------------------------------------------
annodb=read.table("./Human_hg38.PolyA_DB.bed", sep="\t", header = FALSE)
head(annodb)
annodb=annodb[,c(1,2,6)]
colnames(annodb)=c("chr","coord","strand")
annodb=readPACds(annodb, colDataFile = NULL)
## ----build_ref----------------------------------------------------------------
## we can build a reference PACds with human known PA and the given PACds list
## or we can use annodb only
refPA=buildRefPACdsAnno(refPACds=annodb, PACdsList=PACdsList,
by='coord', d=24,
min.counts = 50, min.smps=10, max.width=500,
verbose=TRUE)
## ----merge_samples_ref--------------------------------------------------------
pacds.merge=mergePACds(PACdsList, d=24, by='coord', refPACds=refPA)
# summary of PACds#1
summary(PACds)
# summary of the merged PACds
summary(pacds.merge)
head(pacds.merge@counts[, 1:10])
head(pacds.merge@anno)
## ----annotate_merged_smps, eval=F---------------------------------------------
# # annotate PAs
# pacds.merge=annotatePAC(pacds.merge, txdb)
#
# # after annotation, the gene and ftr information are present in PACds@anno.
# summary(pacds.merge)
## ----APA_dist-----------------------------------------------------------------
s=getNearbyPAdist(PACds)
## ----smartRUD_index-----------------------------------------------------------
# get proximal and distal PA for each gene
pd=get3UTRAPApd(pacds=PACds, minDist=50, maxDist=5000, minRatio=0.05,
fixDistal=FALSE, addCols='pd')
# get smartRUD index
rud=movAPAindex(pd, method="smartRUD", sRUD.oweight=TRUE, clearPAT=1)
head(rud$rud[, 1:5])
head(rud$weight[, 1:5])
## we can also calculate the RUD index
## rud1=movAPAindex(pd, method="RUD")
## ----vizAPA-------------------------------------------------------------------
library(vizAPA, quietly = TRUE)
# to choose one gene, first we get the total counts of each PA
PACds@anno$TN=rowSums(PACds@counts)
pd@anno$TN=rowSums(pd@counts)
# construct the genome annotation object
annoSource=new("annoHub")
annoSource=addAnno(annoSource, txdb)
gene=pd@anno$gene[1]
vizTracks(gene=gene,
PACds.list=list(pA=PACds, pd=pd),
PA.show=c("pos","TN"),
annoSource=annoSource,
PA.columns="coord", PA.width=10,
space5=1000, space3=1000)
## ----gene_id------------------------------------------------------------------
library(Homo.sapiens)
orgdb=Homo.sapiens
genes=getAnnoGenes(orgdb)
head(genes)
genes[genes$gene_entrezid==gene, ]
genes[genes$gene_entrezid==10003, ]
## -----------------------------------------------------------------------------
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.