# Need the knitr package to set chunk options knitr::opts_chunk$set(cache=TRUE, autodep=TRUE, cache.comments=FALSE, warning=FALSE,message=FALSE,cache.lazy=FALSE)
library("devtools"); library("roxygen2"); package_path = "~/Dropbox/Research/hyochoi_GitHub/" setwd(package_path) # create("MiSCseq") # Save functions # Add documentation setwd("./Scissors") document() setwd("..") install("Scissors")
package_path = "~/Dropbox/Research/hyochoi_GitHub/" library("Scissors")
# Other functions CoveragePath = "~/Dropbox/Research/Data/TCGA_extended_exon/" dataset = "~/Dropbox/Research/Hayes-lab/NDA-2014/dataset/" Rcode = "~/Dropbox/Research/Hayes-lab/NDA-2014/code/" # source(paste0(Rcode,"graphic_code_v1.R"));
load(paste0(package_path,"Scissors/toydata/toygene_coverage.RData")) GeneName = toygene_name rawdata = toygene_coverage # Raw counts data exon = toygene_exon # Exon info.
cat(paste(" GeneName =",GeneName," coverage ready"),"\n") cat(paste(" ",exon),"\n") cat(paste(" # Subjects =",ncol(rawdata)),"\n")
annotation0 = annotate_pileup(pileup=rawdata,exon=exon,input.type="whole_intron", output.type="whole_intron",intron.len=NULL) exonset0 = annotation0$dai$epm data0 = annotation0$out.pileup RNAcurve(datmat=data0,exonset=exonset0,indlist=NULL, plot.title="Gene TOY: raw coverage")
data.annotation = annotate_pileup(pileup=rawdata,exon=exon, input.type="whole_intron",output.type="part_intron",intron.len=NULL) data1 = data.annotation$out.pileup dai = data.annotation$dai exonset = dai$epm exonset dai$intron.len
x.mda <- svd(data1) ; projmat <- diag(x.mda$d)%*%t(x.mda$v) ; projmat[1,] <- -projmat[1,] ; colmat <- rainbow(n=length(projmat[1,]), start=0, end=0.756)[rank(-projmat[1,])] RNAcurve(datmat=data1,exonset=exonset,indlist=NULL, plot.title="data 1",colmat=colmat)
There are 8 outliers in the toy dataset:
par(mfrow=c(4,2),mar=c(2,2,2,2)) for (case in 1:8) { RNAcurve(datmat=data1,exonset=exonset,indlist=case, lwd=2,same.yaxis=TRUE, plot.title=paste("Subject",case),colmat=colmat) }
data.log = logtransform_data(data=data1) data2 = data.log$outdata logshift.val = data.log$logshift.val logshift.val
RNAcurve(datmat=data2,exonset=exonset,indlist=NULL, plot.title="log-transformed",colmat=colmat)
par(mfrow=c(4,2),mar=c(2,2,2,2)) for (case in 1:8) { RNAcurve(datmat=data2,exonset=exonset,indlist=case,lwd=2, plot.title=paste("Subject",case),colmat=colmat) }
data.centered = center_data(data=data2,type=1) data3 = data.centered$outdata msf = data.centered$msf data.center = data.centered$data.center
RNAcurve(datmat=data3,exonset=exonset,indlist=NULL, plot.title="Centered",colmat=colmat)
g = estimate_offset(data.centered=data.centered,rawmat=data.annotation$out.pileup,exonset=exonset, smoothness=0.7,draw.plot=T,main=GeneName) data4 = sweep(x=data.centered$outdata,2,g,FUN="/") RNAcurve(datmat=data4,exonset=exonset,indlist=NULL, plot.title="Normalized",colmat=colmat)
data.normalized = normalize_data(datalog=data2,rawmat=data.annotation$out.pileup, exonset=exonset,draw.plot=T,main=GeneName) msf = data.normalized$msf g = data.normalized$g data4 = data.normalized$outdata
data.weighted = weigh_data(data=data4,dai=dai,method="mean") data5 = data.weighted$outdata w = data.weighted$w plot(w,type="l",main="Weights on each base position")
data.process = process_data(rawdata=rawdata,exon=exon, input.type="whole_intron",output.type="part_intron",intron.len=NULL) dataI = data.process$dataI # raw data with new annotation datalog = data.process$datalog # log-transformed data dataS = data.process$dataS # normalized data
set.seed(0); step1out = miscGlobal(data=dataS,siglev=1e-5, PCnum=NULL,eps=NULL, rm.PCdir=TRUE,ADcutoff=10,filter.dir=TRUE,less.return=FALSE) outliers1 = step1out$outliers
cat(paste0(" # PCs = ",length(step1out$PCsubset)),"\n") cat(paste0(" # step 1 outliers = ",length(outliers1)),"\n")
appmat=dataS-step1out$resmat RNAcurve(datmat=appmat,exonset=exonset,indlist=1:8,lwd=2, plot.title=paste("Low-rank pproximation"),colmat=colmat) kdeplot.hy(step1out$OS,indlist=outliers1,main="Step 1", text=TRUE,high=0.9,low=0.1) legend("topright",bty="n", legend=c(paste("# detected =",length(outliers1))))
Step 1 outliers:
par(mfrow=c(4,2),mar=c(2,2,2,2)) for (case in outliers1) { RNAcurve(datmat=datalog,exonset=exonset,indlist=case,lwd=2, plot.title=paste("Subject",case),colmat=colmat) }
step2out = miscLocal(miscGlobalobj=step1out,covermat=dataI,exonset=exonset, siglev=1e-5,cutoff=NULL, winlength=100,readconstr=10) outliers2 = step2out$outliers
cat(paste0(" # step 2 outliers = ",length(outliers2)),"\n")
RNAcurve(datmat=step1out$resmat,exonset=exonset,indlist=1:8,lwd=2, plot.title=paste("Residual"),colmat=colmat) out2res = step2out$out2res par(mar=c(2,2,2,2)) kdeplot.hy(out2res$onoff_stat,indlist=out2res$outliers,main="Step 2", text=TRUE,textwhat=outliers2, high=0.9,low=0.1) abline(v=out2res$cutoff) legend("topright",bty="n", legend=c(paste("# detected =",length(outliers2))))
par(mfrow=c(1,2),mar=c(2,2,2,2)) for (case in outliers2) { RNAcurve(datmat=datalog,exonset=exonset,indlist=case,lwd=2, plot.title=paste("Subject",case),colmat=colmat) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.