Realignment Summary

\
Output is saved at `r paste0(output.dir,"/04_realign")` \

Table of alignment

\

# Build and print the realignment summary table: read counts are shown in
# thousands, and each mapped / unmapped count gets its percentage appended on
# a second line of the same cell.
realign.df <- report.summary$realign

# Only the first 13 columns are used; column 1 is kept as the sample label and
# the remaining columns are coerced to numeric.
realign.df <- realign.df[, 1:13]
# Convert column by column instead of apply(): apply() pushes the whole data
# frame through as.matrix(), which can re-format numeric columns as text when
# any column is character and collapses the result for a single-row frame.
# Non-numeric columns go through as.character() first so factor labels (not
# their integer codes) are parsed.
realign.df[-1] <- lapply(
  realign.df[-1],
  function(col) {
    if (is.numeric(col)) as.numeric(col) else as.numeric(as.character(col))
  }
)

# Count columns selected by position (sample label + 7 count columns).
# NOTE(review): positions are assumed to line up with the *.pct columns read
# by name below -- confirm against the layout of report.summary$realign.
realign.perc.df <- realign.df[, c(1:4, 6, 8, 10, 12)]
# Express counts in thousands with a "," thousands separator (per column).
realign.perc.df[-1] <- lapply(
  realign.perc.df[-1],
  function(a) format(a / 1000, big.mark = ",")
)

# specify_decimal() is defined elsewhere; presumably it formats a number with
# the given count of decimal places (3 here) -- TODO confirm.
# NOTE(review): these values are printed with a "%" suffix as-is; verify that
# the *.pct columns are already on a 0-100 scale.
Mapped.Reads.R1.pct <- specify_decimal(realign.df$Mapped.reads.R1.pct, 3)
Mapped.Reads.R2.pct <- specify_decimal(realign.df$Mapped.reads.R2.pct, 3)
Paired.Mapped.pct <- specify_decimal(realign.df$Paired.Mapped.pct, 3)
Unmapped.reads.R1.pct <- specify_decimal(realign.df$Unmapped.reads.R1.pct, 3)
Unmapped.reads.R2.pct <- specify_decimal(realign.df$Unmapped.reads.R2.pct, 3)

# Append "(xx.xxx%)" under each count; the "\n" becomes a line break inside
# the cell because pander() is called with keep.line.breaks = TRUE.
realign.perc.df[, 4] <- paste(realign.perc.df[, 4], paste0("(", Mapped.Reads.R1.pct, "%)"), sep = "\n")
realign.perc.df[, 5] <- paste(realign.perc.df[, 5], paste0("(", Mapped.Reads.R2.pct, "%)"), sep = "\n")
realign.perc.df[, 6] <- paste(realign.perc.df[, 6], paste0("(", Paired.Mapped.pct, "%)"), sep = "\n")
realign.perc.df[, 7] <- paste(realign.perc.df[, 7], paste0("(", Unmapped.reads.R1.pct, "%)"), sep = "\n")
realign.perc.df[, 8] <- paste(realign.perc.df[, 8], paste0("(", Unmapped.reads.R2.pct, "%)"), sep = "\n")

# Multi-line column headers for the 8 displayed columns.
colnames(realign.perc.df) <- c(
  "Sample\nName",
  "Total\nReads",
  "PF\nReads",
  "Mapped\nReads\nR1",
  "Mapped\nReads\nR2",
  "Paired\nMapped\nReads",
  "Unmapped\nReads\nR1",
  "Unmapped\nReads\nR2"
)

pander(realign.perc.df, justify = "center", style = "multiline", keep.line.breaks = TRUE, missing = ".", caption = "\t\t\t\t\t\t\t(In Thousand) Summary table of Alignment\n")
# Legend printed below the table, one entry per line.
cat("Sample Name : Sample name provided by user","Total Reads : Total number of read filtered out in the remove duplicates process","PF reads : The number of PF reads where PF is defined as passing Illumina's filter","Mapped Reads R1 : Total number of aligned read 1","Mapped Reads R2 :  Total number of aligned read 2","Paired-mapped Reads : Total number of read aligned to the pair","Unmapped Reads R1 : Total number of unmapped read 1 (Unmapped Reads 1= Total Reads 1 - Mapped Reads 1) ","Unmapped Reads R2 : Total number of unmapped read 2 (Unmapped Reads 2= Total Reads 2 - Mapped Reads 2)", sep = "\n")

\newpage

Result plot of realignment

# Stacked bar chart of mapped vs. unmapped read percentages per sample,
# faceted by read (the third dot-separated token of the column name).
realign.perc <- realign.df[, c(1, 5, 7, 11, 13)]
colnames(realign.perc)[1] <- "Sample.Name"
stack.df <- melt(realign.perc, id.vars = "Sample.Name")
# NOTE(review): values are multiplied by 100 for the "Proportion(%)" axis,
# which assumes the selected columns hold fractions in [0, 1]. The summary
# table prints the same *.pct columns with a "%" suffix without scaling --
# confirm which scale report.summary$realign actually uses.
stack.df[3] <- as.numeric(stack.df[, 3]) * 100

# Derive facet and fill keys from the melted column names, e.g. a name of the
# form "Mapped.reads.R1.pct" gives treat = "R1" and state = "Mapped.reads".
# vapply() keeps the result a character vector even for empty input.
var.labels <- as.character(stack.df$variable)
stack.df$treat <- vapply(
  var.labels,
  function(a) strsplit(a, split = "\\.")[[1]][3],
  character(1)
)
stack.df$state <- vapply(
  var.labels,
  function(a) strsplit(a, split = "\\.R")[[1]][1],
  character(1)
)

# Layout depends on the number of samples: more than 10 -> horizontal bars;
# otherwise vertical bars with x labels rotated 90 degrees. The previous
# `> 10` / `< 10` pair left exactly 10 samples with neither adjustment.
n.samples <- length(unique(stack.df[, 1]))
if (n.samples > 10) {
  coord_flip <- coord_flip()
  opts <- NULL
} else {
  coord_flip <- NULL
  opts <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
}

realign.plot <- ggplot(stack.df, aes(x = Sample.Name, y = value, fill = state)) +
  geom_bar(stat = "identity", position = "stack", width = 0.5) +
  coord_flip +
  scale_fill_manual(
    values = c("goldenrod1", "lightpink2"),
    labels = c("Mapped reads", "Unmapped reads")
  ) +
  theme(
    legend.position = "top",
    axis.text.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold")
  ) +
  xlab("Sample Name") +
  opts +
  labs(fill = "") +
  ylab("Proportion(%)") +
  facet_wrap(~treat)

# Sample axis is drawn in reversed factor-level order.
plot.stack.df <- stack.df[rev(rownames(stack.df)), ]
plot.stack.df$Sample.Name <- as.factor(plot.stack.df$Sample.Name)
realign.plot +
  ggtitle("Mapping proportion in each sample") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_discrete(limits = rev(levels(plot.stack.df$Sample.Name)))

\newpage



omicsCore/SEQprocess documentation built on May 7, 2020, 4:18 a.m.