#' @title ProcessOutputFilesFrom5Cases
#'
#' @description read the mapping results from the Dogs of each gene
#'
#' @param dir.name: the path for input files
#' @param input.file.pattern: input file pattern
#'
#'
#'
#' @return
#' @export
#'
#' @examples
#'
#' dir.name="/media/aiminyan/DATA/Ramin_azhang/Counts5CasesEachSample/"
#' input.file.pattern="*count.2.txt"
#'
#' dir.name.gene.list="/media/H_driver/2016/Ramin_azhang/"
#' pattern.4.gene.list="final_list.csv"
#'
#' out.dir.name="/media/aiminyan/DATA/Ramin_azhang/Counts5CasesEachSample/"
#'
#' out.file.pattern.interested="DoGs_adjust_by_batch_interested_gene"
#' out.file.pattern.positive.gene="DoGs_adjust_by_batch_positive"
#' out.file.pattern.negative.gene="DoGs_adjust_by_batch_negative"
#' out.file.pattern.all= "DoGs_adjust_by_batch_all"
#'
#' Re.unadjusted.adjusted<-ProcessOutputFilesFrom5Cases(dir.name,input.file.pattern,out.dir.name,out.file.pattern.interested,
#' out.file.pattern.positive.gene,
#' out.file.pattern.negative.gene,
#' out.file.pattern.all,
#' dir.name.gene.list,
#' pattern.4.gene.list,
#' adjust_by_batch="YES")
#'
#' save.image(file=paste0(out.dir.name,"re_save_2.RData"))
#' savehistory(file=paste0(out.dir.name,"re_save_2.Rhistory"))
#'
ProcessOutputFilesFrom5Cases<-function(dir.name,input.file.pattern,out.dir.name,out.file.pattern.interested,out.file.pattern.positive.gene,
out.file.pattern.negative.gene,out.file.pattern.all,dir.name.gene.list,pattern.4.gene.list,adjust_by_batch){
file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
file.name.2<-as.list(file.name)
names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",7)
print(file.name.2)
re.out<-lapply(file.name.2,function(u){
re=read.table(u,header=F)
colnames(re)=c("Count","GeneName")
re
})
temp.name<-strsplit(names(file.name.2),split="\\.")
temp.name.2<-trimws(do.call("rbind",lapply(temp.name,"[[",1)))
temp.name.3<-unique(temp.name.2[,1])
temp.name.4<-as.list(temp.name.3)
names(temp.name.4)<-temp.name.3
txs.name<-lapply(re.out,"[[",2)
txs.name<-unlist(txs.name)
txs.name<-unique(as.character(txs.name))
txs.name.count<-data.frame(Count=rep(0,length(txs.name)),GeneName=txs.name)
#dim(txs.name.count)
Process4EachSample <- function(one_sample,txs.name.count,re.out) {
count.gene.matched<-re.out[(grep(one_sample,names(re.out)))]
count.gene.matched.minus.gene<-count.gene.matched[(grep("minus.gene",names(count.gene.matched)))]
count.gene.matched.plus.gene<-count.gene.matched[(grep("plus.gene",names(count.gene.matched)))]
ReformatCount <- function(count.gene.matched.minus.gene,txs.name.count) {
reformat.count.gene.matched<-lapply(count.gene.matched.minus.gene,function(x,txs.name.count){
y<-txs.name.count
if(dim(x[which(x$GeneName %in% y$GeneName),])[1]!=0)
{
y[match(x$GeneName,y$GeneName),]$Count<-x$Count
}
y
},txs.name.count)
reformat.count.gene.matched.2<-cbind(apply(do.call("cbind",lapply(reformat.count.gene.matched,"[",2)),1,unique),
do.call("cbind",lapply(reformat.count.gene.matched,"[",1)))
colnames(reformat.count.gene.matched.2)=c("GeneName",names(count.gene.matched.minus.gene))
gene<-as.character(reformat.count.gene.matched.2$GeneName)
count.DoGs.plus.read<-apply(reformat.count.gene.matched.2[,c(grep("plus.read.DoGs.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("plus.read.overlap.Gene.and.DoGs.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("plus.read.overlap.Gene.and.DoGs.start.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("plus.read.overlap.Gene.and.DoGs.end.count.2.txt",colnames(reformat.count.gene.matched.2)))],1,sum)
count.DoGs.minus.read<-apply(reformat.count.gene.matched.2[,c(grep("minus.read.DoGs.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("minus.read.overlap.Gene.and.DoGs.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("minus.read.overlap.Gene.and.DoGs.start.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("minus.read.overlap.Gene.and.DoGs.end.count.2.txt",colnames(reformat.count.gene.matched.2)))],1,sum)
count.gene.plus.read<-apply(reformat.count.gene.matched.2[,c(grep("plus.read.gene.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("plus.read.overlap.Gene.and.DoGs.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("plus.read.overlap.Gene.and.DoGs.start.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("plus.read.overlap.Gene.and.DoGs.end.count.2.txt",colnames(reformat.count.gene.matched.2)))],1,sum)
count.gene.minus.read<-apply(reformat.count.gene.matched.2[,c(grep("minus.read.gene.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("minus.read.overlap.Gene.and.DoGs.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("minus.read.overlap.Gene.and.DoGs.start.count.2.txt",colnames(reformat.count.gene.matched.2)),
grep("minus.read.overlap.Gene.and.DoGs.end.count.2.txt",colnames(reformat.count.gene.matched.2)))],1,sum)
reformat.count.gene.matched.3<-as.data.frame(cbind(gene,count.gene.plus.read,count.gene.minus.read,count.DoGs.plus.read,count.DoGs.minus.read))
return(reformat.count.gene.matched.3)
}
re1<-ReformatCount(count.gene.matched.minus.gene,txs.name.count)
re2<-ReformatCount(count.gene.matched.plus.gene,txs.name.count)
re<-rbind(re1,re2)
re<-re[order(re$gene),]
return(re)
}
re.8.samples<-lapply(temp.name.4,function(u,txs.name.count,re.out){
YY<-Process4EachSample(u,txs.name.count,re.out)
YY},txs.name.count,re.out)
DEAnalysis <- function(countData) {
colData <- data.frame(condition=factor(c(rep("Dox",4),rep("WT",4))))
dds <- DESeqDataSetFromMatrix(countData, colData, formula(~ condition))
#size.factor<-estimateSizeFactors(dds)
#print(size.factor)
re.DESeq<-results(DESeq(dds))
re.FC<-cbind(as.data.frame(re.DESeq),2^re.DESeq[,2],counts(dds))
colnames(re.FC)[7]="FoldChange(WT/Dox)"
txs.gene<-ReformatTxsGene()
re.FC<-merge(re.FC,txs.gene$txs_genes_DF_2,by=0)
re.FC.sorted<-re.FC[order(re.FC$pvalue),]
return(re.FC.sorted)
}
ProcessEachCorner <- function(re.8.samples,i){
gene<-apply(do.call("cbind",lapply(re.8.samples,"[",1)),1,unique)
count.gene.plus.read.8.samples<-cbind(gene,do.call("cbind",lapply(re.8.samples,"[",i)))
colnames(count.gene.plus.read.8.samples)=c("gene",names(re.8.samples))
df<-count.gene.plus.read.8.samples
df<-apply(df[,-1],2,as.numeric)
index<-rowSums(df[, -1])>0
dff<-count.gene.plus.read.8.samples[index,]
rownames(dff)<-dff$gene
dff<-dff[,-1]
countData <- apply(dff[,c(1,3,5,6,2,4,7,8)], 2, as.numeric)
rownames(countData)<-rownames(dff)
re.FC<-DEAnalysis(countData)
#re.DoGs.adjusted.by.batch<-DEAnalysisAdjustByBatch(countData)
# re.FC<-cbind(as.data.frame(re.DESeq),2^re.DESeq[,2],counts(dds))
# colnames(re.FC)[7]="FoldChange(WT/Dox)"
#
# txs.gene<-ReformatTxsGene()
#
#re.FC<-list(re.FC=re.FC,re.DoGs.adjusted.by.batch=re.DoGs.adjusted.by.batch)
return(re.FC)
}
re.BL<-ProcessEachCorner(re.8.samples,2) #BL
re.TL<-ProcessEachCorner(re.8.samples,3) #TL
re.BR<-ProcessEachCorner(re.8.samples,4) #BR
re.TR<-ProcessEachCorner(re.8.samples,5) #TR
#head(re.BL)
#head(re.BR)
#head(re.TL)
#head(re.TR)
#re.TR[which(re.TR$Row.names =="uc003tzi.4"),]
#Check for uc003tzi.4:gene(+)
#re.BL[[1]][which(re.BL[[1]]$Row.names =="uc003tzi.4"),] #BL for gene(+)
#re.TL[[1]][which(re.TL[[1]]$Row.names =="uc003tzi.4"),] #TL for gene(+) gene
#re.BR[[1]][which(re.BR[[1]]$Row.names =="uc003tzi.4"),] #BR for gene(+)
#re.TR[[1]][which(re.TR[[1]]$Row.names =="uc003tzi.4"),] #TR for gene(+) DoGs
#Check for uc002szf.1:gene(-)
#re.BL[[1]][which(re.BL[[1]]$Row.names =="uc002szf.1"),] #BR for gene(-)
#re.TL[[1]][which(re.TL[[1]]$Row.names =="uc002szf.1"),] #
#re.BR[[1]][which(re.BR[[1]]$Row.names =="uc002szf.1"),] #
#re.TR[[1]][which(re.TR[[1]]$Row.names =="uc002szf.1"),] #
#Check for uc001aac.4:gene(-)
#re.BL[[1]][which(re.BL[[1]]$Row.names =="uc001aac.4"),]#BR for gene(-) Gene
#re.TL[[1]][which(re.TL[[1]]$Row.names =="uc001aac.4"),]#TR for gene(-)
#re.TR[[1]][which(re.BR[[1]]$Row.names =="uc001aac.4"),]#TL for gene(-)
#re.BR[[1]][which(re.TR[[1]]$Row.names =="uc001aac.4"),]#BL for gene(-) DoGs
#if(adjust_by_batch=="NO"){
re.BL.4.plus.gene.BR.4.minus.gene<-re.BL
re.TL.4.plus.gene.TR.4.minus.gene<-re.TL
re.BR.4.plus.gene.BL.4.minus.gene<-re.BR
re.TR.4.plus.gene.TL.4.minus.gene<-re.TR
#}else{
# re.BL.4.plus.gene.BR.4.minus.gene<-re.BL[[2]]
# re.TL.4.plus.gene.TR.4.minus.gene<-re.TL[[2]]
# re.BR.4.plus.gene.BL.4.minus.gene<-re.BR[[2]]
# re.TR.4.plus.gene.TL.4.minus.gene<-re.TR[[2]]
#}
#Get the counts for DoGs of plus and minus gene
DoGs.4.plus.Gene<-re.TR.4.plus.gene.TL.4.minus.gene[which(re.TR.4.plus.gene.TL.4.minus.gene$strand=="+"),]
DoGs.4.minus.Gene<-re.BR.4.plus.gene.BL.4.minus.gene[which(re.BR.4.plus.gene.BL.4.minus.gene$strand=="-"),]
DoGs.4.plus.minus.Gene<-rbind(DoGs.4.plus.Gene, DoGs.4.minus.Gene)
#DoGs.4.plus.Gene[which(DoGs.4.plus.Gene$Row.names =="uc003tzi.4"),] #TR for gene(+) DoGs
#DoGs.4.minus.Gene[which( DoGs.4.minus.Gene$Row.names =="uc001aac.4"),]#BL for gene(-) DoGs
GeneTypeBasedDE <- function(DoGs.4.plus.Gene) {
Count.DoGs.4.plus.Gene<-DoGs.4.plus.Gene[,c(1,9:16)]
rownames(Count.DoGs.4.plus.Gene)<-Count.DoGs.4.plus.Gene$Row.names
Count.DoGs.4.plus.Gene.2<-Count.DoGs.4.plus.Gene[,-1]
head(Count.DoGs.4.plus.Gene.2)
if(adjust_by_batch=="NO"){
re.DESeq.DoGs.plus.gene<-DEAnalysis(Count.DoGs.4.plus.Gene.2)
}else{
re.DESeq.DoGs.plus.gene<-DEAnalysisAdjustByBatch(Count.DoGs.4.plus.Gene.2)
}
return(re.DESeq.DoGs.plus.gene)
}
re.DESeq.DoGs.plus.gene<-GeneTypeBasedDE(DoGs.4.plus.Gene)
#head(re.DESeq.DoGs.plus.gene[which(re.DESeq.DoGs.plus.gene$log2FoldChange<0&re.DESeq.DoGs.plus.gene$pvalue<0.05),])
re.DESeq.DoGs.minus.gene<-GeneTypeBasedDE(DoGs.4.minus.Gene)
#head(re.DESeq.DoGs.minus.gene[which(re.DESeq.DoGs.minus.gene$log2FoldChange<0&re.DESeq.DoGs.minus.gene$pvalue<0.05),])
re.DESeq.DoGs.plus.minus.gene<-GeneTypeBasedDE(DoGs.4.plus.minus.Gene)
#head(re.DESeq.DoGs.plus.minus.gene[which(re.DESeq.DoGs.plus.minus.gene$log2FoldChange<0&re.DESeq.DoGs.plus.minus.gene$pvalue<0.05),])
gene.interested<-ReadGeneList(dir.name.gene.list,pattern.4.gene.list)
re.DESeq.DoGs.plus.minus.gene.interested<-re.DESeq.DoGs.plus.minus.gene[which(re.DESeq.DoGs.plus.minus.gene$gene %in% gene.interested[,1]),]
#re.DESeq.DoGs.plus.minus.gene.interested[which(re.DESeq.DoGs.plus.minus.gene.interested$gene %in% c("TPCN2")),]
#re.FC.sorted<-re.FC[order(re.FC$pvalue),]
write.csv(re.DESeq.DoGs.plus.gene,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.positive.gene,".csv"),row.names = FALSE,quote=FALSE)
write.csv(re.DESeq.DoGs.minus.gene,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.negative.gene,".csv"),row.names = FALSE,quote=FALSE)
write.csv(re.DESeq.DoGs.plus.minus.gene,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.all,".csv"),row.names = FALSE,quote=FALSE)
#Re.unadjusted.adjusted$DE
#write.csv(Re.unadjusted.adjusted$DE,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.all,".csv"),row.names = FALSE,quote=FALSE)
write.csv(re.DESeq.DoGs.plus.minus.gene.interested,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.interested,".csv"))
#write.csv(re.DESeq.DoGs.plus.minus.gene,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.all,".csv"),row.names = FALSE,quote=FALSE)
#Re.unadjusted.adjusted$DE
#write.csv(Re.unadjusted.adjusted$DE,file=paste0(out.dir.name,"3UTR_DE_",out.file.pattern.all,".csv"),row.names = FALSE,quote=FALSE)
re.out.3<-list(re.out=re.out,
re.8.samples=re.8.samples,
DE_positive_gene=re.DESeq.DoGs.plus.gene,
DE_negative_gene=re.DESeq.DoGs.minus.gene,
DE=re.DESeq.DoGs.plus.minus.gene,
DE_interested= re.DESeq.DoGs.plus.minus.gene.interested)
return(re.out.3)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.