R/ChipSeq.R

#' AnnotatePeak2
#'
#' @param input.file.dir 
#' @param input.file.pattern 
#' @param index.file 
#' @param output.file.dir 
#' @param genome 
#'
#' @return
#' @export
#'
#' @examples
#' 
#' input.file.dir="/media/H_driver/2016/Danny/Danny_chip_PeakCall/"
#' input.file.pattern="*macs142_peaks.bed"
#' output.file.dir="/media/H_driver/2016/Danny/Danny_chip_PeakCall/"
#'
#' AnnotatePeak(input.file.dir,input.file.pattern,8,output.file.dir,genome="Mm")
#' AnnotatePeak(input.file.dir,input.file.pattern,7,output.file.dir,genome="Hs")
#' 
#' 
#' 
AnnotatePeak2 <- function(input.file.dir,input.file.pattern,index.file,output.file.dir,genome) {
  
  library(ChIPpeakAnno)
  
  dir.name=input.file.dir
  input.file.pattern=input.file.pattern
  
  dir.name=reformatPath(dir.name)
  output.dir.name=reformatPath(output.file.dir)
  
  #print(output.dir.name)
  #temp=Sys.time()
  #temp1=gsub(":","-",Sys.time())
  #temp2=gsub(" ","-",temp1)
  #temp3=paste0(output.dir.name,"AnalysisResults_at_",temp2)
  
  temp3=output.dir.name
  
  dir.create(temp3)
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",index.file)
  
  print(file.name.2)
  
  file.name.3<-file.name.2
  
  #sample.name<-sapply(strsplit(names(file.name.3),split="_peaks_"),"[[",1)
  
  #names(file.name.3)=sample.name
  
  file.name.4 <-file.name.3
  
  re.out<-lapply(file.name.4,function(u){
    re=toGRanges(u,format="BED")
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  head(re.out[[1]])
  
  re.out.L<-lapply(re.out,function(u){
    re=length(u)
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  if(genome=="Mm"){
    
    annoData <- toGRanges(EnsDb.Mmusculus.v75, feature="gene")
    
    ol <- findOverlapsOfPeaks(re.out[c(2,4,1)])
    
    overlaps<-ol$peaklist$`11_2470IUPUI_WT_BM_SMC1_peaks.bed///13_2470IUPUI_WT_BM_Rad21_peaks.bed///10_WT_BM_ASXL1_peaks.bed`
    
    binOverFeature(overlaps, annotationData=annoData,
                   radius=5000, nbins=20, FUN=length, errFun=0,
                   ylab="count",
                   main="Distribution of aggregated peak numbers around TSS")
    
    overlaps.trimmed<-trim(overlaps, use.names=TRUE)
    
    library(EnsDb.Mmusculus.v79)
    dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
    #seqinfo(dd.GRCm39.mm10)
    #seqlevels(dd.GRCm39.mm10)
    
    seqlevels(dd.GRCm39.mm10,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
                                              "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
                                              "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
    
    seqinfo(overlaps)<-seqinfo(dd.GRCm39.mm10)
    #GRCm38/mm10
    #dd<-toGRanges(EnsDb.Mmusculus.v79)
    #seqinfo(dd)
    #library(ensembldb)
    #library(GenomeInfoDb)
    seqlevelsStyle(overlaps.trimmed) <- seqlevelsStyle(dd.GRCm39.mm10)
    overlaps.anno<-annoPeaks(overlaps.trimmed,dd.GRCm39.mm10)
    write.table(overlaps.anno,file=paste0(temp3,"/","annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
  }else if(genome=="Hs"){
    
    library(EnsDb.Hsapiens.v75)
    #annoData<-toGRanges(EnsDb.Hsapiens.v75, feature="gene")
    
    dd.hs<-toGRanges(EnsDb.Hsapiens.v75)
    
    print(seqinfo(dd.hs))
    print(seqlevels(dd.hs))
    
    #print(seqlevels(dd.hs)[,1])
    
    #print(seqlevels(re.out[[1]])[,1])
    
    # seqlevels(dd.hs,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
    #                                           "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
    #                                           "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
    
    #temp4=
    
    re.out.L<-lapply(1:length(re.out),function(u,re.out,dd.hs){
      
      x=re.out[[u]]
      x_name=names(re.out)[u]
      
      seqlevels(dd.hs,force=TRUE)<-seqinfo(x)@seqnames
      #print(seqinfo(re.out.trimmed))
      #print(seqlevels(re.out.trimmed))
      seqinfo(x)<-seqinfo(dd.hs)
      #GRCm38/mm10
      #dd<-toGRanges(EnsDb.Mmusculus.v79)
      #seqinfo(dd)
      #library(ensembldb)
      #library(GenomeInfoDb)
      seqlevelsStyle(x) <- seqlevelsStyle(dd.hs)
      re.out.trimmed<-trim(x, use.names=TRUE)
      overlaps.anno<-annoPeaks(re.out.trimmed,dd.hs)
      
      write.table(overlaps.anno,file=paste0(temp3,"/",x_name,"_annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
    },re.out,dd.hs)
    
  }
  
}

#' AnnotatePeak3
#'
#' @param input.file.dir 
#' @param output.file.dir 
#' @param genome 
#'
#' @return
#' @export
#'
#' @examples
#' 
#' input.file.dir="/media/H_driver/2016/Danny/Danny_chip_PeakCall/"
#' output.file.dir="/media/H_driver/2016/Danny/Danny_chip_PeakCall/"
#'
#' AnnotatePeak3(input.file.dir,output.file.dir,genome="Mm")
#' AnnotatePeak3(input.file.dir,output.file.dir,genome="Hs")
#' 
#' 
#' 
AnnotatePeak3<- function(input.file.dir,output.file.dir,genome) {
  
  library(ChIPpeakAnno)
  
  re<-ParserReadFiles(input.file.dir,"bed",output.file.dir)
  
  #print(file.name.2)
  
  file.name.2<-re$input
  output.dir.name=output.file.dir
  
  #dir.name=input.file.dir
  #input.file.pattern=input.file.pattern
  
  #dir.name=reformatPath(dir.name)
  
  #output.dir.name=reformatPath(output.file.dir)
  
  #print(output.dir.name)
  #temp=Sys.time()
  #temp1=gsub(":","-",Sys.time())
  #temp2=gsub(" ","-",temp1)
  #temp3=paste0(output.dir.name,"AnalysisResults_at_",temp2)
  
  temp3=output.dir.name
  
  dir.create(temp3)
  
  #file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  #file.name.2<-as.list(file.name)
  
  #names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",index.file)
  
  #print(file.name.2)
  
  file.name.3<-file.name.2
  
  #sample.name<-sapply(strsplit(names(file.name.3),split="_peaks_"),"[[",1)
  
  #names(file.name.3)=sample.name
  
  file.name.4 <-file.name.3
  
  re.out<-lapply(file.name.4,function(u){
    re=toGRanges(u,format="BED")
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  head(re.out[[1]])
  
  re.out.L<-lapply(re.out,function(u){
    re=length(u)
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  if(genome=="Mm"){
    
    annoData <- toGRanges(EnsDb.Mmusculus.v75, feature="gene")
    
    ol <- findOverlapsOfPeaks(re.out[c(2,4,1)])
    
    overlaps<-ol$peaklist$`11_2470IUPUI_WT_BM_SMC1_peaks.bed///13_2470IUPUI_WT_BM_Rad21_peaks.bed///10_WT_BM_ASXL1_peaks.bed`
    
    binOverFeature(overlaps, annotationData=annoData,
                   radius=5000, nbins=20, FUN=length, errFun=0,
                   ylab="count",
                   main="Distribution of aggregated peak numbers around TSS")
    
    overlaps.trimmed<-trim(overlaps, use.names=TRUE)
    
    library(EnsDb.Mmusculus.v79)
    dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
    #seqinfo(dd.GRCm39.mm10)
    #seqlevels(dd.GRCm39.mm10)
    
    seqlevels(dd.GRCm39.mm10,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
                                              "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
                                              "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
    
    seqinfo(overlaps)<-seqinfo(dd.GRCm39.mm10)
    #GRCm38/mm10
    #dd<-toGRanges(EnsDb.Mmusculus.v79)
    #seqinfo(dd)
    #library(ensembldb)
    #library(GenomeInfoDb)
    seqlevelsStyle(overlaps.trimmed) <- seqlevelsStyle(dd.GRCm39.mm10)
    overlaps.anno<-annoPeaks(overlaps.trimmed,dd.GRCm39.mm10)
    write.table(overlaps.anno,file=paste0(temp3,"/","annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
  }else if(genome=="Hs"){
    
    library(EnsDb.Hsapiens.v75)
    #annoData<-toGRanges(EnsDb.Hsapiens.v75, feature="gene")
    
    dd.hs<-toGRanges(EnsDb.Hsapiens.v75)
    
    print(seqinfo(dd.hs))
    print(seqlevels(dd.hs))
    
    #print(seqlevels(dd.hs)[,1])
    
    #print(seqlevels(re.out[[1]])[,1])
    
    # seqlevels(dd.hs,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
    #                                           "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
    #                                           "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
    
    #temp4=
    
    re.out.L<-lapply(1:length(re.out),function(u,re.out,dd.hs){
      
      x=re.out[[u]]
      x_name=names(re.out)[u]
      
      seqlevels(dd.hs,force=TRUE)<-seqinfo(x)@seqnames
      #print(seqinfo(re.out.trimmed))
      #print(seqlevels(re.out.trimmed))
      seqinfo(x)<-seqinfo(dd.hs)
      #GRCm38/mm10
      #dd<-toGRanges(EnsDb.Mmusculus.v79)
      #seqinfo(dd)
      #library(ensembldb)
      #library(GenomeInfoDb)
      seqlevelsStyle(x) <- seqlevelsStyle(dd.hs)
      re.out.trimmed<-trim(x, use.names=TRUE)
      overlaps.anno<-annoPeaks(re.out.trimmed,dd.hs)
      
      write.table(overlaps.anno,file=paste0(temp3,"/",x_name,"_annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
    },re.out,dd.hs)
    
  }
  
}

#' AnntationUsingChipSeeker
#' 
#' Use ChipSeeker to annotate data
#'
#' @param dir.name: path for bed files
#' @param input.file.pattern : input file pattern
#' @param out.dir.name output file directory
#' @param txdb Annotation databased to be used("hg19","hg38")
#' @param DD distance definition around TSS
#' 
#' @return
#' @export
#'
#' @examples
#' 
#' dir.name="/Volumes/Bioinformatics$/2017/DannyNewData/BindDiff/common_peaks_bed"
#' input.file.pattern="common_peaks.bed"
#' out.dir.name="/Volumes/Bioinformatics$/2017/DannyNewData/AnnotationNew4DBA"
#' txdb="hg19"
#' DD=5000
#' 
#' AnntationUsingChipSeeker(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000, AP=c("Promoter","Intron"))
#'
#' res.promoter <- AnntationUsingChipSeeker(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000,AP=c("Promoter"))
#' 
#' AnntationUsingChipSeeker(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000,AP=c("Intron"))
#'   
AnntationUsingChipSeeker <- function(dir.name,input.file.pattern,out.dir.name,txdb=c("hg19","hg38"),DD,distanceToTSS_cutoff=5000,assignGenomicAnnotation=TRUE,AP=c("Promoter", "5UTR", "3UTR", "Exon", "Intron","Downstream", "Intergenic")) {
  
  #re<-ParserReadFiles(dir.name,input.file.pattern)
  
  #re.bed<-re$input
  
  #re.peaks.only.bed.2 <- re.bed
  
  file.1 <- list.files(dir.name,pattern=input.file.pattern,all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  #re.bed<-re$input
  
  re.peaks.only.bed.2 <- file.1
  
  # if(length(dir(dir.name,pattern="peaks.bed"))!=0)
  # {
  # re.peaks.only.bed.2<-FL(re.bed,'peaks')
  # cat("peaks\n")
  # print(re.peaks.only.bed.2)
  # }
  # 
  # if(length(dir(dir.name,pattern="summits.bed"))!=0){
  # re.summits.only.bed<-FL(re.bed,'summits')
  # cat("summits\n")
  # print(re.summits.only.bed)
  # }
  
  txdb<-match.arg(txdb)
  
  switch (txdb,
          hg38 = {
            cat("Use hg38\n")
            txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
          },
          {
            cat("Use hg19\n") 
            txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
          }
  )
  
  APpath <- paste(AP,collapse = "_")
  
  temp3=file.path(out.dir.name,"Annotation",APpath)
  
  if(!dir.exists(temp3)){dir.create(temp3,recursive = TRUE)}
  
  d=DD
  res <- lapply(1:length(re.peaks.only.bed.2),function(u,re.peaks.only.bed.2,d){
    
    peaks=readPeakFile(re.peaks.only.bed.2[[u]],as="data.frame")
    
    print(head(peaks))
    
    peakAnno <- annotatePeak(re.peaks.only.bed.2[[u]], tssRegion=c(-d, d),
                             TxDb=txdb,assignGenomicAnnotation=assignGenomicAnnotation,genomicAnnotationPriority=AP,annoDb="org.Hs.eg.db")
    
    #select.index <- which(peakAnno$distanceToTSS<20,000 && peakAnno$distanceToTSS >= -20,000)
    dropAnnoM <- function (csAnno, distanceToTSS_cutoff) 
    {
      idx <- which(abs(mcols(csAnno@anno)[["distanceToTSS"]]) < 
                     distanceToTSS_cutoff)
      csAnno@anno <- csAnno@anno[idx]
      csAnno@peakNum <- length(idx)
      if (csAnno@hasGenomicAnnotation) {
        csAnno@annoStat <- ChIPseeker:::getGenomicAnnoStat(csAnno@anno)
        csAnno@detailGenomicAnnotation = csAnno@detailGenomicAnnotation[idx, 
                                                                        ]
      }
      csAnno
    }
    
    peakAnno <- dropAnnoM(peakAnno,distanceToTSS_cutoff = distanceToTSS_cutoff)
    
    x_name=names(re.peaks.only.bed.2)[u]
    cat(x_name)
    png(file.path(temp3,paste0(x_name,"_",d,APpath,"_around_tss_annotation_pie.png")))
    plotAnnoPie(peakAnno)
    dev.off()
    
    peaks.anno=as.data.frame(peakAnno)
    
    print(head(peaks.anno))
    
    #print(paste0(peaks[,c(2,3)]))
    print(colnames(peaks.anno))
    write.table(peaks.anno,file=file.path(temp3,paste0(x_name,"_",d,APpath,"_around_tss_annotation_4_only_mapped_peaks.xls")),
                row.names = FALSE,quote=FALSE,sep="\t")
    
    unmapped.peaks<-peaks[-which(paste0(peaks[,2],"_",peaks[,3]) %in% paste0(peaks.anno[,2],"_",peaks.anno[,3])),]
    
    cat(dim(peaks)[1]," ",dim(peaks.anno)[1]," ",dim(unmapped.peaks)[1],"\n")
    
    
    if(dim(unmapped.peaks)[1]!=0){
      
      colnames(unmapped.peaks)=colnames(peaks.anno)[1:6]
      
      unmapped.peaks.3<-smartbind(peaks.anno,unmapped.peaks)
      
      unmapped.peaks.4<-unmapped.peaks.3[order(unmapped.peaks.3[,1],unmapped.peaks.3[,2]),]
      
      write.table(unmapped.peaks.4,file=file.path(temp3,paste0(x_name,"_",d,APpath,"_around_tss_annotation_4_all_peaks.xls")),row.names = FALSE,quote=FALSE,sep="\t")
    }
    
    re <- list(peaksMappedOnly=peaks.anno,peaksAll=unmapped.peaks.4)
    re
  },re.peaks.only.bed.2,d)
  
  res
}

#' BamFileSortIndexVisualization 
#'
#' @param input.file.dir 
#' @param output.file.dir 
#' @param genome 
#'
#' @return
#' @export
#'
#' @examples
#' 
#' input.file.dir="/projects/scratch/bbc/Project/Danny_chip/Alignment/BWA/"
#' output.file.dir="/scratch/projects/bbc/aiminy_project/"
#' genome="Hs"
#' d=4000
#' BamFileSortIndexVisualization(input.file.dir,output.file.dir,4000,genome)
#' 
#'bsub -P bbc -J "zhaoCJun" -o %J.zhaoCJun.log -e %J.zhaoCJun.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::BamFileSortIndexVisualization("/projects/scratch/bbc/Project/Danny_chip/Alignment/BWA","/scratch/projects/bbc/aiminy_project/7_27_2017",400000,"Hs")'
#'
#' 
BamFileSortIndexVisualization <- function(input.file.dir,input.pattern,output.file.dir,d,genome,bam.sort=FALSE) {
  
  #library(ChIPpeakAnno)
  
  #re<-ParserReadFiles(input.file.dir,"bam")

  file.1 <- list.files(input.file.dir,pattern=input.pattern, all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  file.name.2<-file.1
  #output.dir.name=re$output
  
  #temp3=paste0(output.dir.name,"_visualization")
  
  temp3=output.file.dir
  
  if (!dir.exists(temp3))
  {
    dir.create(temp3, recursive = TRUE)
  }
    
  #dir.create(temp3)
  
  re.out<-file.name.2
  
  if(bam.sort==FALSE){
  
  cmd1="samtools sort"
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=tools::file_path_sans_ext(basename(x))
    cmd2=paste0(cmd1," ",x," ","-o"," ",file.path(temp3,paste0(x_name,"_sorted.bam")))
    
    print(cmd2)
    system(cmd2)
    
  },re.out,temp3)
  
  cmd3="samtools index"
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=tools::file_path_sans_ext(basename(x))
    cmd4=paste0(cmd3," ",file.path(temp3,paste0(x_name,"_sorted.bam")))
    
    #print(cmd2)
    system(cmd4)
    
  },re.out,temp3)
}
  
  cmd5="ngs.plot.r -G hg19 -R tss -C"
  cmd6="-O"
  #cmd7="-L 4000"
  cmd7=paste("-L",d,sep=" ")
  
  #cmd3="-L 4000 -RR 1 -CD 1 -CO \\\"blue\\\""
  
  #ngs.plot.r -G hg19 -R tss -C $1 -O $2 -L 4000 -RR 1 -CD 1 -CO "blue"
  
  #file.name.3<-file.name.2[-6]
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=tools::file_path_sans_ext(basename(x))
    
    cmd8=paste(cmd5,file.path(temp3,paste0(x_name,"_sorted.bam")),cmd6,file.path(temp3,x_name),cmd7,sep=" ")
    
    print(cmd8)
    system(cmd8, intern = TRUE, ignore.stderr = TRUE)
    
    #re=read.table(u,header=FALSE)
    #  re<-as.character(re[,1])
    #  #colnames(re)=c("Count","GeneName")
    #  re
  },re.out,temp3)
  
  
}

convertBam2StrandBw2 <- function(input.bam.file.dir, output.bw.file.dir, BigMem = FALSE,
                                 cores = 15, Memory = 25000, Wall.time = "72:00", span.ptile = 8)
{
  re <- parserreadfiles(input.bam.file.dir, "bam")
  
  res <- re$input
  
  m.id <- grep("login", system("hostname", intern = TRUE))
  
  if (!dir.exists(output.bw.file.dir))
  {
    dir.create(output.bw.file.dir, recursive = TRUE)
  }
  
  # job.name=paste0('bamSort[',length(res),']')
  
  cmd.l <- lapply(1:length(res), function(u, m.id, Wall.time, cores, Memory,
                                          span.ptile, res, output.bw.file.dir)
  {
    # path_name = dirname(res[[u]]) path_name2 <- basename(path_name)
    
    file_name = file_path_sans_ext(basename(res[[u]]))
    
    # file_name <- paste0(path_name2,'-',file_name)
    #u <- 3
    if (m.id == 1)
    {
      if (BigMem == TRUE)
      {
        cmd0 = paste(Wall.time, "-n", cores, "-q bigmem -R 'rusage[mem=",
                     Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                     sep = " ")
      } else
      {
        cmd0 = paste(Wall.time, "-n", cores, "-q general -R 'rusage[mem=",
                     Memory, "] span[ptile=", span.ptile, "]' aimin.yan@med.miami.edu",
                     sep = " ")
      }
      
      job.name = paste0("bam2wig.", u)
      cmd1 = paste0("bsub -P bbc -J \"", job.name, paste0("\" -o %J.",
                                                          job.name, ".log "), paste0("-e %J.", job.name, ".err -W"))
      
      # job.name=paste0('bamSort[',length(res),']') cmd1 = paste0('bsub -w
      # \'done(\'bamSort[*]\')\'', 'bsub -P bbc -J \'',job.name,paste0('\'
      # -o %J.log '),paste0('-e %J.err -W')) job.name=paste0('Bdg[',u,']') cmd1 =
      # paste0('bsub -w \'done(\'bamIndex[*]\') && done(\'Chrosome\')\'',
      # 'bsub -P bbc -J \'',job.name,paste0('\' -o %J.',job.name,'.log
      # '),paste0('-e %J.',job.name,'.err -W'))
      if (u <= 6)
      {
        cmd2 = paste("bam2wig.pl -pe --pos span --strand --bw  --bwapp $HOME/kentUtils/bin/wigToBigWig --out",
                     file.path(output.bw.file.dir, paste0(file_name, "_2.bw")),
                     "--in", res[[u]], sep = " ")
      } else
      {
        cmd2 = paste("bam2wig.pl --pos span --strand --bw --bwapp $HOME/kentUtils/bin/wigToBigWig --out",
                     file.path(output.bw.file.dir, paste0(file_name, "_2.bw")),
                     "--in", res[[u]], sep = " ")
      }
      cmd3 = paste(cmd1, cmd0, cmd2, sep = " ")
    } else
    {
      if (u <= 6)
      {
        cmd3 = paste("bam2wig.pl -pe --pos span --strand --bw --bwapp $HOME/kentUtils/bin/wigToBigWig --out",
                     file.path(output.bw.file.dir, paste0(file_name, "_2.bw")),
                     "--in", res[[u]], sep = " ")
      } else
      {
        cmd3 = paste("bam2wig.pl --pos span --strand --bw --bwapp $HOME/kentUtils/bin/wigToBigWig --out",
                     file.path(output.bw.file.dir, paste0(file_name, "_2.bw")),
                     "--in", res[[u]], sep = " ")
      }
    }
    
    cmd <- cmd3
    
    system(cmd)
    
    cmd
  }, m.id, Wall.time, cores, Memory, span.ptile, res, output.bw.file.dir)
  
}

#'R -e 'library(ChipSeq);ChipSeq:::plotBam(input.file.dir="/scratch/projects/bbc/Project/Danny_chip2/Alignment/BWA",file.type="*marked.bam",output.file.dir="/scratch/projects/bbc/aiminy_project/DannyNewNgsPlot")'

#'R -e 'library(ChipSeq); x <- ChipSeq:::plotBam(input.file.dir="/scratch/projects/bbc/Project/Danny_chip2/Alignment/BWA",file.type="*marked.bam",output.file.dir="/scratch/projects/bbc/aiminy_project/DannyNewNgsPlot",cores = 8, Memory = 16000,span.ptile = 4,wait = FALSE)'

#'R -e 'library(ChipSeq); x <- ChipSeq:::plotBam(input.file.dir="/scratch/projects/bbc/Project/Danny_chip2/Alignment/BWA",file.type="*marked.bam",output.file.dir="/scratch/projects/bbc/aiminy_project/DannyNewNgsPlot",job.option = "parallel", cores = 32, Memory = 16000,span.ptile = 16,wait = TRUE)'

plotBam <- function(input.file.dir,file.type,output.file.dir,job.option = "general",cores = 15, Memory = 25000, Wall.time = "72:00", span.ptile = 8,wait=TRUE) {
  
  #library(ChIPpeakAnno)
  
  #cat(job.option,"\n")
  #job.option <- as.character(job.option)
  #cat(job.option,"\n")
  
  re<-ParserReadFiles(input.file.dir,file.type)
  
  file.name.2<-re$input
  #output.dir.name=re$output
  
  re.out<-file.name.2
  
  m.id <- grep("login", system("hostname", intern = TRUE))
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  temp3= output.file.dir
  
  #cmd1="samtools sort"
  
  cmd.1 <- lapply(1:length(re.out),function(u,m.id,job.option,Wall.time, cores, Memory,
                                            span.ptile,re.out,temp3){
    
    file_name = file_path_sans_ext(basename(re.out[[u]]))
    
    # file_name <- paste0(path_name2,'-',file_name)
    #u <- 3
    if (m.id == 1)
    {
      
      #job.option <- match.arg(job.option)
      
      switch (job.option,
              parallel = {
                cmd0 = paste(Wall.time, "-n", cores, "-q parallel -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")        
              },
              bigmem = {
                cmd0 = paste(Wall.time, "-n", cores, "-q bigmem -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")
              },
              general = {
                cmd0 = paste(Wall.time, "-n", cores, "-q general -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")              
              }
      )
      
      job.name = paste0("bamSort.", u)
      cmd1 = paste0("bsub -P bbc -J \"", job.name, paste0("\" -o %J.",
                                                          job.name, ".log "), paste0("-e %J.", job.name, ".err -W"))
      
      
      cmd2=paste("samtools sort",re.out[[u]],file.path(temp3,paste0(file_name, "_sorted")),sep=" ")
      cmd3 = paste(cmd1, cmd0, cmd2, sep = " ")
    } else
    {
      cmd3 = paste("samtools sort",re.out[[u]],file.path(temp3,paste0(file_name, "_sorted")),sep=" ")
    }
    
    cmd <- cmd3
    
    system(cmd)
    
    cmd
    
  },m.id,job.option,Wall.time,cores,Memory,span.ptile,re.out,temp3)
  
  
  cmd.2 <- lapply(1:length(re.out),function(u,m.id,job.option,Wall.time, cores, Memory,
                                            span.ptile,re.out,temp3){
    
    file_name = file_path_sans_ext(basename(re.out[[u]]))
    
    # file_name <- paste0(path_name2,'-',file_name)
    #u <- 3
    if (m.id == 1)
    {
      
      #job.option <- match.arg(job.option)
      
      switch (job.option,
              parallel = {
                cmd0 = paste(Wall.time, "-n", cores, "-q parallel -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")        
              },
              bigmem = {
                cmd0 = paste(Wall.time, "-n", cores, "-q bigmem -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")
              },
              general = {
                cmd0 = paste(Wall.time, "-n", cores, "-q general -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")              
              }
      )
      
      job.name = paste0("bamIndex.", u)
      wait.job.name = paste0("bamSort.", u)
      
      if(wait == TRUE){
        cmd1 = paste0("bsub -w \"done(\"", wait.job.name, "\")\"", " -P bbc -J \"",
                      job.name, paste0("\" -o %J.", job.name, ".log "), paste0("-e %J.",
                                                                               job.name, ".err -W"))
      }else{
        cmd1 = paste0("bsub -P bbc -J \"",job.name, paste0("\" -o %J.", job.name, ".log "), paste0("-e %J.",
                                                                                                   job.name, ".err -W"))
      }
      cmd2=paste("samtools index",file.path(temp3,paste0(file_name, "_sorted.bam")),sep=" ")
      cmd3 = paste(cmd1, cmd0, cmd2, sep = " ")
    } else
    {
      cmd3 = paste("samtools index",file.path(temp3,paste0(file_name, "_sorted.bam")),sep=" ")
    }
    
    cmd <- cmd3
    
    system(cmd)
    
    cmd
    
  },m.id,job.option,Wall.time,cores,Memory,span.ptile,re.out,temp3)
  
  
  cmd.3 <- lapply(1:length(re.out),function(u,m.id,job.option,Wall.time, cores, Memory,
                                            span.ptile,re.out,temp3){
    
    file_name = file_path_sans_ext(basename(re.out[[u]]))
    
    # file_name <- paste0(path_name2,'-',file_name)
    #u <- 3
    if (m.id == 1)
    {
      #job.option <- match.arg(job.option)
      
      switch (job.option,
              parallel = {
                cmd0 = paste(Wall.time, "-n", cores, "-q parallel -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")        
              },
              bigmem = {
                cmd0 = paste(Wall.time, "-n", cores, "-q bigmem -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")
              },
              general = {
                cmd0 = paste(Wall.time, "-n", cores, "-q general -R 'rusage[mem=",
                             Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu",
                             sep = " ")              
              }
      )
      
      job.name = paste0("bamPlot.", u)
      wait.job.name = paste0("bamIndex.", u)
      cmd1 = paste0("bsub -w \"done(\"", wait.job.name, "\")\"", " -P bbc -J \"",
                    job.name, paste0("\" -o %J.", job.name, ".log "), paste0("-e %J.",
                                                                             job.name, ".err -W"))
      cmd5="ngs.plot.r -G hg19 -R tss -C"
      cmd6="-O"
      cmd7="-L 4000"
      cmd8=paste(cmd5,file.path(temp3,paste0(file_name,"_sorted.bam")),cmd6,file.path(temp3,paste0(file_name,"_sorted")),cmd7,sep=" ")
      
      
      cmd3 = paste(cmd1, cmd0, cmd8, sep = " ")
    } else
    {
      
      
      "ngs.plot.r -G hg19 -R tss -C config.txt -O 2017-03-02-01_S5_R1.tss -T 2017-03-02-01_S5_R1 -L 4000 -RR 1"
      "ngs.plot.r -G hg19 -R tss -C config.txt -O 2017-03-02-01_S5_R1.tss -T 2017-03-02-01_S5_R1 -L 4000 -RR 1 -GO max -SC global"
      
      cmd5="ngs.plot.r -G hg19 -R tss -C"
      cmd6="-O"
      cmd7="-L 4000"
      cmd8=paste(cmd5,file.path(temp3,paste0(file_name,"_sorted.bam")),cmd6,file.path(temp3,paste0(file_name,"_sorted")),cmd7,sep=" ")
      
      cmd3 = cmd8
    }
    
    cmd <- cmd3
    
    system(cmd)
    
    cmd
    
  },m.id,job.option,Wall.time,cores,Memory,span.ptile,re.out,temp3)
  
  #re <- list(cmd.1 = cmd.1,cmd.2=cmd.2,cmd.3=cmd.3)
  
  # return(re)
}

#' DrawVenn
#'
#' @param devtools
#' @param install_github
#' @param Vennerable
#' @param venneuler
#' @param VennDiagram
#'
#' @return
#' @export
#'
#' @examples
#'
#' out.dir.name="/media/H_driver/2016/Yang/Results/"
#' DrawVenn(out.dir.name)
#'
DrawVenn <- function(out.dir.name) {
  
  devtools::install_github("js229/Vennerable");
  library(Vennerable)
  
  BB=c(1:6153,6154:25736)
  
  CC=c(6154:25736,25737:29579)
  
  AA=c(6026:6153,6154:8462,25737:25776,29650:29719)
  
  Vstem <- Venn(list(ASXL1=AA,SMC1A=BB,RAD21=CC))
  
  SetLabels <- VennGetSetLabels(Vstem)
  
  Cstem3 <- compute.Venn(Vstem,doWeights=TRUE)
  
  SetLabels <- VennGetSetLabels(Cstem3)
  FaceLabels <- VennGetFaceLabels(Cstem3)
  
  FaceLabels[FaceLabels$FaceName=="010","x"] <- 10
  FaceLabels[FaceLabels$FaceName=="010","y"] <- 3
  
  SetLabels[SetLabels$Label=="SMC1A","hjust"] <- "right"
  SetLabels[SetLabels$Label=="SMC1A","x"] <- 110
  SetLabels[SetLabels$Label=="SMC1A","y"] <- 58.78653
  
  SetLabels[SetLabels$Label=="RAD21","x"] <- -80
  SetLabels[SetLabels$Label=="RAD21","y"] <- 58.78653
  
  SetLabels[SetLabels$Label=="ASXL1","y"] <-80
  
  Cstem3 <- VennSetSetLabels(Cstem3,SetLabels)
  
  Cstem3<-VennSetFaceLabels(Cstem3,FaceLabels)
  
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="010"),]$x<-90
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="010"),]$y<--35
  
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="001"),]$x<--90
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="001"),]$y<--35
  
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="101"),]$x<--18
  #Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="101"),]$y<--
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="101"),]$vjust<-"center"
  
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="110"),]$x<-19
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="110"),]$y<-49.86767
  
  c.3.set=sum(Cstem3@IndicatorWeight[,4])
  Cstem3@IndicatorWeight[which(row.names(Cstem3@IndicatorWeight)=="000"),4]=35000-c.3.set
  
  Cstem3@FaceLabels[which(Cstem3@FaceLabels$FaceName=="DarkMatter"),]$y<--130
  
  temp<-ol$venn_cnt
  
  index.A=grep(colnames(Cstem3@IndicatorWeight)[1],colnames(temp))
  index.B=grep("SMC1",colnames(temp))
  index.C=grep("Rad21",colnames(temp))
  
  temp2<-as.data.frame(temp[,c(index.A,index.B,index.C,4)])
  colnames(temp2)[1:3]=colnames(Cstem3@IndicatorWeight)[1:3]
  
  row.names(temp2)<-with(temp2,paste0(temp2$ASXL1,temp2$SMC1A,temp2$RAD21))
  
  row.names(temp2)<-row.names(Cstem3@IndicatorWeight)
  
  temp3<-merge(Cstem3@IndicatorWeight,temp2,by=0,sort=FALSE)
  
  Cstem3@IndicatorWeight[,4]<-temp3$Counts
  
  c.3.set=sum(Cstem3@IndicatorWeight[,4])
  Cstem3@IndicatorWeight[which(row.names(Cstem3@IndicatorWeight)=="000"),4]=35000-c.3.set
  
  grid.newpage()
  
  pdf(paste0(out.dir.name,"venn.pdf"))
  plot(Cstem3)
  dev.off()
  
}

#'  GetResultsFromDiffBind
#'  
#'  Use DiffBind to process peak profiles
#'
#' @param mcf7
#' @param Mergereplicates
#' @param output.file.dir 
#'
#' @return
#' @export
#'
#' @examples
#' output.file.dir="/Volumes/Bioinformatics$/2017/DannyNewData/BindDiff_7_21_2017_2"
#' mcf7=resmcf
#' re<-GetResultsFromDiffBind(mcf7,"yes",output.file.dir)
#' re<-GetResultsFromDiffBind(mcf2,"yes",output.file.dir)
#' 
GetResultsFromDiffBind<-function(mcf7,Mergereplicates=c("yes","no"),output.file.dir){
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  temp<-mcf7
  
  sampID.v<-colnames(temp$class) 
  sampID.v.2<-unlist(lapply(1:length(sampID.v),function(u,sampID.v){
    
    x=sampID.v[u]
    
    y=x
    y
  },sampID.v))
  
  
  colnames(temp$class)<-sampID.v.2
  temp$class[1,]<-sampID.v.2
  
  #temp<-dba(temp,mask =c(1,2,13,14,11,12,15,16))
  
  #Merge the replicates of each set 
  if(Mergereplicates=="yes"){
    temp2<-dba.peakset(temp,consensus = -DBA_REPLICATE)
    #temp22<-dba(temp2,mask =c(9,16,17:23))
    print(temp2)
    ##need to set the flexiable number identified from data sets
    t <- temp2
    temp22<-dba(t,mask =which(t$class[which(rownames(t$class)=="Replicate"),]=="1-2"))
  }else{
    temp22<-temp
  }
  
  print(temp22)
  
  Tissue1<-temp22$class[2,]
  Tissue2<-unique(temp22$class[2,])
  
  TF<-unique(temp22$class[3,])
  TF.n<-length(TF)
  
  temp3=file.path(output.file.dir,"Venn")
  
  if(!dir.exists(temp3))
  {
    dir.create(temp3)
  }
  
  for(i in 1:length(Tissue2)){
    po<-which(Tissue1 %in% Tissue2[i])
    
    print(po)
    
    if(length(po)==2)
    {
      png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po],collapse = "-vs-"),".png")))
      dba.plotVenn(temp22,mask=po,main=Tissue2[i])
      dev.off()
      
    }else if(length(po)==4){
      po1<-po[c(1,3)]
      po2<-po[c(2,4)]
      
      png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po1],collapse = "-vs-"),".png")))
      dba.plotVenn(temp22,mask=po1,main=Tissue2[i])
      dev.off()
      
      png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po2],collapse = "-vs-"),".png")))
      dba.plotVenn(temp22,mask=po2,main=Tissue2[i])
      dev.off()
      
    }else
    {
      cat(paste0("For ",Tissue2[i],": Only one peak profile for ",TF.n," TFs\n"))
    }
  }
  
  p.common<-lapply(1:length(Tissue2),function(u,Tissue1,Tissue2,temp22){
    
    po<-which(Tissue1 %in% Tissue2[u])
    
    if(length(po)==2)
    {
      common.peaks<-dba.overlap(temp22,mask=po)
      y<-common.peaks$inAll
    }else if(length(po)==4){
      po1<-po[c(1,3)]
      po2<-po[c(2,4)]
      
      common.peaks.1<-dba.overlap(temp22,mask=po1)
      y1<-common.peaks.1$inAll
      
      common.peaks.2<-dba.overlap(temp22,mask=po2)
      y2<-common.peaks.2$inAll
      
      y<-list(y1=y1,y2=y2)
      
      names(y)[1]<-paste0(colnames(temp22$class)[po1],collapse = "-vs-")
      names(y)[2]<-paste0(colnames(temp22$class)[po2],collapse = "-vs-")
      
    }else{
      y<-NULL}
    y
  },Tissue1,Tissue2,temp22)
  
  names(p.common)<-Tissue2
  
  p.common<-unlist(p.common,recursive = F)
  
  p.common<-p.common[lapply(p.common,length) > 0]
  
  if(!dir.exists(file.path(output.file.dir,"common_peaks_bed")))
  {
    dir.create(file.path(output.file.dir,"common_peaks_bed"))
    dir.create(file.path(output.file.dir,"common_peaks_bed","ucsc"))
    dir.create(file.path(output.file.dir,"common_peaks_bed","igv"))
  }
  
  #output common peaks to bed files
  
  lapply(1:length(p.common),function(u,p.common,output.file.dir){
    x=p.common[[u]]
    
    x_name=names(p.common)[u]
    
    df <- data.frame(seqnames=seqnames(x),
                     #starts=start(x)-1,
                     starts=start(x),
                     ends=end(x),
                     names=c(rep(".", length(x))),
                     scores=elementMetadata(x)[,1],
                     strands=strand(x))
    
    #assign strand
    df.str <- data.frame(seqnames=seqnames(x),
                         #starts=start(x)-1,
                         starts=start(x),
                         ends=end(x),
                         names=c(rep(".", length(x))),
                         scores=elementMetadata(x)[,1],
                         strands=c(rep(".", length(x))))
    
    df.str.1<-df.str[-grep("random",df.str$seqnames),]
    
    df.str.2<-df.str.1
    
    df.str.3<-df.str.2[-grep("chrUn",df.str.2$seqnames),]
    
    write.table(df,file=file.path(output.file.dir,"common_peaks_bed",paste0(x_name,"_cp_with_header.bed")),
                col.names=TRUE,row.names = FALSE,quote=FALSE,sep="\t")
    
    write.table(df,file=file.path(output.file.dir,"common_peaks_bed","ucsc",paste0(x_name,"_4_ucsc.bed")),
                col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
    
    write.table(df,file=file.path(output.file.dir,"common_peaks_bed",paste0(x_name,"_common_peaks.bed")),
                col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
    
    write.table(df,file=file.path(output.file.dir,"common_peaks_bed","igv",paste0(x_name,"_4_igv.bed")),
                col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
    
  },p.common,output.file.dir)
  
  AnntationUsingChipSeeker(file.path(output.file.dir,"common_peaks_bed","igv"),"bed",file.path(output.file.dir,"common_peaks_bed")
                           ,txdb="hg19",DD=5000,distanceToTSS_cutoff=10000)
  
  return(p.common)
}

#' GetSampleInfo
#'
#' GetSampleInfo 
#' 
#' @param input.sample.file
#' @param input.bam.file 
#'  
#' 
#' @return
#' @export
#'
#' @examples
#' 
#' input.sample.file <- '/Volumes/Bioinformatics$/2017/DannyNewData/SampleID_INFO_ChIP_new_Danny.csv'
#' 
#' input.bam.file <- '/Volumes/Bioinformatics$/2017/DannyNewData/sorted_bam_files.txt'
#' 
#' input.bam.file <- '/Volumes/Bioinformatics$/2017/DannyNewData/NewRe2Danny/sorted_bam_files_2.txt'
#' 
#' re <- GetSampleInfo(input.sample.file,input.bam.file)
#' 
#' 
GetSampleInfo <- function(input.sample.file, input.bam.file)
{
  
  re <- read.csv(input.sample.file)
  re.c <- colnames(re)
  
  bam.file <- read.table(input.bam.file, header = FALSE)
  
  yy <- apply(bam.file, 1, function(u)
  {
    y <- basename(u)
    pos <- regexpr("\\.", y)
    pos <- pos - 1
    y <- substr(y, 1, pos)
    y
  })
  
  bam.file.sample.name <- cbind(bam.file, yy)
  
  colnames(bam.file.sample.name) <- c("file.name", "ID")
  
  Ab.type <- unique(as.character(re[, colnames(re) == re.c[3]]))
  Cell.type <- unique(as.character(re[, colnames(re) == re.c[2]]))
  
  re1 <- cbind(re[, 1:3], paste0(re[, 2], "_", re[, 3]))
  
  re11 <- merge(re1, bam.file.sample.name, by = "ID")
  
  colnames(re11)[4] = "Cell_TF"
  
  chipseqCell <- function(n, es)
  {
    value <- list(name = n, es = es)
    attr(value, "class") <- "ChipSeqCell"
    value
  }
  
  cellType <- unique(as.character(re1[re1$Type_TF == "Input", ]$Type_Cell))
  
  
  yy <- lapply(cellType, function(u, re11)
  {
    n <- nchar(u)
    index1 <- which(substr(re11$Type_Cell, 1, n) == u)
    z <- re11[index1, ]
    index2 <- which(nchar(as.character(z$Type_Cell)) <= n + 2)
    
    z2 <- z[index2, ]
    
    cc <- chipseqCell(u, z2)
    
    cc
  }, re11)
  
  colnames(re1)[4] = "Cell_TF"
  
  Cell.Ab.type <- unique(as.character(re1[, 4]))
  
  re2 <- lapply(1:length(Cell.Ab.type), function(u, Cell.Ab.type, re1)
  {
    
    x = Cell.Ab.type[u]
    z = re1
    
    ZZ <- as.character(z[which(z[, 4] == x), 1])
    ZZ
    
  }, Cell.Ab.type, re1)
  
  names(re2) <- Cell.Ab.type
  
  re21 <- re2[lapply(re2, length) == 1]
  
  re22 <- re2[lapply(re2, length) == 2]
  
  re3 <- list(re1 = re1, re2 = re2, re21 = re21, re22 = re22, re11 = re11, 
              y = yy)
  
  return(re3)
  
}

#' peakcallwithinput
#' 
#' @export
#' @example 
#' 
#' genome='Hs'
#' re <- peakcallwithinput(input.sample.file,input.bam.file,genome,output.dir,peakcaller)
#' 
#' 
peakcallwithinput <- function(input.sample.file, input.bam.file, genome = c("Hs", 
                                                                            "hs", "HS", "hS"), output.dir, peakcaller = c("macs14", "macs2"), peakPvalue)
{
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$y
  
  output.dir.name = dirname(input.sample.file)
  
  temp3 = file.path(output.dir.name, output.dir)
  
  if (!dir.exists(temp3))
  {
    dir.create(temp3)
  }
  
  peakcaller <- match.arg(peakcaller)
  
  genome <- match.arg(genome)
  cmd10 <- paste("-f BAM", "-g", genome, "-n", sep = " ")
  
  switch(peakcaller, macs2 = {
    
    PATH1 = Sys.getenv("PATH")
    
    macs2_Lib = file.path("/nethome/axy148/NGS_tools/MACS/bin/")
    
    Sys.setenv(PATH = paste0(macs2_Lib, ":", PATH1))
    
    cmd1 <- Sys.which("macs2")[[1]]
    
    cat(cmd1, "\n")
    
    cmd9 = paste(cmd1, "callpeak -t", sep = " ")
    
    cmd11 <- paste("-p", peakPvalue, sep = " ")
    
  }, {
    cmd9 = "macs14 -t "
    
    cmd11 <- paste("-p", peakPvalue, sep = " ")
    
  })
  
  cellInfo.run <- lapply(1:length(cellInfo), function(u, cellInfo, temp3)
  {
    
    x.name = cellInfo[[u]]$name
    
    es <- cellInfo[[u]]$es
    
    x.input <- es[es$Type_TF == "Input", ]$file.name
    
    x.sample <- es[es$Type_TF != "Input", ]
    
    x.run <- apply(x.sample, 1, function(x)
    {
      
      y <- x
      
      ID <- y[1]
      Type_Cell <- y[2]
      Type_TF <- y[3]
      Cell_TF <- y[4]
      file.name <- y[5]
      xx <- file.name
      xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      cmd12 = paste(cmd9, xx, "-c", x.input, cmd10, file.path(temp3, paste0(xx.name, 
                                                                            "_hs_1.00e-05_", peakcaller)), cmd11, sep = " ")
      
      cmd12
      
    })
    
    x.run
    
  }, cellInfo, temp3)
  
  
  names(cellInfo.run) = unlist(lapply(cellInfo, function(u)
  {
    u$name
  }))
  
  zzz <- unlist(cellInfo.run)
  
  lapply(1:length(zzz), function(u, zzz)
  {
    
    cat(as.character(zzz[u][[1]]), "\n")
    cat("\n")
    
    system(as.character(zzz[u][[1]]))
    
  }, zzz)
  
  
  re <- list(cellInforun = cellInfo.run, zzz = zzz)
  
  AnntationUsingChipSeeker(temp3, "peaks.bed", temp3, DD = 5000)
  
  return(re)
  
}

select.sample <- c("MDA MB 231-DD-1_cJun", "MDA MB 231-1_cJun", "1833-1_cJun")
output.config.dir <- "~/"

configAndMultiplot <- function(res, select.sample, output.config.dir)
{
  
  if (!dir.exists(output.config.dir))
  {
    dir.create(output.config.dir)
  }
  
  x <- res$re11
  
  xx <- x[which(x$Cell_TF %in% select.sample), ]
  
  xxx <- cbind.data.frame(xx$file.name, rep("-1", dim(xx)[1]), gsub(" ", "-", 
                                                                    xx$Cell_TF))
  
  config.sample.name <- paste(gsub(" ", "-", select.sample), collapse = "-and-")
  
  config.file.name <- file.path(output.config.dir, paste0(config.sample.name, 
                                                          "-config.txt"))
  
  write.table(xxx, file = config.file.name, col.names = FALSE, row.names = FALSE, 
              quote = FALSE, sep = "\t")
  
  cmd0 <- "ngs.plot.r -G hg19 -R tss -C"
  
  cmd1 <- "-O"
  
  cmd2 <- "-L 4000 -RR 1 -CD 1 -CO \"blue\""
  
  output.results <- file.path(output.config.dir, paste0(config.sample.name, 
                                                        "results"))
  
  cmd <- paste(cmd0, config.file.name, cmd1, output.results, cmd2, sep = " ")
  
  system(cmd)
  
  return(config.file.name)
  
}

#' input.sample.file <- '/Volumes/Bioinformatics$/2017/DannyNewData/SampleID_INFO_ChIP_new_Danny.csv'
#' 
#' input.bam.file <- '/Volumes/Bioinformatics$/2017/DannyNewData/NewRe2Danny/sorted_bam_files_2.txt'
#' 
#' re <- ChipSeq:::matchBamInputGene(input.sample.file,input.bam.file,'$HOME/all_common_gene_unique.txt','$HOME/NgsConfigFile')
#' 
#' R -e 'libraray(ChipSeq);re <- ChipSeq:::matchBamInputGene('/scratch/projects/bbc/aiminy_project/DannyNewData2/SampleID_INFO_ChIP_new_Danny.csv','/scratch/projects/bbc/aiminy_project/DannyNewData2/sorted_bam_files_2.txt','$HOME/all_common_gene_unique.txt','$HOME/NgsConfigFile',,job.option = 'parallel')'
#' 
#' bsub -P bbc -J 'bamPlot' -o %J.bamPlot.log -e %J.bamPlot.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::matchBamInputGene('/scratch/projects/bbc/aiminy_project/DannyNewData2/SampleID_INFO_ChIP_new_Danny.csv','/scratch/projects/bbc/aiminy_project/DannyNewData2/sorted_bam_files_2.txt','~/all_common_gene_unique.txt','NgsConfigFile')'
#' 
#' bsub -P bbc -J "bamPlot" -o %J.bamPlot.log -e %J.bamPlot.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::matchBamInputGene("/scratch/projects/bbc/aiminy_project/DannyNewData2/SampleID_INFO_ChIP_new_Danny.csv","/scratch/projects/bbc/aiminy_project/DannyNewData2/sorted_bam_files_2.txt","~/all_common_gene_unique.txt","NgsConfigFile3",add.input= "yes")'
matchBamInputGene <- function(input.sample.file, input.bam.file, input.gene.list, 
                              output.dir, ngs.para = c("hg19", 4000, 1, 1, "total"), add.input = NULL)
{
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$y
  
  # output.dir.name = dirname(input.sample.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  temp3 = output.dir
  
  # cmd9 = 'ngs.plot.r -G' cmd10 = '-R' cmd11 = '-C' cmd12 = '-O' cmd13 = '-T'
  # cmd14 = '-L' cmd15 = '-RR' cmd16 = '-CD' cmd17= '-GO'
  
  
  
  cellInfo.run <- lapply(1:length(cellInfo), function(u, add.input, cellInfo, 
                                                      temp3)
  {
    
    x.name = cellInfo[[u]]$name
    
    es <- cellInfo[[u]]$es
    
    x.input <- es[es$Type_TF == "Input", ]$file.name
    
    x.sample <- es[es$Type_TF != "Input", ]
    
    x.run <- apply(x.sample, 1, function(x, add.input, temp3)
    {
      
      y <- x
      
      ID <- y[1]
      Type_Cell <- y[2]
      Type_TF <- y[3]
      Cell_TF <- y[4]
      file.name <- y[5]
      xx <- file.name
      xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      
      if (!is.null(add.input))
      {
        cmd12 = paste(paste0(xx, ":", x.input), input.gene.list, paste0(gsub(" ", 
                                                                             "-", Cell_TF), "_cJunAndp27"), sep = "\t")
      } else
      {
        cmd12 = paste(xx, input.gene.list, paste0(gsub(" ", 
                                                       "-", Cell_TF), "_cJunAndp27"), sep = "\t")
      }
      cat(cmd12, file = file.path(temp3, paste0(xx.name, "_config_cJunAndp27.txt")), 
          sep = "\t")
    }, add.input, temp3)
    
    x.run
    
  }, add.input, cellInfo, temp3)
  
  
  # dir.name=temp3 dir.name=reformatPath(dir.name)
  
  file.name = file.path(temp3, dir(temp3, recursive = TRUE))
  
  file.name.2 <- as.list(file.name)
  
  
  # names(file.name.2) = unlist(lapply(file.name.2, function(u) { u$name }))
  
  zzz <- unlist(file.name.2)
  
  lapply(1:length(zzz), function(u, zzz)
  {
    
    dir.name = dirname(zzz[u][[1]])
    file_name = file_path_sans_ext(basename(zzz[u][[1]]))
    
    cmd = paste("ngs.plot.r -G hg19 -R tss -C", zzz[u][[1]], "-O", file.path(dir.name, 
                                                                             paste0(file_name, ".tss")), "-T", file_name, "-L 4000 -RR 1 -CD 1 -GO total", 
                sep = " ")
    
    
    
    # system(as.character(zzz[u][[1]])) job.name = paste0('bamPlot.', u)
    # cmd.pegasus = usePegasus(job.option, Wall.time = '72:00',cores = 32,Memory
    # = 16000,span.ptile = 16,job.name) cmd2 = paste(cmd.pegasus,cmd,sep = ' ')
    
    cmd2 = cmd
    cat(cmd2, "\n")
    cat("\n")
    system(cmd2)
  }, zzz)
  
  
  re <- list(cellInforun = cellInfo.run, zzz = zzz)
  
  # AnntationUsingChipSeeker(temp3, 'peaks.bed', temp3, DD = 5000)
  
  return(re)
  
}

# /deepTools-1.5/bin/bamCompare --bamfile1 ChIP.bam --bamfile2 Input.bam \
# --binSize 25 --fragmentLength 200 --missingDataAsZero no \
# --ratio log2 --scaleFactorsMethod SES -o log2ratio_ChIP_vs_Input.bw

#' bsub -P bbc -J "bamCompare" -o %J.bamCompare.log -e %J.Compare.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::useBamCompare("/scratch/projects/bbc/aiminy_project/DannyNewData2/SampleID_INFO_ChIP_new_Danny.csv","/scratch/projects/bbc/aiminy_project/DannyNewData2/sorted_bam_files_2.txt","~/BamCompare")'

useBamCompare <- function(input.sample.file,input.bam.file,output.dir)
{
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$y
  
  # output.dir.name = dirname(input.sample.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  temp3 = output.dir
  
  # cmd9 = 'ngs.plot.r -G' cmd10 = '-R' cmd11 = '-C' cmd12 = '-O' cmd13 = '-T'
  # cmd14 = '-L' cmd15 = '-RR' cmd16 = '-CD' cmd17= '-GO'
  
  
  
  cellInfo.run <- lapply(1:length(cellInfo), function(u,cellInfo, 
                                                      temp3)
  {
    
    x.name = cellInfo[[u]]$name
    
    es <- cellInfo[[u]]$es
    
    x.input <- es[es$Type_TF == "Input", ]$file.name
    
    x.sample <- es[es$Type_TF != "Input", ]
    
    #print(x.sample)
    #print(x.input)
    
    x.run <- apply(x.sample, 1, function(x, x.input, temp3)
    {
      
      y <- x
      
      ID <- y[1]
      Type_Cell <- y[2]
      Type_TF <- y[3]
      Cell_TF <- y[4]
      file.name <- y[5]
      xx <- file.name
      xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      # ~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCompare  --bamfile1 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-03_S11_R1.marked_sorted.bam --bamfile2 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-17_S1_R1.marked_sorted.bam --binSize 25 --ratio log2 -o ~/BamCompare/log2ratio_2017-03-02-03_S11_R1.marked_sorted.bam_vs_2017-03-02-17_S1_R1.marked_sorted.bam.bw 
      
      cmd1 <- paste("~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCompare --bamfile1",xx,"--bamfile2",x.input,sep=" ")
      cmd2 <- "--binSize 25"
      cmd3 <- "--ratio log2 -o"
      
      cmd4 <- paste(cmd1,cmd2,cmd3,sep=" ") 
      
      cmd5 <- file.path(output.dir,paste0("log2ratio_",basename(as.character(xx)),"_vs_",basename(as.character(x.input)),".bw"))
      
      
      cmd6 <- paste(cmd4,cmd5,sep=" ") 
      
      cmd6
      
      #cat(cmd6, "\n")
      #cat("\n")
    }, x.input, temp3)
    
    x.run
    
  }, cellInfo,temp3)
  
  
  names(cellInfo.run) = unlist(lapply(cellInfo, function(u)
  {
    u$name
  }))
  
  sysyem("module unload python/2.7.3")
  
  zzz <- unlist(cellInfo.run)
  
  lapply(1:length(zzz), function(u, zzz)
  {
    
    cat(as.character(zzz[u][[1]]), "\n")
    cat("\n")
    
    system(as.character(zzz[u][[1]]))
    
  }, zzz)
  # # dir.name=temp3 dir.name=reformatPath(dir.name)
  # 
  # file.name = file.path(temp3, dir(temp3, recursive = TRUE))
  # 
  # file.name.2 <- as.list(file.name)
  # 
  # 
  # # names(file.name.2) = unlist(lapply(file.name.2, function(u) { u$name }))
  # 
  # zzz <- unlist(file.name.2)
  # 
  # lapply(1:length(zzz), function(u, zzz)
  # {
  #   
  #   dir.name = dirname(zzz[u][[1]])
  #   file_name = file_path_sans_ext(basename(zzz[u][[1]]))
  #   
  #   cmd = paste("ngs.plot.r -G hg19 -R tss -C", zzz[u][[1]], "-O", file.path(dir.name, 
  #                                                                            paste0(file_name, ".tss")), "-T", file_name, "-L 4000 -RR 1 -CD 1 -GO total", 
  #               sep = " ")
  #   
  #   
  #   
  #   # system(as.character(zzz[u][[1]])) job.name = paste0('bamPlot.', u)
  #   # cmd.pegasus = usePegasus(job.option, Wall.time = '72:00',cores = 32,Memory
  #   # = 16000,span.ptile = 16,job.name) cmd2 = paste(cmd.pegasus,cmd,sep = ' ')
  #   
  #   cmd2 = cmd
  #   cat(cmd2, "\n")
  #   cat("\n")
  #   system(cmd2)
  # }, zzz)
  # 
  # 
  # re <- list(cellInforun = cellInfo.run, zzz = zzz)
  # 
  # # AnntationUsingChipSeeker(temp3, 'peaks.bed', temp3, DD = 5000)
  
  #return(re)
  
}

#' x <- ChipSeq:::usePegasus('parallel', Wall.time = '72:00',cores = 32,Memory = 16000,span.ptile = 16,'bamPlot')

usePegasus <- function(job.option = c("general", "parallel", "bigmem"), Wall.time, 
                       cores, Memory, span.ptile, job.name, wait.job.name = NULL)
{
  
  job.option <- match.arg(job.option)
  
  switch(job.option, parallel = {
    cmd0 = paste(Wall.time, "-n", cores, "-q parallel -R 'rusage[mem=", 
                 Memory, "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu", 
                 sep = " ")
  }, bigmem = {
    cmd0 = paste(Wall.time, "-n", cores, "-q bigmem -R 'rusage[mem=", Memory, 
                 "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu", sep = " ")
  }, general = {
    cmd0 = paste(Wall.time, "-n", cores, "-q general -R 'rusage[mem=", Memory, 
                 "] span[ptile=", span.ptile, "]' -u aimin.yan@med.miami.edu", sep = " ")
  })
  
  if (!is.null(wait.job.name))
  {
    cmd1 = paste0("bsub -w \"done(\"", wait.job.name, "\")\"", " -P bbc -J \"", 
                  job.name, paste0("\" -o %J.", job.name, ".log "), paste0("-e %J.", 
                                                                           job.name, ".err -W"))
  } else
  {
    cmd1 = paste0("bsub -P bbc -J \"", job.name, paste0("\" -o %J.", job.name, 
                                                        ".log "), paste0("-e %J.", job.name, ".err -W"))
  }
  
  cmd = paste(cmd1, cmd0, sep = " ")
  
  return(cmd)
}

#' IndexBamFile
#'
#' @param input.file.dir 
#' @param input.file.pattern 
#' @param index.file 
#' @param output.file.dir 
#' @param genome 
#'
#' @return
#' @export
#'
#' @examples
#' 
#' input.file.dir="/projects/scratch/bbc/Project/Danny_chip/Alignment/BWA/"
#' input.file.pattern="*.bam"
#' index.file=9
#' output.file.dir="/scratch/projects/bbc/aiminy_project/
#' genome="Hs"
#' 
#' IndexBamFile(input.file.dir,input.file.pattern,index.file,output.file.dir,genome)
#' 
IndexBamFile <- function(input.file.dir,input.file.pattern,index.file,output.file.dir,genome) {
  
  #library(ChIPpeakAnno)
  
  dir.name=input.file.dir
  input.file.pattern=input.file.pattern
  
  dir.name=reformatPath(dir.name)
  output.dir.name=reformatPath(output.file.dir)
  
  #print(output.dir.name)
  temp=Sys.time()
  temp1=gsub(":","-",Sys.time())
  temp2=gsub(" ","-",temp1)
  temp3=paste0(output.dir.name,"AnalysisResults_at_",temp2)
  
  dir.create(temp3)
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",index.file)
  
  #print(file.name.2)
  
  re.out<-file.name.2
  
  cmd1="samtools index "
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=names(re.out)[u]
    cmd2=paste0(cmd1," ",x," ",paste0(temp3,"/",x_name,"_sorted"))
    
    #print(cmd2)
    system(cmd2)
    
  },re.out,temp3)
  
  #sample.name<-sapply(strsplit(names(file.name.3),split="_peaks_"),"[[",1)
  
  #names(file.name.3)=sample.name
  
  #file.name.4 <-file.name.3[-1]
  
  #re.out<-lapply(file.name.4,function(u){
  #  re=toGRanges(u,format="BED")
  #colnames(re)=c("Count","GeneName")
  #  re
  #})
  
  #head(re.out[[1]])
  
  #   re.out.L<-lapply(re.out,function(u){
  #     re=length(u)
  #     #colnames(re)=c("Count","GeneName")
  #     re
  #   })
  #   
  #   
  # #  cmd="samtools sort"
  # #  input.file="/scratch/projects/bbc/Project/Danny_chip2/Alignment/BWA/""2016-10-14-Hyunho-Yoon-16-1833-PF1502-1-p27_S4_R1.bam" 
  # #  output.file="/scratch/projects/bbc/aiminy_project/Bam_marked_sorted/""2016-10-14-Hyunho-Yoon-16-1833-PF1502-1-p27_S4_R1.bam_sorted"
  #   
  #   if(genome=="Mm"){
  #     
  #     annoData <- toGRanges(EnsDb.Mmusculus.v75, feature="gene")
  #     
  #     ol <- findOverlapsOfPeaks(re.out[c(2,4,1)])
  #     
  #     overlaps<-ol$peaklist$`11_2470IUPUI_WT_BM_SMC1_peaks.bed///13_2470IUPUI_WT_BM_Rad21_peaks.bed///10_WT_BM_ASXL1_peaks.bed`
  #     
  #     binOverFeature(overlaps, annotationData=annoData,
  #                    radius=5000, nbins=20, FUN=length, errFun=0,
  #                    ylab="count",
  #                    main="Distribution of aggregated peak numbers around TSS")
  #     
  #     overlaps.trimmed<-trim(overlaps, use.names=TRUE)
  #     
  #     library(EnsDb.Mmusculus.v79)
  #     dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
  #     #seqinfo(dd.GRCm39.mm10)
  #     #seqlevels(dd.GRCm39.mm10)
  #     
  #     seqlevels(dd.GRCm39.mm10,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
  #                                               "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
  #                                               "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
  #     
  #     seqinfo(overlaps)<-seqinfo(dd.GRCm39.mm10)
  #     #GRCm38/mm10
  #     #dd<-toGRanges(EnsDb.Mmusculus.v79)
  #     #seqinfo(dd)
  #     #library(ensembldb)
  #     #library(GenomeInfoDb)
  #     seqlevelsStyle(overlaps.trimmed) <- seqlevelsStyle(dd.GRCm39.mm10)
  #     overlaps.anno<-annoPeaks(overlaps.trimmed,dd.GRCm39.mm10)
  #     write.table(overlaps.anno,file=paste0(temp3,"/","annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
  #   }else if(genome=="Hs"){
  #     
  #     library(EnsDb.Hsapiens.v75)
  #     #annoData<-toGRanges(EnsDb.Hsapiens.v75, feature="gene")
  #     
  #     dd.hs<-toGRanges(EnsDb.Hsapiens.v75)
  #     
  #     print(seqinfo(dd.hs))
  #     print(seqlevels(dd.hs))
  #     
  #     #print(seqlevels(dd.hs)[,1])
  #     
  #     #print(seqlevels(re.out[[1]])[,1])
  #     
  #     # seqlevels(dd.hs,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
  #     #                                           "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
  #     #                                           "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
  #     
  #     #temp4=
  #     
  #     re.out.L<-lapply(1:length(re.out),function(u,re.out,dd.hs){
  #       
  #       x=re.out[[u]]
  #       x_name=names(re.out)[u]
  #       
  #       seqlevels(dd.hs,force=TRUE)<-seqinfo(x)@seqnames
  #       #print(seqinfo(re.out.trimmed))
  #       #print(seqlevels(re.out.trimmed))
  #       seqinfo(x)<-seqinfo(dd.hs)
  #       #GRCm38/mm10
  #       #dd<-toGRanges(EnsDb.Mmusculus.v79)
  #       #seqinfo(dd)
  #       #library(ensembldb)
  #       #library(GenomeInfoDb)
  #       seqlevelsStyle(x) <- seqlevelsStyle(dd.hs)
  #       re.out.trimmed<-trim(x, use.names=TRUE)
  #       overlaps.anno<-annoPeaks(re.out.trimmed,dd.hs)
  #       
  #       write.table(overlaps.anno,file=paste0(temp3,"/",x_name,"_annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
  #     },re.out,dd.hs)
  #     
  #   }
  
}

#' InstallRequiredPackage
#'
#' @return
#' @export
#'
#' @examples
#' InstallRequiredPackage()
#'
InstallRequiredPackage <- function() {
  
  #install from bioc
  
  source("http://bioconductor.org/biocLite.R")
  biocLite("GenomicRanges")
  biocLite("ChIPseeker")
  biocLite("regioneR")
  biocLite("DiffBind")
  biocLite("BSgenome.Hsapiens.UCSC.hg19")
  biocLite("TxDb.Mmusculus.UCSC.mm9.knownGene")
  biocLite("EnsDb.Hsapiens.v75")
  devtools::install_github("sheffien/simpleCache")
  biocLite("LOLA")
  biocLite("ChIPpeakAnno")
  biocLite("BSgenome.Mmusculus.UCSC.mm9")
  biocLite("motifStack")
  biocLite("BSgenome.Ecoli.NCBI.20080805")
  biocLite("EnsDb.Mmusculus.v75")
  biocLite("EnsDb.Mmusculus.v79")
  biocLite("BSgenome.Mmusculus.UCSC.mm10")
  biocLite("org.Mm.eg.db")
  
  #install from git
  devtools::install_github("nsheff/LOLA")
  
  
  #loading
  library(BiocInstaller)
  library(BiocInstaller)
  library(ChIPseeker)
  library(DiffBind)
  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
  library(TxDb.Mmusculus.UCSC.mm9.knownGene)
  library(LOLA)
  library("simpleCache")
  library(ChIPpeakAnno)
  library(rtracklayer)
  library(org.Mm.eg.db)
  library(motifStack)
  
  library(BSgenome.Mmusculus.UCSC.mm10)
  library(BSgenome.Mmusculus.UCSC.mm9)
  library(EnsDb.Mmusculus.v75)
  library(EnsDb.Mmusculus.v79)
  
  library(ggplot2)
}

#' Title
#'
#' @return
#' @export
#'
#' @examples
#' LoadRequiredPackage()
#'
LoadRequiredPackage <- function() {
  library(LOLA)
  
  library(ChIPseeker)
  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  library(clusterProfiler)
  library(regioneR)
  library(BSgenome.Hsapiens.UCSC.hg19)
}

#' ParserBamFile4NgsPlot
#'
#' @param dir.name
#' @param input.file.pattern
#'
#' @return
#' @export
#'
#' @examples
#'
#'
#' sh ~/Code/BashRunMACS1-4-2_4_Danny_chip_seq3.sh /scratch/projects/bbc/aiminy_project/Bam_sorted/ 7 ".sorted.bam$" /scratch/projects/bbc/aiminy_project/Bam_marked_sorted/
#'
#' dir.name="/scratch/projects/bbc/aiminy_project/Bam_sorted/"
#' index=7
#' input.file.pattern=".sorted.bam$"
#' out.dir.name="/scratch/projects/bbc/aiminy_project/Bam_marked_sorted/"
#' outfile="test.txt"
#'
#' re.from.bed.peng.4.venn<-ParserBamFile4NgsPlot(dir.name,index,input.file.pattern,out.dir.name,outfile)
#'
#' save.image(file=paste0(out.dir.name,"re_save_peng.RData"))
#' savehistory(file=paste0(out.dir.name,"re_save_peng.Rhistory"))
#'
ParserBamFile4NgsPlot<-function(dir.name,index,input.file.pattern,out.dir.name,outfile){
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  
  #file.name="/scratch/projects/bbc/Project/Danny_chip/Alignment/BWA/2016-10-14-Hyunho-Yoon-02-MDA-MB-231-2-cJun_S5_R1.marked.bam"
  
  #strsplit(file.name,split="\\/")
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",index)
  
  print(file.name.2)
  
  ab<-names(file.name.2)
  
  #ab<-"2016-10-14-Hyunho-Yoon-13-MDA-MB-231-DD-2-p27_S15_R1.marked.bam"
  
  ab2<-sapply(strsplit(ab,split="\\."),"[[",1)
  
  print(ab2)
  #ab3<-sapply(strsplit(ab2,split="\\-"),"[[",length(strsplit(ab2,split="\\-")[[1]]))
  
  
  re<-cbind(paste0(dir.name,names(file.name.2)),rep(-1,length(names(file.name.2))),ab2)
  
  write.table(re, file =paste0(out.dir.name,outfile), append = FALSE, quote = F, sep = " ",eol = "\n", na = "NA", dec = ".", row.names = F, col.names = F)
  
  
  cmd1="ngs.plot.r -G hg19 -R tss -C"
  cmd2="-O"
  cmd3="-L 4000"
  #cmd3="-L 4000 -RR 1 -CD 1 -CO \\\"blue\\\""
  
  #ngs.plot.r -G hg19 -R tss -C $1 -O $2 -L 4000 -RR 1 -CD 1 -CO "blue"
  
  
  #file.name.3<-file.name.2[-6]
  
  re.out<-lapply(file.name.2,function(u){
    
    cmd4=paste(cmd1,u,cmd2,u,cmd3,sep=" ")
    
    print(cmd4)
    system(cmd4, intern = TRUE, ignore.stderr = TRUE)
    
    #re=read.table(u,header=FALSE)
    #  re<-as.character(re[,1])
    #  #colnames(re)=c("Count","GeneName")
    #  re
  })
  
  #sample.name<-sapply(strsplit(names(re.out),split="_peaks_"),"[[",1)
  
  #names(re.out)=sample.name
  
  # A=re.out[[1]] #10_2470IUPUI_WT_BM_ASXL1
  # B=re.out[[2]] #10_WT_BM_ASXL1
  # C=re.out[[3]] #11_2470IUPUI_WT_BM_SMC1
  # D=re.out[[5]] #13_2470IUPUI_WT_BM_Rad21
  #
  # A.name=sample.name[1] #10_2470IUPUI_WT_BM_ASXL1
  # B.name=sample.name[2] #10_WT_BM_ASXL1
  # C.name=sample.name[3] #11_2470IUPUI_WT_BM_SMC1
  # D.name=sample.name[5] #13_2470IUPUI_WT_BM_Rad21
  #
  # #A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000)
  # #B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000))
  # #class(A)
  # #class(B)
  #
  #
  # # TestOverlap <- function(A, B, A.name,B.name,out.dir.name,n_permutations) {
  # #   NumOverLap<-numOverlaps(A, B, count.once=TRUE)
  # #   pt <- overlapPermTest(A=A, B=B, ntimes=n_permutations)
  # #   png(filename=paste0(out.dir.name,A.name,B.name,n_permutations,"_overlap_test.png"))
  # #   plot(pt)
  # #   dev.off()
  # #
  # #   lz <- localZScore(pt=pt, A=A, B=B)
  # #   png(filename=paste0(out.dir.name,A.name,B.name,n_permutations,"_localZScore.png"))
  # #   plot(lz)
  # #   dev.off()
  # # }
  # #
  # # TestOverlap(A,B,A.name,B.name,out.dir.name,100)
  # # TestOverlap(A,C,A.name,C.name,out.dir.name,100)
  # # TestOverlap(A,D,A.name,D.name,out.dir.name,100)
  # #
  # # TestOverlap(B,C,B.name,C.name,out.dir.name,100)
  # # TestOverlap(B,D,B.name,D.name,out.dir.name,100)
  # # TestOverlap(C,D,C.name,D.name,out.dir.name,100)
  # #
  #
  # # temp.name<-strsplit(names(file.name.2),split="\\.")
  #
  # #a = readBed(system.file("extdata", "examples/combined_regions.bed",
  # #                        package="LOLA"))
  #
  #
  # venn.plot <- venn.diagram(
  #   x = re.out[c(1,2)],
  #   filename = paste0(out.dir.name,names(re.out)[1],names(re.out)[2],"_overlap_venn.tiff"),
  #   height = 3000,
  #   width = 3500,
  #   resolution = 1000,
  #   col = "black",
  #   lty = "dotted",
  #   lwd = 1,
  #   fill = c("red","blue"),
  #   alpha = 0.50,
  #   label.col = c(rep("white",3)),
  #   cex = 0.5,
  #   fontfamily = "serif",
  #   fontface = "bold",
  #   cat.col = c("red","blue"),
  #   cat.cex = 0.5,
  #   cat.pos = 0.5,
  #   cat.dist = 0.05,
  #   cat.fontfamily = "serif"
  # )
  #
  # venn.plot <- venn.diagram(
  #   x = re.out[c(1,3,5)],
  #   filename =paste0(out.dir.name,names(re.out)[1],names(re.out)[3],names(re.out)[5],"_overlap_venn.tiff"),
  #   col = "black",
  #   lty = "dotted",
  #   lwd = 2,
  #   fill = c("red", "orange", "blue"),
  #   alpha = 0.50,
  #   label.col = c(rep("white",7)),
  #   cex = 1,
  #   fontfamily = "serif",
  #   fontface = "bold",
  #   cat.col = c("red", "orange", "blue"),
  #   cat.cex = 0.8,
  #   cat.fontfamily = "serif"
  # )
  
  
  #return(re.out)
}

#' Title
#'
#' @param dir.name
#' @param input.file.pattern
#'
#' @return
#' @export
#'
#' @examples
#'
#' dir.name="/media/H_driver/2016/Yang/BedFromPeakCall/"
#' input.file.pattern="*bam_mm_summits_from_PeakCall4Yang.bed"
#'
#' out.dir.name="/media/H_driver/2016/Yang/Results/"
#' output.test.file= "SMC1A_RAD21_overlap_test"
#'
#'
#' dir.name.2="/media/H_driver/2016/Yang/BedFromPeakCall_mm_100e_07/"
#' input.file.pattern.2="*_bam_mm_1_from_PeakCall4Yang.bed"
#'
#' out.dir.name.2="/media/H_driver/2016/Yang/Results/"
#' output.test.file.2= "SMC1A_RAD21_mm_100e-07_peaks_overlap_test"
#'
#' ParserBedFile(dir.name,input.file.pattern,out.dir.name)
#'
#' re.mm.100e.07<-ParserBedFile(dir.name.2,input.file.pattern.2,out.dir.name)
#'
#'
#' save.image(file=paste0(out.dir.name,"re_save_2.RData"))
#' savehistory(file=paste0(out.dir.name,"re_save_2.Rhistory"))
#'
ParserBedFile<-function(dir.name,input.file.pattern,out.dir.name){
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",7)
  
  print(file.name.2)
  
  file.name.3<-file.name.2[-6]
  
  re.out<-lapply(file.name.3,function(u){
    re=readBed(u)
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  sample.name<-sapply(strsplit(sapply(strsplit(names(re.out),split="_i"),"[[",1),split="2470IUPUI_"),"[[",2)
  
  A=re.out[[2]] #SMC1
  B=re.out[[4]] #Rad21
  C=re.out[[1]] #ASXL1
  
  A.name=sample.name[2] #SMC1 name
  B.name=sample.name[4] #Rad21 name
  C.name=sample.name[1] #ASXL1 name
  
  #A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000)
  #B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000))
  #class(A)
  #class(B)
  
  
  TestOverlap <- function(A, B, A.name,B.name,out.dir.name) {
    NumOverLap<-numOverlaps(A, B, count.once=TRUE)
    pt <- overlapPermTest(A=A, B=B, ntimes=50)
    png(filename=paste0(out.dir.name,A.name,B.name,"_overlap_test.png"))
    plot(pt)
    dev.off()
    
    lz <- localZScore(pt=pt, A=A, B=B)
    png(filename=paste0(out.dir.name,A.name,B.name,"_localZScore.png"))
    plot(lz)
    dev.off()
  }
  
  TestOverlap(A,B,A.name,B.name,out.dir.name)
  TestOverlap(A,C,A.name,C.name,out.dir.name)
  TestOverlap(B,C,B.name,C.name,out.dir.name)
  
  # temp.name<-strsplit(names(file.name.2),split="\\.")
  
  #a = readBed(system.file("extdata", "examples/combined_regions.bed",
  #                        package="LOLA"))
  
  return(re.out)
}

#' ParserBedFile4Peng
#'
#' @param dir.name
#' @param input.file.pattern
#'
#' @return
#' @export
#'
#' @examples
#'
#' dir.name="/media/H_driver/2016/Yang/BedFromPeng/"
#' input.file.pattern="*.bed"
#'
#' out.dir.name="/media/H_driver/2016/Yang/Results/"

#'
#' re.from.bed.peng<-ParserBedFile4Peng(dir.name,input.file.pattern,out.dir.name)
#'
#'
#' save.image(file=paste0(out.dir.name,"re_save_peng.RData"))
#' savehistory(file=paste0(out.dir.name,"re_save_peng.Rhistory"))
#'
ParserBedFile4Peng<-function(dir.name,input.file.pattern,out.dir.name){
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",7)
  
  print(file.name.2)
  
  #file.name.3<-file.name.2[-6]
  
  re.out<-lapply(file.name.2,function(u){
    re=readBed(u)
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  sample.name<-sapply(strsplit(names(re.out),split="_peaks_"),"[[",1)
  
  A=re.out[[1]] #10_2470IUPUI_WT_BM_ASXL1
  B=re.out[[2]] #10_WT_BM_ASXL1
  C=re.out[[3]] #11_2470IUPUI_WT_BM_SMC1
  D=re.out[[5]] #13_2470IUPUI_WT_BM_Rad21
  
  A.name=sample.name[1] #10_2470IUPUI_WT_BM_ASXL1
  B.name=sample.name[2] #10_WT_BM_ASXL1
  C.name=sample.name[3] #11_2470IUPUI_WT_BM_SMC1
  D.name=sample.name[5] #13_2470IUPUI_WT_BM_Rad21
  
  #A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000)
  #B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000))
  #class(A)
  #class(B)
  
  
  TestOverlap <- function(A, B, A.name,B.name,out.dir.name,n_permutations) {
    NumOverLap<-numOverlaps(A, B, count.once=TRUE)
    pt <- overlapPermTest(A=A, B=B, ntimes=n_permutations)
    png(filename=paste0(out.dir.name,A.name,B.name,n_permutations,"_overlap_test.png"))
    plot(pt)
    dev.off()
    
    lz <- localZScore(pt=pt, A=A, B=B)
    png(filename=paste0(out.dir.name,A.name,B.name,n_permutations,"_localZScore.png"))
    plot(lz)
    dev.off()
  }
  
  TestOverlap(A,B,A.name,B.name,out.dir.name,100)
  TestOverlap(A,C,A.name,C.name,out.dir.name,100)
  TestOverlap(A,D,A.name,D.name,out.dir.name,100)
  
  TestOverlap(B,C,B.name,C.name,out.dir.name,100)
  TestOverlap(B,D,B.name,D.name,out.dir.name,100)
  TestOverlap(C,D,C.name,D.name,out.dir.name,100)
  
  
  # temp.name<-strsplit(names(file.name.2),split="\\.")
  
  #a = readBed(system.file("extdata", "examples/combined_regions.bed",
  #                        package="LOLA"))
  
  return(re.out)
}

#' ParserBedFile4PengVenn
#'
#' @param dir.name
#' @param input.file.pattern
#'
#' @return
#' @export
#'
#' @examples
#'
#' dir.name="/media/H_driver/2016/Yang/BedFromPeng/"
#' input.file.pattern="*combine_chr_st_end.bed"
#'
#' input.file.pattern="*combine_chr_st.bed"
#'
#'
#'
#' out.dir.name="/media/H_driver/2016/Yang/Results/"
#'
#' re.from.bed.peng.4.venn<-ParserBedFile4PengVenn(dir.name,input.file.pattern,out.dir.name)
#'
#'
#' save.image(file=paste0(out.dir.name,"re_save_peng.RData"))
#' savehistory(file=paste0(out.dir.name,"re_save_peng.Rhistory"))
#'
ParserBedFile4PengVenn<-function(dir.name,input.file.pattern,out.dir.name){
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",7)
  
  print(file.name.2)
  
  #file.name.3<-file.name.2[-6]
  
  re.out<-lapply(file.name.2,function(u){
    re=read.table(u,header=FALSE)
    re<-as.character(re[,1])
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  sample.name<-sapply(strsplit(names(re.out),split="_peaks_"),"[[",1)
  
  names(re.out)=sample.name
  
  A=re.out[[1]] #10_2470IUPUI_WT_BM_ASXL1
  B=re.out[[2]] #10_WT_BM_ASXL1
  C=re.out[[3]] #11_2470IUPUI_WT_BM_SMC1
  D=re.out[[5]] #13_2470IUPUI_WT_BM_Rad21
  
  A.name=sample.name[1] #10_2470IUPUI_WT_BM_ASXL1
  B.name=sample.name[2] #10_WT_BM_ASXL1
  C.name=sample.name[3] #11_2470IUPUI_WT_BM_SMC1
  D.name=sample.name[5] #13_2470IUPUI_WT_BM_Rad21
  
  #A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000)
  #B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000))
  #class(A)
  #class(B)
  
  
  # TestOverlap <- function(A, B, A.name,B.name,out.dir.name,n_permutations) {
  #   NumOverLap<-numOverlaps(A, B, count.once=TRUE)
  #   pt <- overlapPermTest(A=A, B=B, ntimes=n_permutations)
  #   png(filename=paste0(out.dir.name,A.name,B.name,n_permutations,"_overlap_test.png"))
  #   plot(pt)
  #   dev.off()
  #
  #   lz <- localZScore(pt=pt, A=A, B=B)
  #   png(filename=paste0(out.dir.name,A.name,B.name,n_permutations,"_localZScore.png"))
  #   plot(lz)
  #   dev.off()
  # }
  #
  # TestOverlap(A,B,A.name,B.name,out.dir.name,100)
  # TestOverlap(A,C,A.name,C.name,out.dir.name,100)
  # TestOverlap(A,D,A.name,D.name,out.dir.name,100)
  #
  # TestOverlap(B,C,B.name,C.name,out.dir.name,100)
  # TestOverlap(B,D,B.name,D.name,out.dir.name,100)
  # TestOverlap(C,D,C.name,D.name,out.dir.name,100)
  #
  
  # temp.name<-strsplit(names(file.name.2),split="\\.")
  
  #a = readBed(system.file("extdata", "examples/combined_regions.bed",
  #                        package="LOLA"))
  
  
  venn.plot <- venn.diagram(
    x = re.out[c(1,2)],
    filename = paste0(out.dir.name,names(re.out)[1],names(re.out)[2],"_overlap_venn.tiff"),
    height = 3000,
    width = 3500,
    resolution = 1000,
    col = "black",
    lty = "dotted",
    lwd = 1,
    fill = c("red","blue"),
    alpha = 0.50,
    label.col = c(rep("white",3)),
    cex = 0.5,
    fontfamily = "serif",
    fontface = "bold",
    cat.col = c("red","blue"),
    cat.cex = 0.5,
    cat.pos = 0.5,
    cat.dist = 0.05,
    cat.fontfamily = "serif"
  )
  
  venn.plot <- venn.diagram(
    x = re.out[c(1,3,5)],
    filename =paste0(out.dir.name,names(re.out)[1],names(re.out)[3],names(re.out)[5],"_overlap_venn.tiff"),
    col = "black",
    lty = "dotted",
    lwd = 2,
    fill = c("red", "orange", "blue"),
    alpha = 0.50,
    label.col = c(rep("white",7)),
    cex = 1,
    fontfamily = "serif",
    fontface = "bold",
    cat.col = c("red", "orange", "blue"),
    cat.cex = 0.8,
    cat.fontfamily = "serif"
  )
  
  
  return(re.out)
}

#' ParserBedFile4PengXls
#'
#' @param dir.name
#' @param input.file.pattern
#'
#' @return
#' @export
#'
#' @examples
#'
#' dir.name="/media/H_driver/2016/Yang/MACS/MACS/"
#' input.file.pattern="*.xls"
#'
#' out.dir.name="/media/H_driver/2016/Yang/Results/"

#'
#' re.from.bed.peng.Xls<-ParserBedFile4PengXls(dir.name,input.file.pattern,out.dir.name)
#'
#' re.from.bed.peng.Xls.2<-ParserBedFile4PengXls(dir.name,input.file.pattern,out.dir.name)
#'
#' re.from.bed.peng.Xls.3<-ParserBedFile4PengXls(dir.name,input.file.pattern,out.dir.name)
#'
#' save.image(file=paste0(out.dir.name,"re_save_peng.RData"))
#' savehistory(file=paste0(out.dir.name,"re_save_peng.Rhistory"))
#'
ParserBedFile4PengXls<-function(dir.name,input.file.pattern,out.dir.name){
  
  dir.name=reformatPath(dir.name)
  out.dir.name=reformatPath(out.dir.name)
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",8)
  
  print(file.name.2)
  
  re.out<-lapply(names(file.name.2),function(u){
    
    
    #print(names(u))
    uu=file.name.2[[u]]
    
    re=read.table(uu,header=TRUE)
    colnames(re)[c(7,9)]=c("-10*LOG10(pvalue)","FDR(%)")
    temp<-re
    temp$start<-temp$start-1
    temp$end<-temp$end-1
    temp$summit<-temp$summit-1
    temp
    
    summitPeak<-temp$start+temp$summit
    
    temp2<-temp
    
    temp2$start<-summitPeak-49
    temp2$end<-summitPeak+50
    temp2$summit<-summitPeak
    temp2$length<-temp2$end-temp2$start+1
    temp2
    rownames(temp2)<-paste0("MACS_peak_",rownames(temp2))
    temp3<-toGRanges(temp2)
    names(temp3)<-rownames(temp2)
    
    
    dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
    genome(temp3)<-genome(dd.GRCm39.mm10)
    
    seqlevels(dd.GRCm39.mm10,force=TRUE) <- seqlevels(temp3)
    
    seqinfo(temp3)<-seqinfo(dd.GRCm39.mm10)
    
    temp3.trimmed<-trim(temp3, use.names=TRUE)
    
    genome.mm10<-getBSgenome("BSgenome.Mmusculus.UCSC.mm10")
    
    genome(temp3.trimmed)<-"mm10"
    
    seq.temp3<-getAllPeakSequence(temp3.trimmed,genome=genome.mm10)
    
    write2FASTA(seq.temp3, paste0(out.dir.name,u,".fa"))
    
    re2<-list(originalPeak=re,down1Peak=temp,aroundSummit100Peak=temp2,GR=temp3)
    re2
  })
  
  sample.name<-sapply(strsplit(names(file.name.2),split="_peaks_"),"[[",1)
  
  names(re.out)<-sample.name
  
  
  return(re.out)
}

#' ParserReadFiles
#'
#' @param input.file.dir 
#' @param output.file.dir 
#'
#' @return
#' @export
#'
#' @examples
#' 
#' input.file.dir="~/TestBam/"
#' out.file.dir="~/"
#' testR<-ParserReadFiles(input.file.dir) 
#' 
#' 
ParserReadFiles <- function(input.file.dir,input.file.type) {
  
  #For input
  dir.name=input.file.dir
  dir.name=reformatPath(dir.name)
  
  file.name=file.path(dir.name,dir(dir.name,recursive = TRUE))
  
  file.name.2<-as.list(file.name)
  
  file.name.3<-lapply(1:length(file.name.2),function(u,input.file.type,file.name.2){
    
    tmp=file.name.2
    
    x=tmp[[u]]
    path_name=dirname(x)
    file_name=basename(x)
    
    n <- length(grep(input.file.type,file_name))
    
    #if(file_ext(file_name)==input.file.type)
    if(n == 1) {
      re<-file.path(path_name,file_name)
      #names(re)[u]<-file_name
    }else
    {re<-NULL}
    re
  },input.file.type,file.name.2)
  
  file.name.4<-file.name.3[lapply(file.name.3, length) > 0]
  
  names(file.name.4)=unlist(lapply(1:length(file.name.4),function(u,file.name.4){
    tmp=file.name.4
    x=tmp[[u]]
    path_name=dirname(x)
    file_name=basename(x)
    file_name
  },file.name.4))
  
  #print(file.name.4)
  
  #For output
  #output.dir.name=reformatPath(output.file.dir)
  #print(output.dir.name)
  
  #temp=Sys.time()
  #temp1=gsub(":","-",temp)
  #temp2=gsub(" ","-",temp1)
  #temp3=file.path(output.dir.name,"ReadBam_at_",temp2)
  #temp3=output.dir.name
  #dir.create(temp3)
  
  re2<-list(input=file.name.4)
  
  return(re2)
}

#' ChipSeq:::getGeneBedName("/Volumes/Bioinformatics$/2017/DannyNewData/NewRe2Danny","peaks.csv","/Volumes/Bioinformatics$/2017/DannyNewData/NewRe2Danny")
#' 
getGeneBedName <- function(input.file.dir,file.pattern,output.file.dir) {
  
  re<-ParserReadFiles(input.file.dir,file.pattern)
  
  file.name.2<-re$input
  
  re.out<-file.name.2
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  temp3 =output.file.dir
  
  cmd.1 <- lapply(1:length(re.out),function(u,re.out,temp3){
    
    x <- read.csv(re.out[[u]],header = TRUE)
    
    xx <- x[grep("Promoter",x$annotation),]
    
    chr <- which(colnames(xx) == "seqnames")
    gs  <- which(colnames(xx) == "geneStart")
    ge  <- which(colnames(xx) == "geneEnd")
    
    file_name = file_path_sans_ext(basename(re.out[[u]]))
    
    write.table(xx[,c(chr,gs,ge)],file=file.path(temp3,paste0(file_name,"_gene.bed")),quote = F,col.names = F,row.names = F,sep="\t")
    
    write.table(unique(xx$SYMBOL),file=file.path(temp3,paste0(file_name,"_gene_name.txt")),quote = F,col.names = F,row.names = F)
    
    ps  <- which(colnames(xx) == "start")
    pe  <- which(colnames(xx) == "end")
    
    write.table(xx[,c(chr,ps,pe)],file=file.path(temp3,paste0(file_name,"_peaks.bed")),quote = F,col.names = F,row.names = F,sep="\t")
    
  },re.out,temp3)
  
}

#' PeakCallAndAnnotation
#'
#' Call peak for bam files using macs14 and perform peak annotation using ChIPpeakAnno
#' 
#' @param input.file.dir 
#' @param input.file.pattern 
#' @param index.file 
#' @param output.file.dir 
#' @param genome 
#'
#' @return
#' @export
#'
#' @examples
#' 
#' input.file.dir="/projects/scratch/bbc/Project/Danny_chip/Alignment/BWA/"
#' input.file.pattern="*.bam"
#' index.file=9
#' output.file.dir="/scratch/projects/bbc/aiminy_project/
#' genome="Hs"
#' 
#' PeakCallAndAnnotation(input.file.dir,output.file.dir,genome)
#' 
PeakCallAndAnnotation <- function(input.file.dir,output.file.dir,genome) {
  
  
  re<-ParserReadFiles(input.file.dir,"bam",output.file.dir)
  
  
  file.name.2<-re$input
  output.dir.name=re$output
  
  
  temp3=paste0(output.dir.name,"_PeakCall")
  
  dir.create(temp3)
  
  re.out<-file.name.2
  
  cmd9="macs14 -t "
  cmd10="-f BAM -g hs -n "
  cmd11=" -m 6,18 --bw=200 -p 0.00001"
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=names(re.out)[u]
    
    cmd12=paste(cmd9,x,cmd10,paste0(temp3,"/",paste0(x_name,"_hs_1.00e-05_macs142")),cmd11,sep=" ")
    
    print(cmd12)
    system(cmd12, intern = TRUE, ignore.stderr = TRUE)
    
    #re=read.table(u,header=FALSE)
    #  re<-as.character(re[,1])
    #  #colnames(re)=c("Count","GeneName")
    #  re
  },re.out,temp3)
  
  #AnnotatePeak2(paste0(temp3,"/"),"*macs142_peaks.bed",7,paste0(output.dir.name,"PeakAnnotation_at_",temp2),genome="Hs")
  
  AnnotatePeak3(paste0(temp3,"/"),paste0(output.dir.name,"_PeakAnnotation"),
                genome="Hs")
  
  BamFileSortIndexVisualization2(re,genome)
  
}

BamFileSortIndexVisualization2 <- function(input,genome) {
  
  #library(ChIPpeakAnno)
  
  re<-input
  
  file.name.2<-re$input
  output.dir.name=re$output
  
  temp3=paste0(output.dir.name,"_visualization")
  
  dir.create(temp3)
  
  re.out<-file.name.2
  
  cmd1="samtools sort"
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=names(re.out)[u]
    cmd2=paste0(cmd1," ",x," ",paste0(temp3,"/",x_name,"_sorted"))
    
    #print(cmd2)
    system(cmd2)
    
  },re.out,temp3)
  
  cmd3="samtools index"
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=names(re.out)[u]
    cmd4=paste0(cmd3," ",paste0(temp3,"/",x_name,"_sorted.bam"))
    
    #print(cmd2)
    system(cmd4)
    
  },re.out,temp3)
  
  cmd5="ngs.plot.r -G hg19 -R tss -C"
  cmd6="-O"
  cmd7="-L 4000"
  #cmd3="-L 4000 -RR 1 -CD 1 -CO \\\"blue\\\""
  
  #ngs.plot.r -G hg19 -R tss -C $1 -O $2 -L 4000 -RR 1 -CD 1 -CO "blue"
  
  #file.name.3<-file.name.2[-6]
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=names(re.out)[u]
    
    cmd8=paste(cmd5,paste0(temp3,"/",paste0(x_name,"_sorted.bam")),cmd6,paste0(temp3,"/",paste0(x_name,"_sorted")),cmd7,sep=" ")
    
    print(cmd8)
    system(cmd8, intern = TRUE, ignore.stderr = TRUE)
    
    #re=read.table(u,header=FALSE)
    #  re<-as.character(re[,1])
    #  #colnames(re)=c("Count","GeneName")
    #  re
  },re.out,temp3)
  
}

#' ProcessUsingCHIPpeakAnno
#'
#' @param input.file.dir
#' @param input.file.pattern
#' @param output.file.dir
#'
#' @return
#' @export
#'
#' @examples
#' input.file.dir="/media/H_driver/2016/Yang/MACS/MACS/"
#' input.file.pattern="*.bed"
#' output.file.dir="/media/H_driver/2016/Yang/MACS/MACS/"
#'
#' ProcessUsingCHIPpeakAnno(input.file.dir,input.file.pattern,output.file.dir)
#'
ProcessUsingCHIPpeakAnno <- function(input.file.dir,input.file.pattern,output.file.dir) {
  
  library(ChIPpeakAnno)
  
  dir.name=input.file.dir
  input.file.pattern=input.file.pattern
  
  dir.name=reformatPath(dir.name)
  
  output.dir.name=reformatPath(output.file.dir)
  dir.create(output.dir.name)
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",8)
  
  print(file.name.2)
  
  file.name.3<-file.name.2
  
  sample.name<-sapply(strsplit(names(file.name.3),split="_peaks_"),"[[",1)
  
  names(file.name.3)=sample.name
  
  file.name.4 <-file.name.3[-1]
  
  re.out<-lapply(file.name.4,function(u){
    re=toGRanges(u,format="BED")
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  head(re.out[[1]])
  
  re.out.L<-lapply(re.out,function(u){
    re=length(u)
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  annoData <- toGRanges(EnsDb.Mmusculus.v75, feature="gene")
  annoData[1:2]
  
  binOverFeature(overlaps, annotationData=annoData,
                 radius=5000, nbins=20, FUN=length, errFun=0,
                 ylab="count",
                 main="Distribution of aggregated peak numbers around TSS")
  
  ol <- findOverlapsOfPeaks(re.out[c(2,4,1)])
  
  overlaps<-ol$peaklist$`11_2470IUPUI_WT_BM_SMC1_peaks.bed///13_2470IUPUI_WT_BM_Rad21_peaks.bed///10_WT_BM_ASXL1_peaks.bed`
  
  re<-makeVennDiagram(re.out[c(2,4,1)],NameOfPeaks=c("SMC1A", "RAD21","ASXL1"),totalTest=35000)
  
  #fisher exact test
  
  UseFisher <- function(temp.ct,index.A,index.B,totalN) {
    total.peaks=totalN
    A=sum(temp.ct[which(temp.ct[,index.A]==1&temp.ct[,index.B]==1),4])
    B=sum(temp.ct[which(temp.ct[,index.A]==1&temp.ct[,index.B]==0),4])
    C=sum(temp.ct[which(temp.ct[,index.A]==0&temp.ct[,index.B]==1),4])
    D=total.peaks-(A+B+C)
    ctb<-matrix(c(A,B,C,D),nrow = 2,dimnames =list(c("In", "Not"),c("In", "Not")))
    
    #re<-fisher.test(ctb)
    print(ctb)
    re.fisher<-fisher.test(ctb, alternative='greater')[c("p.value","estimate")]
    re.fisher
  }
  
  
  temp.ct<-ol$venn_cnt
  
  #A vs B
  index.A<-grep("SMC1",colnames(temp.ct))
  index.B<-grep("Rad21",colnames(temp.ct))
  tempRe<-UseFisher(temp.ct,index.A,index.B,35000)
  pVal.fisher.AB=tempRe$p.value
  OR.fisher.AB=tempRe$estimate
  
  #A vs C
  index.A<-grep("SMC1",colnames(temp.ct))
  index.B<-grep("ASXL1",colnames(temp.ct))
  tempRe<-UseFisher(temp.ct,index.A,index.B,35000)
  pVal.fisher.AC=tempRe$p.value
  OR.fisher.AC=tempRe$estimate
  
  #B vs C
  index.A<-grep("Rad21",colnames(temp.ct))
  index.B<-grep("ASXL1",colnames(temp.ct))
  tempRe<-UseFisher(temp.ct,index.A,index.B,35000)
  pVal.fisher.BC=tempRe$p.value
  OR.fisher.BC=tempRe$estimate
  
  pVal.fisher.AB
  OR.fisher.AB
  
  pVal.fisher.AC
  OR.fisher.AC
  
  pVal.fisher.BC
  OR.fisher.BC
  
  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
  
  library(TxDb.Hsapiens.UCSC.mm9.knownGene)
  
  aCR<-assignChromosomeRegion(overlaps, nucleotideLevel=FALSE,
                              precedence=c("Promoters", "immediateDownstream",
                                           "fiveUTRs", "threeUTRs",
                                           "Exons", "Introns"),
                              TxDb=TxDb.Mmusculus.UCSC.mm9.knownGene)
  
  
  
  
  barplot(aCR$percentage, las=3)
  
  pie1(aCR$percentage,las=3)
  
  dc<-annoGR(TxDb.Mmusculus.UCSC.mm9.knownGene)
  
  seqinfo(dc)
  seqlevels(dc)
  
  seqinfo(overlaps)
  seqlevels(overlaps)
  
  #GRCm38/mm10
  dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
  seqinfo(dd.GRCm39.mm10)
  seqlevels(dd.GRCm39.mm10)
  
  seqlevels(dd.GRCm39.mm10,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
                                            "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
                                            "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
  
  seqinfo(overlaps)<-seqinfo(dd.GRCm39.mm10)
  seqinfo(overlaps)
  
  overlaps.trimmed<-trim(overlaps, use.names=TRUE)
  
  library(EnsDb.Mmusculus.v79)
  
  #GRCm38/mm10
  dd<-toGRanges(EnsDb.Mmusculus.v79)
  seqinfo(dd)
  
  library(ensembldb)
  library(GenomeInfoDb)
  
  seqlevelsStyle(overlaps.trimmed) <- seqlevelsStyle(dd.GRCm39.mm10)
  
  overlaps.anno<-annoPeaks(overlaps.trimmed,dd.GRCm39.mm10)
  
  library(org.Mm.eg.db)
  
  overlaps.anno.with.entrez.id <- addGeneIDs(overlaps.anno,"org.Mm.eg.db",IDs2Add = "entrez_id")
  
  write.csv(as.data.frame(unname(overlaps.anno.with.entrez.id)), paste0(out.dir.name,"other_anno.csv"))
  
  pie1(table(overlaps.anno.with.entrez.id$insideFeature))
  
  library("DBI")
  over <- getEnrichedGO(overlaps.anno.with.entrez.id, orgAnn="org.Mm.eg.db",
                        maxP=.05, minGOterm=10,
                        multiAdjMethod="BH", condense=TRUE)
  
  # over.gene.symbol <- getEnrichedGO(overlaps.anno.with.entrez.id, orgAnn="org.Mm.eg.db",
  #                                   feature_id_type="gene_symbol",
  #                       maxP=.05, minGOterm=10,
  #                       multiAdjMethod="BH",condense=TRUE)
  
  
  head(over[["bp"]][, -c(3, 10)])
  
  library(org.Hs.eg.db)
  e2s = toTable(org.Mm.egSYMBOL)
  
  tempDS=over$bp
  
  tempDS2<-data.frame(apply(tempDS,1,function(u,e2c){
    
    #print(u[1])
    x=u[11]
    tempId<-unlist(strsplit(as.character(x),split=";"))
    index<-which(!is.na(match(e2s[,1],tempId)))
    index<-match(tempId,e2s[,1])
    geneS<-paste(e2s[index,2], collapse=";")
    geneS
    
    #print(geneS)
    
  },e2c))
  
  tempDS3<-cbind(tempDS,tempDS2)
  
  colnames(tempDS3)[12]="GeneSymbol"
  
  Draw4GO <- function(tempDS3) {
    x=tempDS3
    y=x[order(x$pvalue,decreasing = TRUE),]
    
    z=y[1:10,c(1,3,4,5,6,10)]
    
    Function<-z$Definition
    
    negative_log10p=-log10(z$pvalue)
    
    library(ggplot2)
    
    #ggplot(z, aes(x=z$go.id, y=negative_log10p,fill=factor(z$go.id)))+geom_bar(stat="identity")+geom_hline(yintercept = -log10(0.05))+coord_flip()
    
    ggplot(z, aes(go.id,pvalue, fill = go.id)) +
      geom_bar(stat="identity")+ scale_x_discrete(labels=z$count.InDataset, limits=factor(z$go.id))+ scale_fill_discrete(breaks = z$go.id,
                                                                                                                         name="GO term")+theme(legend.text = element_text(colour="black", size = 11, face = "bold"))+
      theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle("GO enrichment analysis")+labs(x="Gene Counts",y="-log p-value")
    
  }
  
  
  write.table(tempDS3,file=paste0(out.dir.name,"BP_.txt"),row.names = FALSE,quote=FALSE,sep="\t")
  
  
  
  #anno <- annoGR(EnsDb.Hsapiens.v79)
  
  ree<-annotatePeakInBatch(overlaps,
                           AnnotationData=dc,
                           output="nearestBiDirectionalPromoters",
                           bindingRegion=c(-2000, 500))
  
  ree2 <- addGeneIDs(ree,
                     "org.Mm.eg.db",
                     IDs2Add = "entrez_id")
  
  re<-makeVennDiagram(re.out[c(2,4,1)],NameOfPeaks=c("SMC1A", "RAD21","ASXL1"),totalTest=35000)
  
  
  
  library(BSgenome.Mmusculus.UCSC.mm9)
  
  upseqs<-Views(Mmusculus,overlaps)
  
  overlaps.trimmed<-trim(overlaps, use.names=TRUE)
  
  mm9.S<-Seqinfo(genome="mm9")
  
  seqinfo(overlaps,force=TRUE) <- Seqinfo(genome="mm9")
  
  seqlevels(mm9.S) <- c("chr1","chr10","chr11","chr12","chr13",
                        "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
                        "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
  
  seqinfo(overlaps) <- mm9.S
  
  goodGR <- trim(overlaps)
  
  overlaps.trimmed<-goodGR
  seq<-getAllPeakSequence(overlaps.trimmed,genome=Mmusculus)
  seqs.mm9 <- getSeq(Mmusculus,overlaps.trimmed)
  write2FASTA(seq, paste0(out.dir.name,"WT_triple.fa"))
  
  ## We can also try simulation data
  seq.sim.motif <- list(c("t", "g", "c", "a", "t", "g"),
                        c("g", "c", "a", "t", "g", "c"))
  set.seed(1)
  seq.sim <- sapply(sample(c(2, 1, 0), 1000, replace=TRUE, prob=c(0.07, 0.1, 0.83)),
                    function(x){
                      s <- sample(c("a", "c", "g", "t"),
                                  sample(100:1000, 1), replace=TRUE)
                      if(x>0){
                        si <- sample.int(length(s), 1)
                        if(si>length(s)-6) si <- length(s)-6
                        s[si:(si+5)] <- seq.sim.motif[[x]]
                      }
                      paste(s, collapse="")
                    })
  
  os <- oligoSummary(seq, oligoLength=6, MarkovOrder=3,
                     quickMotif=TRUE)
  
  zscore <- sort(os$zscore, decreasing=TRUE)
  h <- hist(zscore, breaks=100, main="Histogram of Z-score")
  text(zscore[1:2], rep(5, 2),
       labels=names(zscore[1:2]), adj=0, srt=90)
  
  pfms <- mapply(function(.ele, id)
    new("pfm", mat=.ele, name=paste("SAMPLE motif", id)),
    os$motifs, 1:length(os$motifs))
  
  motifStack(pfms[[1]])
  motifStack(pfms[[2]])
  motifStack(pfms[[3]])
  motifStack(pfms[[4]])
  
}

# input.file.dir="/media/H_driver/2016/Danny/Danny_chip_PeakCall/"
# input.file.pattern="*macs142_peaks.bed"
# output.file.dir="/media/H_driver/2016/Danny/Danny_chip_PeakCall/"
#
# AnnotatePeak(input.file.dir,input.file.pattern,8,output.file.dir,genome="Mm")
# AnnotatePeak(input.file.dir,input.file.pattern,7,output.file.dir,genome="Hs")

AnnotatePeak <- function(input.file.dir,input.file.pattern,index.file,output.file.dir,genome) {
  
  library(ChIPpeakAnno)
  
  dir.name=input.file.dir
  input.file.pattern=input.file.pattern
  
  dir.name=reformatPath(dir.name)
  output.dir.name=reformatPath(output.file.dir)
  
  #print(output.dir.name)
  temp=Sys.time()
  temp1=gsub(":","-",Sys.time())
  temp2=gsub(" ","-",temp1)
  temp3=paste0(output.dir.name,"AnalysisResults_at_",temp2)
  
  dir.create(temp3)
  
  file.name=paste0(dir.name,dir(dir.name,recursive = TRUE,pattern=input.file.pattern))
  file.name.2<-as.list(file.name)
  
  names(file.name.2)=sapply(strsplit(file.name,split="\\/"),"[[",index.file)
  
  print(file.name.2)
  
  file.name.3<-file.name.2
  
  #sample.name<-sapply(strsplit(names(file.name.3),split="_peaks_"),"[[",1)
  
  #names(file.name.3)=sample.name
  
  file.name.4 <-file.name.3[-1]
  
  re.out<-lapply(file.name.4,function(u){
    re=toGRanges(u,format="BED")
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  head(re.out[[1]])
  
  re.out.L<-lapply(re.out,function(u){
    re=length(u)
    #colnames(re)=c("Count","GeneName")
    re
  })
  
  if(genome=="Mm"){
    
    annoData <- toGRanges(EnsDb.Mmusculus.v75, feature="gene")
    
    ol <- findOverlapsOfPeaks(re.out[c(2,4,1)])
    
    overlaps<-ol$peaklist$`11_2470IUPUI_WT_BM_SMC1_peaks.bed///13_2470IUPUI_WT_BM_Rad21_peaks.bed///10_WT_BM_ASXL1_peaks.bed`
    
    binOverFeature(overlaps, annotationData=annoData,
                   radius=5000, nbins=20, FUN=length, errFun=0,
                   ylab="count",
                   main="Distribution of aggregated peak numbers around TSS")
    
    overlaps.trimmed<-trim(overlaps, use.names=TRUE)
    
    library(EnsDb.Mmusculus.v79)
    dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
    #seqinfo(dd.GRCm39.mm10)
    #seqlevels(dd.GRCm39.mm10)
    
    seqlevels(dd.GRCm39.mm10,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
                                              "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
                                              "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
    
    seqinfo(overlaps)<-seqinfo(dd.GRCm39.mm10)
    #GRCm38/mm10
    #dd<-toGRanges(EnsDb.Mmusculus.v79)
    #seqinfo(dd)
    #library(ensembldb)
    #library(GenomeInfoDb)
    seqlevelsStyle(overlaps.trimmed) <- seqlevelsStyle(dd.GRCm39.mm10)
    overlaps.anno<-annoPeaks(overlaps.trimmed,dd.GRCm39.mm10)
    write.table(overlaps.anno,file=paste0(temp3,"/","annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
  }else if(genome=="Hs"){
    
    library(EnsDb.Hsapiens.v75)
    #annoData<-toGRanges(EnsDb.Hsapiens.v75, feature="gene")
    
    dd.hs<-toGRanges(EnsDb.Hsapiens.v75)
    
    print(seqinfo(dd.hs))
    print(seqlevels(dd.hs))
    
    #print(seqlevels(dd.hs)[,1])
    
    #print(seqlevels(re.out[[1]])[,1])
    
    # seqlevels(dd.hs,force=TRUE) <- c("chr1","chr10","chr11","chr12","chr13",
    #                                           "chr14","chr15","chr16","chr17","chr18","chr19","chr2",
    #                                           "chr3","chr4","chr5","chr6","chr7","chr8","chr9","chrX","chrY")
    
    #temp4=
    
    re.out.L<-lapply(1:length(re.out),function(u,re.out,dd.hs){
      
      x=re.out[[u]]
      x_name=names(re.out)[u]
      
      seqlevels(dd.hs,force=TRUE)<-seqinfo(x)@seqnames
      #print(seqinfo(re.out.trimmed))
      #print(seqlevels(re.out.trimmed))
      seqinfo(x)<-seqinfo(dd.hs)
      #GRCm38/mm10
      #dd<-toGRanges(EnsDb.Mmusculus.v79)
      #seqinfo(dd)
      #library(ensembldb)
      #library(GenomeInfoDb)
      seqlevelsStyle(x) <- seqlevelsStyle(dd.hs)
      re.out.trimmed<-trim(x, use.names=TRUE)
      overlaps.anno<-annoPeaks(re.out.trimmed,dd.hs)
      
      write.table(overlaps.anno,file=paste0(temp3,"/",x_name,"_annotation.txt"),row.names = FALSE,quote=FALSE,sep="\t")
    },re.out,dd.hs)
    
  }
  
}

ProcessUsingLOLA <- function() {
  #dbPath = system.file("extdata", "hg19", package="LOLA")
  
  dbPath = "/media/H_driver/2016/Yang/Results/data/groups/lab_bock/shared/resources/regions/LOLACore/mm10"
  
  regionDB = loadRegionDB(dbLocation=dbPath)
  
  #read triple overlap peaks
  regionSetA = readBed("/media/H_driver/2016/Yang/Results/WT_triple_overlap.bed")
  
  #read each peak
  regionSet.WT.ASXL1=readBed("/media/H_driver/2016/Yang/BedFromPeng/10_WT_BM_ASXL1_peaks_from_PeakCall4Yang.bed")
  regionSet.WT.SMC1=readBed("/media/H_driver/2016/Yang/BedFromPeng/11_2470IUPUI_WT_BM_SMC1_peaks_from_PeakCall4Yang.bed")
  regionSet.WT.Rad21=readBed("/media/H_driver/2016/Yang/BedFromPeng/13_2470IUPUI_WT_BM_Rad21_peaks_from_PeakCall4Yang.bed")
  
  
  db.841<-regionDB$regionGRL
  
  userSets = GRangesList(regionSet.WT.ASXL1,regionSet.WT.SMC1,regionSet.WT.Rad21)
  
  db.841.2<-c(db.841,userSets)
  
  userUniverse<-buildRestrictedUniverse(userSets)
  
  userUniverse.844<-buildRestrictedUniverse(db.841.2)
  
  re.SMC1A.RAD21.only<-makeVennDiagram(re.out[c(2,4)],NameOfPeaks=c("SMC1A", "RAD21"),totalTest=35000)
  
  ol.SMC1A.RAD21.only <- findOverlapsOfPeaks(re.out[c(2,4)])
  
  ol.SMC1A.RAD21.only.2<-ol.SMC1A.RAD21.only$peaklist$`11_2470IUPUI_WT_BM_SMC1_peaks.bed///13_2470IUPUI_WT_BM_Rad21_peaks.bed`
  
  checkUniverseAppropriateness(ol.SMC1A.RAD21.only.2,userUniverse.844, cores = 1,fast = FALSE)
  
  locResults2 = runLOLA(userSets,userUniverse,regionDB, cores=1)
  locResults = runLOLA(regionSetA,userUniverse,regionDB, cores=1)
  
  #SMC1A and RAD21 overlap
  
  
  
  locResults.SMC1A.RAD21 = runLOLA(ol.SMC1A.RAD21.only.2,userUniverse,regionDB, cores=4)
  
  head(locResults.SMC1A.RAD21)
  
  pValue<-as.data.frame(10^(-locResults.SMC1A.RAD21$pValueLog))
  
  temp<-as.data.frame(locResults.SMC1A.RAD21)
  
  locResults.SMC1A.RAD21.with.p<-cbind(temp[,1:3],pValue,temp[,4:23])
  
  colnames(locResults.SMC1A.RAD21.with.p)[4]="pValue"
  
  #It takes extensive computation resource, be carefully to run
  locResults.SMC1A.RAD21.based.large.universe = runLOLA(ol.SMC1A.RAD21.only.2,userUniverse.844,regionDB, cores=4)
  
  write.table(locResults.SMC1A.RAD21.with.p,
              file=paste0(out.dir.name,"SMC1A.RAD21.with.Other.Antibody.csv"),sep=",",
              quote = FALSE,row.names = FALSE,col.names = TRUE)
  
  write.table(locResults,file=paste0(out.dir.name,"Other.csv"),sep=",",quote = FALSE,row.names = FALSE,col.names = TRUE)
  
  listRegionSets(regionDB)
  extractEnrichmentOverlaps(locResults,regionSetA, regionDB)
  
  loadCaches("/media/H_driver/2016/Yang/Results/LOLACoreCaches_latest.tgz")
  
  save.image(file =paste0(out.dir.name,"re_search.RData"))
  
  # regionSetB = readBed("lola_vignette_data/setB_100.bed")
  # regionSetC = readBed("lola_vignette_data/setC_100.bed")
  # activeDHS = readBed("lola_vignette_data/activeDHS_universe.bed")
  # data("sample_universe", package="LOLA")
  #
  # dbPath = system.file("extdata", "hg19", package="LOLA")
  # regionDB = loadRegionDB(dbLocation=dbPath)
  # data("sample_universe", package="LOLA")
  # data("sample_input", package="LOLA")
  #
  # getRegionSet(regionDB, collections="ucsc_example", filenames="vistaEnhancers.bed")
  # getRegionSet(dbPath, collections="ucsc_example", filenames="vistaEnhancers.bed")
  #
  # res = runLOLA(userSets, userUniverse, regionDB, cores=1)
  # locResult = res[2,]
}

#' parser4diffbind
#' 
#' parser4diffbind
#'
#' @param input.sample.file
#' @param input.bed.dir
#' @param input.file.pattern
#'
#' @return
#' @export
#'
#' @examples
#'  
#' input.sample.file = "/Volumes/Bioinformatics$/2017/DannyNewData/SampleID_INFO_ChIP_new_Danny.csv"
#' input.bed.dir = "/Volumes/Bioinformatics$/2017/DannyNewData/PeakCall/"
#' input.file.pattern = "bed"
#' output.dir="BindDiff_4_7_2017-cutoff_10k_test"
#' output.dir="/Volumes/Bioinformatics$/2017/DannyNewData/testRun"
#' resmcf<-parser4diffbind(input.sample.file,input.bed.dir,input.file.pattern,output.dir)
#'
parser4diffbind<-function(input.sample.file,input.bed.dir,input.file.pattern,output.dir,
                          summit_peak=NULL){
  
  
  sample.info<-read.csv(input.sample.file,header = TRUE)
  sample.info.2 <- sample.info[,-dim(sample.info)[2]]
  
  out.dir.name = dirname(input.sample.file)
  
  re<-ParserReadFiles(input.bed.dir,input.file.pattern)
  
  re.bed<-re$input
  
  
  re.peaks.only.bed.2<-FL(re.bed,'peaks')
  
  if(!is.null(summit_peak)){
    re.summits.only.bed<-FL(re.bed,'summits')
  }
  
  #put peak files into database
  re.peaks.only.bed.3<-list_to_df(re.peaks.only.bed.2)
  
  
  print(re.peaks.only.bed.3)
  
  pos <- regexpr('R1', re.peaks.only.bed.3$ID)
  ID2 <- substr(re.peaks.only.bed.3$ID,1,pos+1)
  
  re.peaks.only.bed.3 <- cbind(ID2,re.peaks.only.bed.3)
  
  colnames(re.peaks.only.bed.3)=c("ID","ID2") 
  
  re.peaks.only.bed.4<-merge(sample.info.2,re.peaks.only.bed.3,by="ID")
  
  replicate<-apply(re.peaks.only.bed.4,1,function(x){
    ID=as.character(x[1])
    cell=as.character(x[2])
    TF=as.character(x[3])
    ID2=as.character(x[4])
    p1=nchar(ID)+2
    p2=regexpr(TF,ID2)
    p2=p2-2
    xx=substr(ID2,p1,p2)
    cell=substr(xx,1,nchar(xx)-2)
    rep=substr(xx,nchar(xx),nchar(xx))
    file=as.character(x[6])
    condition=paste(cell,TF,sep=":")
    z <- data.frame(t(c(ID2,cell,TF,condition,rep,file)))
    z
  })
  
  res <- do.call(rbind.data.frame,replicate)
  colnames(res) <- c("ID","Cell","Ab","Cell_Ab","Replicate","File")
  
  
  re.peaks.only.bed.5 <- res
  
  mcf<-NULL
  
  for(i in 1:dim(re.peaks.only.bed.5)[1]){
    
    #peaks.v=readBed(as.character(re.peaks.only.bed.5[i,6]))
    
    peaks.v <- readPeakFile(as.character(re.peaks.only.bed.5[i,6]), as = "GRanges")
    
    peak.caller.v="macs142"
    sampID.v=as.character(re.peaks.only.bed.5[i,1])
    
    sampID.v=substr(sampID.v,1,nchar(sampID.v)-5)
    
    tissue.v=as.character(re.peaks.only.bed.5[i,2])
    factor.v=as.character(re.peaks.only.bed.5[i,3])
    condition.v=as.character(re.peaks.only.bed.5[i,4])
    
    replicate.v=as.numeric(as.character(re.peaks.only.bed.5[i,5]))
    
    if(is.null(mcf)){
      mcf <- dba.peakset(NULL,peaks=peaks.v,peak.caller=peak.caller.v, sampID=sampID.v,tissue=tissue.v,
                         factor=factor.v,condition=condition.v,replicate=replicate.v)
    }else{
      mcf <- dba.peakset(mcf,peaks=peaks.v,peak.caller=peak.caller.v, sampID=sampID.v,tissue=tissue.v,
                         factor=factor.v,condition=condition.v,replicate=replicate.v)
    }
  }
  
  print(mcf)
  
  temp3=output.dir
  if(!dir.exists(temp3)){dir.create(temp3,recursive = TRUE)}
  
  save(mcf,file=file.path(temp3,"mcf.RData"))
  
  pdf(file.path(temp3,"corrmap.pdf"))
  dba.plotHeatmap(mcf,margin=23)
  dev.off()
  
  GetResultsFromDiffBind(mcf,"yes",temp3)
  
  return(mcf)
}

#' Title
#'
#' @param dir.name 
#'
#' @return
#' @export
#'
#' @examples
#' dir.name="/media/H_driver/2016/Yang/MACS/MACS/"
#' reformatPath(dir.name) 
reformatPath <- function(dir.name){
  
  CheckOPS=Sys.info()[['sysname']]
  
  if(CheckOPS=="Darwin"){
    
    if(length(grep('H_driver',dir.name))!=0){
      temp=unlist(strsplit(dir.name,split="\\/"))
      temp[2]="Volumes"
      temp[3]="Bioinformatics$"
      dir.name=paste0(paste0(temp,collapse = "/"),"/")
    }
  }
  
  return(dir.name)
}

#' useHOMER
#'
#' @return
#' @export
#'
#' @examples
#' res <- useHOMER("findMotifs.pl")
#' system(res)
#' 
useHOMER <- function(function.name){
  
  R_LIB=.libPaths()
  PATH1=Sys.getenv("PATH")
  
  homer_Lib=file.path(R_LIB,"ChipSeq/homer/bin")
  
  Sys.setenv(PATH=paste0(homer_Lib,":",PATH1)) 
  cmd1 <- Sys.which(function.name)[[1]]
  
  PATH2=Sys.getenv("PATH")
  
  cat(PATH1,"\n")
  cat(PATH2,"\n")
  
  return(cmd1)
}

# Filtering list based on certain pattern of the names of list elements

FL <- function(re.bed, patt) {
  
  re.peaks.only.bed<-lapply(1:length(re.bed),function(u,re.bed,patt){
    
    tmp=re.bed
    x=tmp[[u]]
    path_name=dirname(x)
    file_name=basename(x)
    
    #pos_summits=regexpr('summits',file_name)
    pos_peaks=regexpr(patt,file_name)
    
    if(pos_peaks>0){
      y<-x
    }else{
      y<-NULL  
    }
    
    y  
    
  },re.bed,patt)
  
  re.peaks.only.bed.2<-re.peaks.only.bed[lapply(re.peaks.only.bed,length) > 0]
  
  names(re.peaks.only.bed.2)=unlist(lapply(1:length(re.peaks.only.bed.2),function(u,re.peaks.only.bed.2){
    tmp=re.peaks.only.bed.2
    x=tmp[[u]]
    path_name=dirname(x)
    file_name=basename(x)
    file_name
  },re.peaks.only.bed.2))
  
  return(re.peaks.only.bed.2)
}

list_to_df <- function(list_for_df) {
  list_for_df <- as.list(list_for_df)
  
  nm <- names(list_for_df)
  
  if (is.null(nm)) 
    nm <- seq_along(list_for_df)
  
  ID<-sapply(1:length(nm),function(u,nm){
    x=nm[u]
    pos=regexpr("\\.",x)
    pos1=pos-1
    y<-substr(x,1,pos1)
    y
  },nm)
  
  df <- data.frame(ID=ID,name = nm, stringsAsFactors = FALSE)
  df$value <- unname(list_for_df)
  df
}

get_os <- function(){
  sysinf <- Sys.info()
  if (!is.null(sysinf)){
    os <- sysinf['sysname']
    if (os == 'Darwin')
      os <- "osx"
  } else { ## mystery machine
    os <- .Platform$OS.type
    if (grepl("^darwin", R.version$os))
      os <- "osx"
    if (grepl("linux-gnu", R.version$os))
      os <- "linux"
  }
  tolower(os)
}

#' plotBam2
#' 
#' @export
#' @example 
#' 
#' input.sample.file <- '/Volumes/Bioinformatics$/2017/DannyNewData/SampleID_INFO_ChIP_new_Danny.csv'
#' 
#' input.bam.file <- '/Users/axy148/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/BamDanny/Bam_files.txt'
#' 
#' output.dir <- '/Users/axy148/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/BamDanny/BamMerged'
#'  
#' re <- plotBam2(input.sample.file,input.bam.file,output.dir)
#' 
#' 
plotBam2 <- function(input.sample.file,input.bam.file,output.dir)
{
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$re11
  
  cell.name <- unique(unlist(lapply(cellInfo$Type_Cell,function(u){substr(as.character(u),1,nchar(as.character(u))-2)})))
  
  cmd="samtools merge"
  
  cmd.3 <- lapply(cell.name,function(u,cellInfo,output.dir){
    
    cat(u,"\n")
    
    x <- cellInfo[which(substr(as.character(cellInfo$Type_Cell),1,nchar(as.character(cellInfo$Type_Cell))-2)==u),]
    x$Cell_TF <- gsub(" ","-",x$Cell_TF)
    
    tf <- as.character(x$Type_TF) 
    tf.unique <- unique(tf)
    
    y <- combn(1:length(tf),2,function(x)list(c(x[1],x[2])))
    
    yy <- lapply(tf.unique,function(u,tf,y){
      x <- which(tf %in% u)
      x
    },tf,y)
    
    yyy <- setdiff(y,yy)
    yyyy <- yyy[lapply(yyy, length) > 0]
    
    cmd.2 <- lapply(yyyy,function(u,x,output.dir){
      cmd.1 <- paste(cmd,file.path(output.dir,paste0(paste(x[c(u),]$Cell_TF,collapse ="-"),"_merged.bam")),paste(x[c(u),]$file.name,collapse = " "),collapse = " ")
    },x,output.dir)
    
    cmd.2
    
  },cellInfo,output.dir)
  
  cmd.4 <- unlist(cmd.3)
  
  mergeBamFilesUseJobArray(cmd.4)
  
}

mergeBamFilesUseJobArray <- function(cmd)
{
  
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  total <- system('echo $LSB_JOBINDEX_END',intern = TRUE)
  
  cat(index,"\n\n")
  cat(total,"\n\n")
  
  u <- as.integer(index)
  
  system(cmd[u],intern = TRUE)
  cat(cmd,"\n\n")
  
}

#' R -e 'library(DoGs);library(ChipSeq);ChipSeq:::runPlotBam2("~/Danny/SampleID_INFO_ChIP_new_Danny.csv","~/Danny/Alignment/BWA/bam_files.txt","/scratch/projects/bbc/aiminy_project/DannyBamMerged")'

runPlotBam2 <-
  function(input.sample.file,
           input.bam.file,
           output.dir) {
    # This setting works
    Rfun1 <-
      'library(DoGs);library(ChipSeq);re <- ChipSeq:::plotBam2('
    
    input1 <- input.sample.file
    input2 <- input.bam.file
    output <- output.dir
    Rinput <-
      paste0('\\"',
             input1,
             '\\",',
             '\\"',
             input2,
             '\\",',
             '\\"',
             output,
             '\\"')
    Rfun2 <- ')'
    Rfun3 <- paste0(Rfun1, Rinput, Rfun2)
    
    n.job <- 12
    
    mergeBam <-
      DoGs:::createBsubJobArrayRfun(Rfun3, paste0("mergeBam[1-", n.job, "]"), NULL)
    
    system(mergeBam)
    
  }

#' bsub -P bbc -J "bamPlot" -o %J.bamPlot.log -e %J.bamPlot.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::plotMergedBamWgene("/scratch/projects/bbc/aiminy_project/DannyNewNgsPlot4Merged","*_sorted.bam","~/all_common_gene_unique.txt","NgsPlot4Merged")'
#'
plotMergedBamWgene <-
  function(input.bam.file.dir,
           file.pattern,
           input.gene.list,
           output.dir,
           ngs.para = c("hg19", 4000, 1, 1, "total"),
           add.input = NULL)
  {
    cellInfo <-
      list.files(
        input.bam.file.dir,
        pattern = file.pattern,
        all.files = TRUE,
        full.names = TRUE,
        recursive = TRUE,
        include.dirs = TRUE
      )
    
    
    if (!dir.exists(output.dir))
    {
      dir.create(output.dir, recursive = TRUE)
    }
    
    temp3 = output.dir
    
    cellInfo.run <- lapply(1:length(cellInfo), function(u, cellInfo,
                                                        temp3)
    {
      xx <- cellInfo[u]
      xxx <- file_path_sans_ext(basename(xx))
      cmd12 = paste(xx, input.gene.list, xxx, sep = "\t")
      cat(cmd12, file = file.path(temp3, paste0(xxx, "_config_cJunAndp27.txt")), sep = "\t")
    }, cellInfo, temp3)
    
    # dir.name=temp3 dir.name=reformatPath(dir.name)
    file.name <-
      list.files(
        temp3,
        pattern = "*_config_cJunAndp27.txt",
        all.files = TRUE,
        full.names = TRUE,
        recursive = TRUE,
        include.dirs = TRUE
      )
    
    file.name.2 <- as.list(file.name)
    
    zzz <- unlist(file.name.2)
    
    lapply(1:length(zzz), function(u, zzz)
    {
      dir.name = dirname(zzz[u][[1]])
      file_name = file_path_sans_ext(basename(zzz[u][[1]]))
      
      cmd = paste(
        "ngs.plot.r -G hg19 -R tss -C",
        zzz[u][[1]],
        "-O",
        file.path(dir.name,
                  paste0(file_name, ".tss")),
        "-T",
        file_name,
        "-L 4000 -RR 1 -CD 1 -GO total",
        sep = " "
      )
      
      cmd2 = cmd
      cat(cmd2, "\n")
      cat("\n")
      system(cmd2)
    }, zzz)
    
    return(re)
    
  }

#mcf.with.zhao <- ChipSeq:::addMoreBed2mcf("/Volumes/Bioinformatics$/2017/DannyNewData/SampleID_INFO_ChIP_new_Danny_zhao.csv","~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/BamDanny","/Volumes/Bioinformatics$/2017/DannyNewData/PeakCall","bed$",mcf=NULL,"/Volumes/Bioinformatics$/2017/DannyNewData/testRun5")

addMoreBed2mcf<-function(input.sample.file,input.bam.dir,input.bed.dir,input.file.pattern,mcf=NULL,output.dir){
  
  bam.file <- prepareBamFile(input.bam.dir)
  
  sample.info<-read.csv(input.sample.file,header = TRUE)
  sample.info.2 <- sample.info[,-dim(sample.info)[2]]
  
  #print(sample.info.2)
  
  #out.dir.name = dirname(input.sample.file)
  
  re<-ParserReadFiles(input.bed.dir,input.file.pattern)
  
  re.bed<-re$input
  
  #print(re.bed)
  
  re.peaks.only.bed.2<-Filter(function(x) !any(grepl("summits", x)), re.bed)
  
  # if(!is.null(summit_peak)){
  #   re.summits.only.bed<-FL(re.bed,'summits')
  # }
  
  #put peak files into database
  re.peaks.only.bed.3<-list_to_df(re.peaks.only.bed.2)
  
  print(re.peaks.only.bed.3)
  
  ll <- re.peaks.only.bed.3$ID
  
  ID2 <- unlist(lapply(ll,function(u) {
    
    pos <- regexpr('R1',u)
    if(pos>0){
      id <- substr(u,1,pos+1)
    }else{
      id <- u
    }
    id
  }))
  
  #   if(u>0){
  #   ID2 <- substr(ll,1,pos+1)
  #   }else{
  #     ID2 <- ll
  #   }
  
  # pos <- regexpr('R1', re.peaks.only.bed.3$ID)
  # 
  # print(pos)
  # 
  # ll <- re.peaks.only.bed.3$ID
  # 
  # ID2 <- lapply(pos,function(u,ll) {
  #  
  #   if(u>0){
  #   ID2 <- substr(ll,1,pos+1)
  #   }else{
  #     ID2 <- ll
  #   }
  #   
  # },ll)
  
  re.peaks.only.bed.3 <- cbind(ID2,re.peaks.only.bed.3)
  
  #print(re.peaks.only.bed.3)
  
  colnames(re.peaks.only.bed.3)=c("ID","ID2")
  #
  re.peaks.only.bed.4<-merge(sample.info.2,re.peaks.only.bed.3,by="ID")
  #
  
  cat("4\n")
  print(re.peaks.only.bed.4)
  cat("\n")
  
  re.peaks.only.bed.with.bam<-merge(bam.file,re.peaks.only.bed.4,by="ID")
  print(re.peaks.only.bed.with.bam)
  
  colnames(re.peaks.only.bed.with.bam)[6]="SampleID"
  
  replicate<-apply(re.peaks.only.bed.4,1,function(x){
    
    # pos <- regexpr('R1', re.peaks.only.bed.3$ID)
    
    ID=as.character(x[1])
    
    pos <- regexpr('R1',ID)
    if(pos>0){
      cell=as.character(x[2])
      TF=as.character(x[3])
      ID2=as.character(x[4])
      p1=nchar(ID)+2
      p2=regexpr(TF,ID2)
      p2=p2-2
      xx=substr(ID2,p1,p2)
      cell=substr(xx,1,nchar(xx)-2)
      rep=substr(xx,nchar(xx),nchar(xx))
      file=as.character(x[6])
      condition=paste(cell,TF,sep=":")
    }else
    {
      cell=as.character(x[2])
      TF=as.character(x[3])
      ID2=as.character(x[4])
      #p1=nchar(ID)+2
      #p2=regexpr(TF,ID2)
      #p2=p2-2
      #xx=substr(ID2,p1,p2)
      #cell=substr(xx,1,nchar(xx)-2)
      rep=1
      file=as.character(x[6])
      condition=paste(cell,TF,sep=":")
      
    }
    z <- data.frame(t(c(ID2,cell,TF,condition,rep,file)))
    z
  })
  
  res <- do.call(rbind.data.frame,replicate)
  colnames(res) <- c("ID","Cell","Ab","Cell_Ab","Replicate","File")
  
  colnames(res)[1] = "SampleID"
  
  sample.4.dba<-merge(res,re.peaks.only.bed.with.bam,by="SampleID")
  
  re.peaks.only.bed.5 <- res
  
  
  # replicate<-apply(re.peaks.only.bed.4,1,function(x){
  #   ID=as.character(x[1])
  #   cell=as.character(x[2])
  #   TF=as.character(x[3])
  #   ID2=as.character(x[4])
  #   p1=nchar(ID)+2
  #   p2=regexpr(TF,ID2)
  #   p2=p2-2
  #   xx=substr(ID2,p1,p2)
  #   cell=substr(xx,1,nchar(xx)-2)
  #   #rep=1
  #   file=as.character(x[6])
  #   condition=paste(cell,TF,sep=":")
  #   z <- data.frame(t(c(ID2,cell,TF,condition,rep,file)))
  #   z
  # })
  #
  # res <- do.call(rbind.data.frame,replicate)
  # colnames(res) <- c("ID","Cell","Ab","Cell_Ab","Replicate","File")
  #
  #
  # re.peaks.only.bed.5 <- res
  
  print(re.peaks.only.bed.5)
  
  mcf<-mcf
  
  ll <- list()
  
  for(i in 1:dim(re.peaks.only.bed.5)[1]){
    
    #peaks.v=readBed(as.character(re.peaks.only.bed.5[i,6]))
    
    peaks.v <- readPeakFile(as.character(re.peaks.only.bed.5[i,6]), as = "GRanges")
    peaks.v.sorted <- DiffBind:::pv.peaksort(as.data.frame(peaks.v))
    #peaks.v.sorted <- peaks.v
    ##print(peaks.v)
    ll[[i]] <- peaks.v.sorted
    peak.caller.v="macs142"
    sampID.v=as.character(re.peaks.only.bed.5[i,1])
    
    pos <- regexpr('R1',sampID.v)
    if(pos>0){
      sampID.v=substr(sampID.v,1,nchar(sampID.v)-5)
    }else{
      sampID.v = sampID.v
    }
    
    tissue.v=as.character(re.peaks.only.bed.5[i,2])
    factor.v=as.character(re.peaks.only.bed.5[i,3])
    condition.v=as.character(re.peaks.only.bed.5[i,4])
    
    replicate.v=as.numeric(as.character(re.peaks.only.bed.5[i,5]))
    
    if(is.null(mcf)){
      mcf <- dba.peakset(NULL,peaks=peaks.v.sorted,peak.caller=peak.caller.v, sampID=sampID.v,tissue=tissue.v,
                         factor=factor.v,condition=condition.v,replicate=replicate.v)
    }else{
      mcf <- dba.peakset(mcf,peaks=peaks.v.sorted,peak.caller=peak.caller.v, sampID=sampID.v,tissue=tissue.v,
                         factor=factor.v,condition=condition.v,replicate=replicate.v)
    }
  }
  
  print(mcf)
  
  temp3=output.dir
  if(!dir.exists(temp3)){dir.create(temp3,recursive = TRUE)}
  
  save(mcf,file=file.path(temp3,"mcf.RData"))
  
  pdf(file.path(temp3,"corrmap.pdf"))
  dba.plotHeatmap(mcf,margin=23)
  dev.off()
  
  # #GetResultsFromDiffBind(mcf,"yes",temp3)
  re <- list(mcf=mcf,ll = ll,re.peaks.only.bed.5 = re.peaks.only.bed.5,re.peaks.only.bed.with.bam = re.peaks.only.bed.with.bam, sample.4.dba =  sample.4.dba)
  return(re)
}

#ChipSeq:::GetResultsFromDiffBind2(temp,"/Volumes/Bioinformatics$/2017/DannyNewData/testRun2")

GetResultsFromDiffBind2<-function(mcf7,output.file.dir){
  
  temp<-mcf.with.zhao
  
  sampID.v<-colnames(temp$class) 
  sampID.v.2<-unlist(lapply(1:length(sampID.v),function(u,sampID.v){
    
    x=sampID.v[u]
    
    y=x
    y
  },sampID.v))
  
  
  colnames(temp$class)<-sampID.v.2
  temp$class[1,]<-sampID.v.2
  
  #temp<-dba(temp,mask =c(1,2,13,14,11,12,15,16))
  
  #Merge the replicates of each set 
  #if(Mergereplicates=="yes"){
  temp22<-dba.peakset(temp,consensus = -DBA_REPLICATE)
  #temp22<-dba(temp2,mask =c(9,16,17:23))
  print(temp22)
  ##need to set the flexiable number identified from data sets
  #    t <- temp2
  #    temp22<-dba(t,mask =which(t$class[which(rownames(t$class)=="Replicate"),]=="1-2"))
  #  }else{
  #    temp22<-temp
  #  }
  
  temp22<-dba(temp22,mask =17:28)
  print(temp22)
  
  po1 <- c(1,2,6,10)
  po2 <- c(1,2)
  
  temp3=file.path(output.file.dir,"Venn2")
  
  if(!dir.exists(temp3))
  {
    dir.create(temp3)
  }
  
  png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po1],collapse = "-vs-"),".png")))
  dba.plotVenn(temp22,mask=po1,main=paste0(colnames(temp22$class)[po1],collapse = "-vs-"))
  dev.off()
  
  png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po2],collapse = "-vs-"),".png")))
  dba.plotVenn(temp22,mask=po2,main=paste0(colnames(temp22$class)[po2],collapse = "-vs-"))
  dev.off()
  
  # Tissue1<-temp22$class[2,]
  # Tissue2<-unique(temp22$class[2,])
  # 
  # TF<-unique(temp22$class[3,])
  # TF.n<-length(TF)
  # 
  # temp3=file.path(output.file.dir,"Venn")
  # 
  # if(!dir.exists(temp3))
  # {
  #   dir.create(temp3)
  # }
  # 
  # for(i in 1:length(Tissue2)){
  #   po<-which(Tissue1 %in% Tissue2[i])
  #   
  #   print(po)
  #   
  #   if(length(po)==2)
  #   {
  #     png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po],collapse = "-vs-"),".png")))
  #     dba.plotVenn(temp22,mask=po,main=Tissue2[i])
  #     dev.off()
  #     
  #   }else if(length(po)==4){
  #     po1<-po[c(1,3)]
  #     po2<-po[c(2,4)]
  #     
  #     png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po1],collapse = "-vs-"),".png")))
  #     dba.plotVenn(temp22,mask=po1,main=Tissue2[i])
  #     dev.off()
  #     
  #     png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po2],collapse = "-vs-"),".png")))
  #     dba.plotVenn(temp22,mask=po2,main=Tissue2[i])
  #     dev.off()
  #     
  #   }else
  #   {
  #     cat(paste0("For ",Tissue2[i],": Only one peak profile for ",TF.n," TFs\n"))
  #   }
  # }
  # 
  # p.common<-lapply(1:length(Tissue2),function(u,Tissue1,Tissue2,temp22){
  #   
  #   po<-which(Tissue1 %in% Tissue2[u])
  #   
  #   if(length(po)==2)
  #   {
  #     common.peaks<-dba.overlap(temp22,mask=po)
  #     y<-common.peaks$inAll
  #   }else if(length(po)==4){
  #     po1<-po[c(1,3)]
  #     po2<-po[c(2,4)]
  #     
  #     common.peaks.1<-dba.overlap(temp22,mask=po1)
  #     y1<-common.peaks.1$inAll
  #     
  #     common.peaks.2<-dba.overlap(temp22,mask=po2)
  #     y2<-common.peaks.2$inAll
  #     
  #     y<-list(y1=y1,y2=y2)
  #     
  #     names(y)[1]<-paste0(colnames(temp22$class)[po1],collapse = "-vs-")
  #     names(y)[2]<-paste0(colnames(temp22$class)[po2],collapse = "-vs-")
  #     
  #   }else{
  #     y<-NULL}
  #   y
  # },Tissue1,Tissue2,temp22)
  # 
  # names(p.common)<-Tissue2
  # 
  # p.common<-unlist(p.common,recursive = F)
  # 
  # p.common<-p.common[lapply(p.common,length) > 0]
  # 
  # if(!dir.exists(file.path(output.file.dir,"common_peaks_bed")))
  # {
  #   dir.create(file.path(output.file.dir,"common_peaks_bed"))
  #   dir.create(file.path(output.file.dir,"common_peaks_bed","ucsc"))
  #   dir.create(file.path(output.file.dir,"common_peaks_bed","igv"))
  # }
  # 
  # #output common peaks to bed files
  # 
  # lapply(1:length(p.common),function(u,p.common,output.file.dir){
  #   x=p.common[[u]]
  #   
  #   x_name=names(p.common)[u]
  #   
  #   df <- data.frame(seqnames=seqnames(x),
  #                    #starts=start(x)-1,
  #                    starts=start(x),
  #                    ends=end(x),
  #                    names=c(rep(".", length(x))),
  #                    scores=elementMetadata(x)[,1],
  #                    strands=strand(x))
  #   
  #   #assign strand
  #   df.str <- data.frame(seqnames=seqnames(x),
  #                        #starts=start(x)-1,
  #                        starts=start(x),
  #                        ends=end(x),
  #                        names=c(rep(".", length(x))),
  #                        scores=elementMetadata(x)[,1],
  #                        strands=c(rep(".", length(x))))
  #   
  #   df.str.1<-df.str[-grep("random",df.str$seqnames),]
  #   
  #   df.str.2<-df.str.1
  #   
  #   df.str.3<-df.str.2[-grep("chrUn",df.str.2$seqnames),]
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed",paste0(x_name,"_cp_with_header.bed")),
  #               col.names=TRUE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed","ucsc",paste0(x_name,"_4_ucsc.bed")),
  #               col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed",paste0(x_name,"_common_peaks.bed")),
  #               col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed","igv",paste0(x_name,"_4_igv.bed")),
  #               col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  # },p.common,output.file.dir)
  # 
  # AnntationUsingChipSeeker(file.path(output.file.dir,"common_peaks_bed","igv"),"bed",file.path(output.file.dir,"common_peaks_bed")
  #                          ,txdb="hg19",DD=5000,distanceToTSS_cutoff=10000)
  # 
  # return(p.common)
}

useChIPpeakAnno <- function(mcf.with.zhao) {
  #findOverlapsOfPeaks(mcf.with.zhao$ll[c(17,11,18)])
  overlap <- findOverlapsOfPeaks(mcf.with.zhao$ll[17][[1]],mcf.with.zhao$ll[11][[1]],mcf.with.zhao$ll[18][[1]])
  
  bed.li<- overlap$overlappingPeaks[2][[1]][,2:4]
  write.table(bed.li,file="cJun_Li.bed",sep="\t",row.names = FALSE,
              col.names = FALSE,quote=FALSE)
  
  bed.zhao <- overlap$overlappingPeaks[2][[1]][,8:10]
  write.table(bed.zhao,file="cJun_zhao.bed",sep="\t",row.names = FALSE,
              col.names = FALSE,quote=FALSE)
  
  s1 <- makeVennDiagram(mcf.with.zhao$ll[c(17,11,18)], NameOfPeaks=c("cJun_Li","cJun_1833","cJun_zhao"),
                        totalTest=20000,scaled=F, euler.d=F,fill = c("red","green","blue"),
                        alpha = 0.50,
                        label.col = c(rep("black",7)),
                        cex = 2,
                        fontfamily = "serif",
                        fontface = "bold",
                        cat.col = c("red","green","blue"),connectedPeaks = "keepAll")
  
  s2 <- makeVennDiagram(mcf.with.zhao$ll[c(17,3,18)], NameOfPeaks=c("cJun_Li","cJun_231","cJun_zhao"),
                        totalTest=20000,scaled=F, euler.d=F,fill = c("red","green","blue"),
                        alpha = 0.50,
                        label.col = c(rep("black",7)),
                        cex = 2,
                        fontfamily = "serif",
                        fontface = "bold",
                        cat.col = c("red","green","blue"),connectedPeaks = "keepAll")
}

# SampleID,Tissue,Factor,Condition,Treatment,Replicate,bamReads,ControlID,bamControl,Peaks,PeakCaller
# BT4741,BT474,ER,Resistant,Full-Media,1,reads/Chr18_BT474_ER_1.bam,BT474c,reads/Chr18_BT474_input.bam,peaks/BT474_ER_1.bed.gz,bed
# BT4742,BT474,ER,Resistant,Full-Media,2,reads/Chr18_BT474_ER_2.bam,BT474c,reads/Chr18_BT474_input.bam,peaks/BT474_ER_2.bed.gz,bed
# MCF71,MCF7,ER,Responsive,Full-Media,1,reads/Chr18_MCF7_ER_1.bam,MCF7c,reads/Chr18_MCF7_input.bam,peaks/MCF7_ER_1.bed.gz,bed
# MCF72,MCF7,ER,Responsive,Full-Media,2,reads/Chr18_MCF7_ER_2.bam,MCF7c,reads/Chr18_MCF7_input.bam,peaks/MCF7_ER_2.bed.gz,bed
# MCF73,MCF7,ER,Responsive,Full-Media,3,reads/Chr18_MCF7_ER_3.bam,MCF7c,reads/Chr18_MCF7_input.bam,peaks/MCF7_ER_3.bed.gz,bed
# T47D1,T47D,ER,Responsive,Full-Media,1,reads/Chr18_T47D_ER_1.bam,T47Dc,reads/Chr18_T47D_input.bam,peaks/T47D_ER_1.bed.gz,bed
# T47D2,T47D,ER,Responsive,Full-Media,2,reads/Chr18_T47D_ER_2.bam,T47Dc,reads/Chr18_T47D_input.bam,peaks/T47D_ER_2.bed.gz,bed
# MCF7r1,MCF7,ER,Resistant,Full-Media,1,reads/Chr18_TAMR_ER_1.bam,TAMRc,reads/Chr18_TAMR_input.bam,peaks/TAMR_ER_1.bed.gz,bed
# MCF7r2,MCF7,ER,Resistant,Full-Media,2,reads/Chr18_TAMR_ER_2.bam,TAMRc,reads/Chr18_TAMR_input.bam,peaks/TAMR_ER_2.bed.gz,bed
# ZR751,ZR75,ER,Responsive,Full-Media,1,reads/Chr18_ZR75_ER_1.bam,ZR75c,reads/Chr18_ZR75_input.bam,peaks/ZR75_ER_1.bed.gz,bed
# ZR752,ZR75,ER,Responsive,Full-Media,2,reads/Chr18_ZR75_ER_2.bam,ZR75c,reads/Chr18_ZR75_input.bam,peaks/ZR75_ER_2.bed.gz,bed

#prepareBamFile("~/Dropbox (BBSR)/BWA")

prepareBamFile <- function(input.bam.dir,file.pattern){
  
  bam <- list.files(path = input.bam.dir, pattern=file.pattern, all.files = TRUE,
                    full.names = TRUE, recursive = TRUE,
                    ignore.case = FALSE, include.dirs = TRUE, no.. = FALSE)
  bam2 <- as.data.frame(cbind(basename(bam),bam))
  
  ID <- unlist(lapply(bam2[,1],function(u)
  {
    y <- u
    pos <- regexpr("\\.", y)
    pos <- pos - 1
    y <- substr(y, 1, pos)
    y
  }))
  
  bam3 <- cbind(ID,bam2)
  bam3
  
}

# tamoxifen <- prepareSampleSheet(mcf.with.zhao,"~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/BamDanny")

prepareSampleSheet <- function(mcf.with.zhao,output.file.dir){
  
  sampleSheet <- mcf.with.zhao$sample.4.dba[,c(1,2,3,5,9,13)]
  sampleSheet2 <- cbind(sampleSheet[,1:3],rep("Danny-Treatment",dim(sampleSheet)[1]),rep("Danny-condition",dim(sampleSheet)[1]),sampleSheet[,4:6],rep("bed",dim(sampleSheet)[1]))
  colnames(sampleSheet2)=c("SampleID","Tissue","Factor","Condition","Treatment","Replicate","bamReads","Peaks","PeakCaller")
  fwrite(sampleSheet2, file ="sampleSheet2.csv")
  tamoxifen <- dba(sampleSheet="sampleSheet2.csv")
  tamoxifen
  tamoxifen <- dba.count(tamoxifen, summits=250)
  
  tamoxifen <- dba.contrast(tamoxifen, group1 = c(1,2),group2=c(3,4),name1="cJun_231_DD", name2="cJun_231")
  tamoxifen <- dba.contrast(tamoxifen, group1 = c(1,2),group2=c(11,12),name1="cJun_231_DD", name2="cJun_1833")
  tamoxifen <- dba.contrast(tamoxifen, group1 = c(3,4),group2=c(11,12),name1="cJun_231", name2="cJun_1833")
  tamoxifen <- dba.contrast(tamoxifen, group1 = c(13,14),group2=c(5,6),name1="p27_231_DD", name2="p27_231")
  tamoxifen <- dba.contrast(tamoxifen, group1 = c(13,14),group2=c(15,16),name1="p27_231_DD", name2="p27_1833")
  tamoxifen <- dba.contrast(tamoxifen, group1 = c(5,6),group2=c(15,16),name1="p27_231", name2="p27_1833")
  tamoxifen <- dba.analyze(tamoxifen)
  
  save(tamoxifen,file=file.path(output.file.dir,"Danny_Dba.RData"))
  tamoxifen

  # dba.report(tamoxifen,contrast = 1)
  # dba.report(tamoxifen,contrast = 2)
  # dba.report(tamoxifen,contrast = 3)
  # dba.report(tamoxifen,contrast = 4)
  # dba.report(tamoxifen,contrast = 5)
  # dba.report(tamoxifen,contrast = 6)
  
}

#generateBedFromDba(tamoxifen,"~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/Bba2Bed")

generateBedFromDba <- function(tamoxifen,output.file.dir){
  
  if(!dir.exists(output.file.dir)){dir.create(output.file.dir,recursive = TRUE)}
  
  n <- length(tamoxifen$contrasts)
  
  lapply(1:n,function(u,tamoxifen){
    
    temp <- as.data.frame(dba.report(tamoxifen,contrast = u,th=1))
    
    file.name <- paste(colnames(as.data.frame(dba.report(tamoxifen,contrast = u,th=1)))[c(7,8)],collapse = "-")
    
    write.table(temp, row.names=F, col.names = F, quote =F,sep="\t", file=file.path(output.file.dir,paste0(file.name,".bed")))
    
  },tamoxifen)
  
}

#' dir.name="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/Bba2Bed"
#' input.file.pattern=".bed"
#' out.dir.name="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/AnnotationNew4DBA"
#' txdb="hg19"
#' DD=5000
#' 
#' AnntationUsingChipSeeker2(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000, AP=c("Promoter","Intron"))
#'
#' res.promoter <- AnntationUsingChipSeeker2(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000,AP=c("Promoter"))
#' 
#' AnntationUsingChipSeeker2(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000,AP=c("Intron"))
#' 
 
AnntationUsingChipSeeker2 <- function(dir.name,input.file.pattern,out.dir.name,txdb=c("hg19","hg38"),DD,distanceToTSS_cutoff=5000,assignGenomicAnnotation=TRUE,AP=c("Promoter", "5UTR", "3UTR", "Exon", "Intron","Downstream", "Intergenic")) {
  
  re<-ParserReadFiles(dir.name,input.file.pattern)
  
  re.bed<-re$input
  
  re.peaks.only.bed.2 <- re.bed
  
  # if(length(dir(dir.name,pattern="peaks.bed"))!=0)
  # {
  # re.peaks.only.bed.2<-FL(re.bed,'peaks')
  # cat("peaks\n")
  # print(re.peaks.only.bed.2)
  # }
  # 
  # if(length(dir(dir.name,pattern="summits.bed"))!=0){
  # re.summits.only.bed<-FL(re.bed,'summits')
  # cat("summits\n")
  # print(re.summits.only.bed)
  # }
  
  txdb<-match.arg(txdb)
  
  switch (txdb,
          hg38 = {
            cat("Use hg38\n")
            txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
          },
          {
            cat("Use hg19\n") 
            txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
          }
  )
  
  APpath <- paste(AP,collapse = "_")
  
  temp3=file.path(out.dir.name,"Annotation",APpath)
  
  if(!dir.exists(temp3)){dir.create(temp3,recursive = TRUE)}
  
  d=DD
  
  peaks.anno.list <- lapply(1:length(re.peaks.only.bed.2),function(u,re.peaks.only.bed.2,d){
    
    peaks=readPeakFile(re.peaks.only.bed.2[[u]],as="data.frame")
    
    print(head(peaks))
    
    peakAnno <- annotatePeak(re.peaks.only.bed.2[[u]], tssRegion=c(-d, d),
                             TxDb=txdb,assignGenomicAnnotation=assignGenomicAnnotation,genomicAnnotationPriority=AP,annoDb="org.Hs.eg.db")
    
    #select.index <- which(peakAnno$distanceToTSS<20,000 && peakAnno$distanceToTSS >= -20,000)
    dropAnnoM <- function (csAnno, distanceToTSS_cutoff) 
    {
      idx <- which(abs(mcols(csAnno@anno)[["distanceToTSS"]]) < 
                     distanceToTSS_cutoff)
      csAnno@anno <- csAnno@anno[idx]
      csAnno@peakNum <- length(idx)
      if (csAnno@hasGenomicAnnotation) {
        csAnno@annoStat <- ChIPseeker:::getGenomicAnnoStat(csAnno@anno)
        csAnno@detailGenomicAnnotation = csAnno@detailGenomicAnnotation[idx, 
                                                                        ]
      }
      csAnno
    }
    
    peakAnno <- dropAnnoM(peakAnno,distanceToTSS_cutoff = distanceToTSS_cutoff)
    
    x_name=names(re.peaks.only.bed.2)[u]
    cat(x_name,"\n")
    png(file.path(temp3,paste0(x_name,"_",d,APpath,"_around_tss_annotation_pie.png")))
    plotAnnoPie(peakAnno)
    dev.off()
    
    peaks.anno=as.data.frame(peakAnno)
    
    print(head(peaks.anno))
    
    #print(paste0(peaks[,c(2,3)]))
    group1 <- strsplit(tools::file_path_sans_ext(x_name),"-")[[1]][1] 
    group2 <- strsplit(tools::file_path_sans_ext(x_name),"-")[[1]][2] 
    
    colnames(peaks.anno)[5:13]=c("starnd","width_DiffDind_based","strand","Conc", group1,group2,"Fold","p.value","FDR")
    
    print(colnames(peaks.anno))
    write.table(peaks.anno,file=file.path(temp3,paste0(x_name,"_",d,APpath,"_around_tss_annotation_4_only_mapped_peaks.xls")),
                row.names = FALSE,quote=FALSE,sep="\t")
    
    # unmapped.peaks<-peaks[-which(paste0(peaks[,2],"_",peaks[,3]) %in% paste0(peaks.anno[,2],"_",peaks.anno[,3])),]
    # 
    # cat(dim(peaks)[1]," ",dim(peaks.anno)[1]," ",dim(unmapped.peaks)[1],"\n")
    # 
    # 
    # if(dim(unmapped.peaks)[1]!=0){
    #   
    #   colnames(unmapped.peaks)=colnames(peaks.anno)[1:6]
    #   
    #   unmapped.peaks.3<-smartbind(peaks.anno,unmapped.peaks)
    #   
    #   unmapped.peaks.4<-unmapped.peaks.3[order(unmapped.peaks.3[,1],unmapped.peaks.3[,2]),]
    #   
    #   write.table(unmapped.peaks.4,file=file.path(temp3,paste0(x_name,"_",d,APpath,"_around_tss_annotation_4_all_peaks.xls")),row.names = FALSE,quote=FALSE,sep="\t")
    # }
    # 
    
    peaks.anno
    
  },re.peaks.only.bed.2,d)
  
}

#' out.dir.name="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/CombineChipSeqWithRNASeq"
#' rna.seq.input.file="/Volumes/Bioinformatics$/2017/DannyNewData/AnnotationNew4DBA/table_S3_headerless.xlsx"
#' 
#' integrateChipSeqWithRNASeq(out.dir.name,rna.seq.input.file) 
#' 
integrateChipSeqWithRNASeq <- function(output.file.dir,rna.seq.input.file) {
 
  if(!dir.exists(output.file.dir)){dir.create(output.file.dir,recursive = TRUE)}
  
  gene.231dd.231.1833.1833shp27 <- read.xlsx(rna.seq.input.file,sheetIndex = 1,header = FALSE)
  
  colnames(gene.231dd.231.1833.1833shp27)=c("SYMBOL","FC_231DD_231","p_231DD_231","FC_1833_231","p_1833_231","FC_1833shp27_1833","p_1833shp27_1833")
  
  data.231DD.231 <- gene.231dd.231.1833.1833shp27[,c(1,2,3)]
  data.1833.231 <- gene.231dd.231.1833.1833shp27[,c(1,4,5)]
  data.1833shp27.1833 <- gene.231dd.231.1833.1833shp27[c(1,6,7)]
  
  cJun.231DD.231.ChipSeq.RNASeq <- merge(res.promoter[[2]],data.231DD.231,by="SYMBOL")
  p27.231DD.231.ChipSeq.RNASeq <- merge(res.promoter[[5]],data.231DD.231,by="SYMBOL")
  
  cJun.231.1833.ChipSeq.RNASeq <- merge(res.promoter[[3]],data.1833.231,by="SYMBOL")
  p27.231.1833.ChipSeq.RNASeq  <- merge(res.promoter[[6]],data.1833.231,by="SYMBOL")
  
  write.table(cJun.231DD.231.ChipSeq.RNASeq,file=file.path(output.file.dir,"cJun_231DD_231_DBA_DGE.xls"),
              row.names = FALSE,quote=FALSE,sep="\t")
  
  write.table(p27.231DD.231.ChipSeq.RNASeq,file=file.path(output.file.dir,"p27_231DD_231_DBA_DGE.xls"),
              row.names = FALSE,quote=FALSE,sep="\t")
  
  write.table(cJun.231.1833.ChipSeq.RNASeq,file=file.path(output.file.dir,"cJun_231_1833_DBA_DGE.xls"),
              row.names = FALSE,quote=FALSE,sep="\t")
  
  write.table(p27.231.1833.ChipSeq.RNASeq,file=file.path(output.file.dir,"p27_231_1833_DBA_DGE.xls"),
              row.names = FALSE,quote=FALSE,sep="\t")
}

addTestFunction4HeatmapChipSeq <- function(GenomicRanges, rtracklayer, IRanges, dataDirectory, chipseq, VennDiagram) {
  library(GenomicRanges)
  library(rtracklayer)
  library(IRanges)
  
  input = import.bed(file.path(dataDirectory, 'ES_input_filtered_ucsc_chr6.bed'),
                     asRangedData=FALSE)
  
  rep1 = import.bed("/Volumes/Bioinformatics$/2017/DannyNewData/PeakCall/2017-03-02-01_S5_R1-MDA-MB-231-DD-1-cJun_hs_1.00e-05_macs142_peaks.bed",genome = "hg19")
                    
  rep2 = import.bed("/Volumes/Bioinformatics$/2017/DannyNewData/PeakCall/2017-03-02-02_S2_R1-MDA-MB-231-DD-2-cJun_hs_1.00e-05_macs142_peaks.bed",genome = "hg19")
  
  library(chipseq)
  estimate.mean.fraglen(rep1)
  
  prepareChIPseq = function(reads){
  
  frag.len = median( estimate.mean.fraglen(reads) )
  cat( paste0( 'Median fragment size for this library is ', round(frag.len)))
  reads.extended = resize(reads, width = frag.len)
  return( trim(reads.extended) )
  }
  
  rep1 = prepareChIPseq(rep1)
  
  ovlp = findOverlaps(rep1,rep2)
  ovlp = as.data.frame(ovlp)
  
  ov = min( length(unique(ovlp$queryHits)), length(unique(ovlp$subjectHits)) )
  
  
  library(VennDiagram)
  draw.pairwise.venn(
    area1=length(rep1),
    area2=length(rep2),
    cross.area=ov,
    category=c("rep1", "rep2"),
    fill=c("steelblue", "blue3"),
    cat.cex=0.7)
}


#input.region.bed.dir = "~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap"
#plotHeatMapUsedeepTools("~/BamCompare","/projects/ctsi/bbc/aimin","hg19_gene.bed",)
#
plotHeatMapUsedeepTools <- function(input.sample.file,input.bw.file.dir,input.region.bed.dir,select.region.bed,output.file.dir){
  
  bw.file.sample.label <- mapBw3Sample(input.sample.file,input.bw.file.dir)
  
  if(!dir.exists(output.file.dir)){dir.create(output.file.dir,recursive = TRUE)}
#/projects/ctsi/bbc/aimin/annotation/hg19_gene.bed
  
   bed.file.list <- list.files(
        input.region.bed.dir,
        pattern = "*.bed$",
        all.files = TRUE,
        full.names = TRUE,
        recursive = TRUE,
        include.dirs = TRUE)
  
  bw.file.list <- as.character(bw.file.sample.label$file.bw)
  samplesLabel <- as.character(bw.file.sample.label$sampleLabel)
  
  print(bed.file.list)
  
if(select.region.bed!=""){
input.beds = bed.file.list[-grep(paste(select.region.bed,collapse = "|"), bed.file.list)] 
input.beds = paste(input.beds,collapse = " ")
}else{
input.beds = paste(bed.file.list,collapse = " ")
}

  print(input.beds)
  
cmd = "computeMatrix reference-point --referencePoint TSS -b 4000 -a 4000 -R"
input.beds=input.beds
input.bw.file=paste(bw.file.list,collapse = " ")
input.samplesLabel=paste(samplesLabel,collapse = " ")
outFileNameMatrix=file.path(output.file.dir,"matrix1_cJun_p27_TSS.gz")
outFileSortedRegions=file.path(output.file.dir,"regions_cJun_p27_genes.bed")
outHeatMapFile=file.path(output.file.dir,"heatmap_cJun_p27.png")

cmd1=paste(cmd,input.beds,"-S",input.bw.file,"--skipZero","-o",outFileNameMatrix,"--outFileSortedRegions",outFileSortedRegions,"--samplesLabel",input.samplesLabel,collapse = " ")

cmd2=paste("plotHeatmap -m",outFileNameMatrix,"-out",outHeatMapFile,collapse = " ")

# 
# testFiles/genes.bed -S testFiles/log2ratio_H3K4Me3_chr19.bw --skipZeros 
# -o matrix1_H3K4me3_l2r_TSS.gz
# --outFileSortedRegions regions1_H3K4me3_l2r_genes.bed
# 
# deepTools2.0/bin/computeMatrix scale-regions \
#   -R genes_chr19_firstHalf.bed genes_chr19_secondHalf.bed \ # separate multiple files with spaces
#   -S testFiles/log2ratio_*.bw  \ or use the wild card approach
#   -b 3000 -a 3000 \
#   --regionBodyLength 5000 \
#   --skipZeros -o matrix2_multipleBW_l2r_twoGroups_scaled.gz \
#   --outFileNameMatrix matrix2_multipleBW_l2r_twoGroups_scaled.tab \
#   --outFileSortedRegions regions2_multipleBW_l2r_twoGroups_genes.bed
# 
# computeMatrix reference-point \ # choose the mode
#        --referencePoint TSS \ # alternatives: TES, center
#        -b 3000 -a 10000 \ # define the region you are interested in
#        -R testFiles/genes.bed \
#        -S testFiles/log2ratio_H3K4Me3_chr19.bw  \
#        --skipZeros \
#        -o matrix1_H3K4me3_l2r_TSS.gz \ # to be used with plotHeatmap and plotProfile
#        --outFileSortedRegions regions1_H3K4me3_l2r_genes.bed
#cmd.java.2="export LD_LIBRARY_PATH=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server:$LD_LIBRARY_PATH"
#cmd1=paste(cmd.java.2,cmd1,sep=";")
system(cmd1)

#print(cmd2)

}


#R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::submitJob4plotHeatMapUsedeepTools("~/BamCompare","/projects/ctsi/bbc/aimin/annotation","hg19_gene.bed","/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap")'

submitJob4plotHeatMapUsedeepTools <- function(input.bw.file.dir,input.region.bed.dir,select.region.bed,output.file.dir){
  
  #Sys.setenv(JAVA_HOME='/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server')
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  job.name <- "HeapMapPlot"
  
  Rfun1 <- 'library(ChipSeq);re <- ChipSeq:::plotHeatMapUsedeepTools('
  
  Rinput <- paste0('\\"',input.bw.file.dir,'\\",',
                   '\\"',input.region.bed.dir,'\\",',
                   '\\"',select.region.bed,'\\",',
                   '\\"',output.file.dir,'\\"')
  Rfun2 <- ')'
  
  Rfun <-paste0(Rfun1,Rinput,Rfun2)
  
  #cmd.java.1="module load java/1.8.0_60"
  #cmd.java.1="R CMD javareconf -e"
 # cmd.java.2="export LD_LIBRARY_PATH=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server:$LD_LIBRARY_PATH"
  
  #cmd.java ='export JAVA_HOME="/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre"'
  
  cmd.gff <- PathwaySplice:::createBsubJobArrayRfun(Rfun,job.name,wait.job.name=NULL)
  
  #cmd2=paste(cmd.java.2,cmd.gff,sep=";")
  
  system(cmd.gff)

}

# Welcome to Rattler, *please* read these important system notes:
#   
#   --> Rattler is currently running the SLURM resource manager to
# schedule all compute resources. Example SLURM job scripts are
# available on the system at /cm/shared/docs/slurm/current/job_scripts
# 
# To run an interactive shell, issue:
#   srun -p hipri -t 0:30:00 -n 36 -N 1 --pty /bin/bash -l
# 
# To submit a batch job, issue:       sbatch job_script
# To show all queued jobs, issue:     squeue
# To kill a queued job, issue:        scancel <jobId>
#   
#   See "man slurm" for more detailed information.
# 
# --> Rattler has 3 main queues with the following timelimits:
#   * hipri    (2 days)
# * iq       (1 day)
# * requestq (infinite) - Requires a valid slurm reservation to use
# 
# --> To see which software packages are available issue: "module avail"
# 
# Use the following commands to adjust your environment:
#   'module add <module>'     - adds a module to your environment for this session
# 'module initadd <module>' - configure module to be loaded at every login
# 

#plotHeatMapUsedeepTools("/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap/matrix1_cJun_p27_TSS.gz","/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap")
#
usePlotHeatmap <- function(input.matrix.file,output.file.dir){
  
  if(!dir.exists(output.file.dir)){dir.create(output.file.dir,recursive = TRUE)}
  #/projects/ctsi/bbc/aimin/annotation/hg19_gene.bed
  
  outHeatMapFile=file.path(output.file.dir,"heatmap_cJun_p27.png")
  
  cmd2=paste("plotHeatmap -m",input.matrix.file,"-out",outHeatMapFile,collapse = " ")
  
  system(cmd2)
  
}

#R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::submitJob4usePlotHeatmap("/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap/matrix1_cJun_p27_TSS.gz","/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap")'

submitJob4usePlotHeatmap <- function(input.matrix.file,output.file.dir){
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  job.name <- "plotHeatmap"
  
  Rfun1 <- 'library(ChipSeq);re <- ChipSeq:::usePlotHeatmap('
  
  Rinput <- paste0('\\"',input.matrix.file,'\\",',
                   '\\"',output.file.dir,'\\"')
  Rfun2 <- ')'
  
  Rfun <-paste0(Rfun1,Rinput,Rfun2)
  
  cmd.gff <- PathwaySplice:::createBsubJobArrayRfun(Rfun,job.name,wait.job.name=NULL)
  
  system(cmd.gff)
}

# input.bam.file.dir="/Volumes/Bioinformatics$/2017/DannyNewData/BindDiff/common_peaks_bed/Annotation"
# out.dir.name = "~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap"
# generateBed4HeatMap(input.bam.file.dir,out.dir.name)
#
generateBed4HeatMap <- function(input.bam.file.dir,out.dir.name) {
  
  if (!dir.exists(out.dir.name))
  {
    dir.create(out.dir.name, recursive = TRUE)
  }
  
  file.list <- list.files(
    input.bam.file.dir,
    pattern = "*xls$",
    all.files = TRUE,
    full.names = TRUE,
    recursive = TRUE,
    include.dirs = TRUE)
  
  file.table  <- lapply(file.list,function(u){
    y <- basename(u)
    pos <- regexpr("\\_", y)
    pos <- pos - 1
    y <- substr(y, 1, pos)
    x <-fread(u)
    x<- cbind(x,rep(y,dim(x)[1]))
    colnames(x)[dim(x)[2]]="sample_name"
    x
    })
  
  # library(dplyr)
  # x <- data_frame(i = c("a","b","c"), j = 1:3)
  # y <- data_frame(i = c("b","c","d"), k = 4:6)
  # z <- data_frame(i = c("c","d","a"), l = 7:9)
  
  common.gene.peak <- file.table %>%
    Reduce(function(dtf1,dtf2) inner_join(dtf1,dtf2,by="SYMBOL"),.)
  
  x <- data.table::subset(common.gene.peak,select=c("geneChr.x","geneStart.x","geneEnd.x","geneStrand.x","SYMBOL"))
  
  data.table::setkey(x,NULL)
  xx <- unique(x)
  xxx<-xx[complete.cases(xx),]
  
  # gene.231dd.231.1833.1833shp27 <- read.xlsx(
  # "/Volumes/Bioinformatics$/2017/DannyNewData/AnnotationNew4DBA/table_S3_headerless.xlsx",
  # sheetIndex = 1,header = FALSE)
  # colnames(gene.231dd.231.1833.1833shp27)=c("SYMBOL","FC_231DD_231","p_231DD_231","FC_1833_231","p_1833_231","FC_1833shp27_1833","p_1833shp27_1833")
  #save(gene.231dd.231.1833.1833shp27,file="./data/gene.RData")
  gene.45 <- merge(gene.231dd.231.1833.1833shp27,xxx,by="SYMBOL",sort = FALSE)
  
  gene.45.1 <- gene.45[,c(8,9,10,11,1)]
  colnames(gene.45.1)=c("chr","start","end","strand","SYMBOL")
  
  gene.45.1[which(gene.45.1$strand==1),]$strand="+"
  gene.45.1[which(gene.45.1$strand==2),]$strand="-"
  
  
  gene.45.1.sorted.by.chr <- gene.45.1[order(gene.45.1$chr),]
  
  gene.45.1$chr<-paste0("chr",gene.45.1$chr)
  gene.45.1.sorted.by.chr$chr<-paste0("chr",gene.45.1.sorted.by.chr$chr)
  
  write.table(gene.45.1,file=file.path(out.dir.name,"gene_200.bed"),row.names = FALSE,col.names = FALSE,quote=FALSE,sep="\t")
  write.table(gene.45.1.sorted.by.chr,file=file.path(out.dir.name,"gene_200_sorted.bed"),row.names = FALSE,col.names = FALSE,quote=FALSE,sep="\t")
}


#R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::bashJob4plotHeatMapUsedeepTools("~/SampleID_INFO_ChIP_new_Danny.csv","~/BamCompare","~/ChipSeqBed",select.region.bed=NULL,"/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmapAddlabel")'

#R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::bashJob4plotHeatMapUsedeepTools("~/SampleID_INFO_ChIP_new_Danny.csv","~/BamCoverage","~/ChipSeqBed_p27_cJun",select.region.bed=NULL,"/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap4p27_cJun")'


#R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::bashJob4plotHeatMapUsedeepTools("/nethome/axy148/R/lib64/R/library/ChipSeq/extdata/zhao_data.csv","~/BamCompareZhao","~/cJun_gene_bed",select.region.bed=NULL,"/scratch/projects/bbc/aiminy_project/zhao_ChipSeq_heatmap4cJun")'

bashJob4plotHeatMapUsedeepTools <- function(input.sample.file,input.bw.file.dir,input.region.bed.dir,select.region.bed,output.file.dir){
  
  #Sys.setenv(JAVA_HOME='/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server')
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  job.name <- "generateMatrix"
  
  Rfun1 <- 'library(ChipSeq);re <- ChipSeq:::plotHeatMapUsedeepTools('
  
  
  Rinput <- paste0('\\"',input.sample.file,'\\",',
                     '\\"',input.bw.file.dir,'\\",',
                    '\\"',input.region.bed.dir,'\\",',
                   '\\"',select.region.bed,'\\",',
                   '\\"',output.file.dir,'\\"')

  Rfun2 <- ')'
  
  Rfun <-paste0(Rfun1,Rinput,Rfun2)
  
  #cmd.java.1="module load java/1.8.0_60"
  #cmd.java.1="R CMD javareconf -e"
  # cmd.java.2="export LD_LIBRARY_PATH=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server:$LD_LIBRARY_PATH"
  
  #cmd.java ='export JAVA_HOME="/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre"'
  
  cmd.gff <- PathwaySplice:::createBsubJobArrayRfun(Rfun,job.name,wait.job.name=NULL)
  
  #cmd2=paste(cmd.java.2,cmd.gff,sep=";")
  
  system(cmd.gff)
  
  job.name <- "plotHeatmap"
  
  Rfun1 <- 'library(ChipSeq);re <- ChipSeq:::usePlotHeatmap('
  input.matrix.file <- file.path(output.file.dir,"matrix1_cJun_p27_TSS.gz")
  
  Rinput <- paste0('\\"',input.matrix.file,'\\",',
                   '\\"',output.file.dir,'\\"')
  Rfun2 <- ')'
  
  Rfun <-paste0(Rfun1,Rinput,Rfun2)
  
  cmd.gff <- PathwaySplice:::createBsubJobArrayRfun(Rfun,job.name,wait.job.name="generateMatrix")
  
  system(cmd.gff)
  
}

#input.bw.dir="~/BamCompare"
#input.sample.file="~/SampleID_INFO_ChIP_new_Danny.csv"
#
#mapBw3Sample(input.sample.file,input.bw.dir)
#
mapBw3Sample <- function(input.sample.file,input.bw.dir) {
  
  file.1 <- list.files(input.bw.dir,pattern=".bw$",all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  sample.file <- fread(input.sample.file)
  
  file.2<-cbind(unlist(lapply(file.1,function(u){x<-basename(u);p1 <- regexpr("\\_", x);p2 <- regexpr("\\.", x);xx <- substr(x,p1+1,p2-1)})),file.1)
  
  colnames(file.2)=c("ID","file.bw")
  file.3 <- merge(file.2,sample.file,by="ID",sort=F)
  sampleLabel= paste(gsub(" ", "-", file.3$Type_Cell), file.3$Type_TF, sep = "-")
  sampleLabel=gsub("MDA-MB-","",sampleLabel)
  file.4 <- cbind(file.3,sampleLabel)
  file.5 <- file.4[,c(2,6)]
  file.5
}

makeHeatMapByme <- function() {
  compute.matrix.file="/Users/axy148/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap/Danny_ChipSeq_heatmapBasedSortedBed/matrix1_cJun_p27_TSS.gz"
  
  table = read.table(gzfile(compute.matrix.file), skip=1)
  matrix = table[7:dim(table)[2]]
  image(1:dim(matrix)[2], 1:dim(matrix)[1], t(as.matrix(matrix)),  axes=FALSE, xlab='sample', ylab='gene')
  axis(2, at=1:length(table$V1), labels=table$V4, las = 1)
}

# input.bam.file.dir="/Volumes/Bioinformatics$/2017/DannyNewData/BindDiff/common_peaks_bed/Annotation"
# out.dir.name = "~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap"
# gene.full <- generateBed4HeatMap2(input.bam.file.dir,out.dir.name)
#
# input.bam.file.dir="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap/Peak2Bed/p27"
# out.dir.name = "~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap/Peak2Bed"
# gene.full <- generateBed4HeatMap2(input.bam.file.dir,out.dir.name,"p27")
#
# input.bam.file.dir="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap/Peak2Bed/cJun"
# out.dir.name = "~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap/Peak2Bed"
# gene.full <- generateBed4HeatMap2(input.bam.file.dir,out.dir.name,"cJun")
#

generateBed4HeatMap2 <- function(input.bam.file.dir,out.dir.name,Ab) {
  
  out.dir.name <-file.path(out.dir.name,paste0(Ab,"_gene_bed"))
  
  if (!dir.exists(out.dir.name))
  {
    dir.create(out.dir.name, recursive = TRUE)
  }
  
  file.list <- list.files(
    input.bam.file.dir,
    pattern = "*xls$",
    all.files = TRUE,
    full.names = TRUE,
    recursive = TRUE,
    include.dirs = TRUE)
  
  file.table  <- lapply(file.list,function(u){
    y <- basename(u)
    pos <- regexpr("\\_", y)
    pos <- pos - 1
    y <- substr(y, 1, pos)
    x <-fread(u)
    x<- cbind(x,rep(y,dim(x)[1]))
    colnames(x)[dim(x)[2]]="sample_name"
    x
  })
  
  # library(dplyr)
  # x <- data_frame(i = c("a","b","c"), j = 1:3)
  # y <- data_frame(i = c("b","c","d"), k = 4:6)
  # z <- data_frame(i = c("c","d","a"), l = 7:9)
  
  common.gene.peak <- file.table %>%
    Reduce(function(dtf1,dtf2) full_join(dtf1,dtf2,by="SYMBOL"),.)
  
  x <- subset(common.gene.peak,select=c("geneChr.x","geneStart.x","geneEnd.x","geneStrand.x","SYMBOL"))
  
  setkey(x,NULL)
  xx <- unique(x)
  xxx<-xx[complete.cases(xx),]
  
  # gene.231dd.231.1833.1833shp27 <- read.xlsx(
  # "/Volumes/Bioinformatics$/2017/DannyNewData/AnnotationNew4DBA/table_S3_headerless.xlsx",
  # sheetIndex = 1,header = FALSE)
  # colnames(gene.231dd.231.1833.1833shp27)=c("SYMBOL","FC_231DD_231","p_231DD_231","FC_1833_231","p_1833_231","FC_1833shp27_1833","p_1833shp27_1833")
  #save(gene.231dd.231.1833.1833shp27,file="./data/gene.RData")
  #gene.45 <- merge(gene.231dd.231.1833.1833shp27,xxx,by="SYMBOL",sort = FALSE)
  
  gene.45 <- xxx
  gene.45.1 <- gene.45
  colnames(gene.45.1)=c("chr","start","end","strand","SYMBOL")

  gene.45.1[which(gene.45.1$strand==1),]$strand="+"
  gene.45.1[which(gene.45.1$strand==2),]$strand="-"


  gene.45.1.sorted.by.chr <- gene.45.1[order(gene.45.1$chr),]

  gene.45.1$chr<-paste0("chr",gene.45.1$chr)
  gene.45.1.sorted.by.chr$chr<-paste0("chr",gene.45.1.sorted.by.chr$chr)

  write.table(gene.45.1,file=file.path(out.dir.name,paste0("gene_",Ab,".bed")),row.names = FALSE,col.names = FALSE,quote=FALSE,sep="\t")
  write.table(gene.45.1.sorted.by.chr,file=file.path(out.dir.name,paste0("gene_",Ab,"_sorted.bed")),row.names = FALSE,col.names = FALSE,quote=FALSE,sep="\t")
  
  # > length(unique(file.table[[1]]$SYMBOL))
  # [1] 417
  # > length(unique(file.table[[2]]$SYMBOL))
  # [1] 222
  # > length(unique(file.table[[3]]$SYMBOL))
  # [1] 458
  # 
  # 
  # unique(c(unique(file.table[[1]]$SYMBOL),unique(file.table[[2]]$SYMBOL),unique(file.table[[3]]$SYMBOL)))
  gene.45.1.sorted.by.chr
}



# /deepTools-1.5/bin/bamCompare --bamfile1 ChIP.bam --bamfile2 Input.bam \
# --binSize 25 --fragmentLength 200 --missingDataAsZero no \
# --ratio log2 --scaleFactorsMethod SES -o log2ratio_ChIP_vs_Input.bw

#' bsub -P bbc -J "BamCoverage" -o %J.BamCoverage.log -e %J.BamCoverage.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::useBamCoverage("/scratch/projects/bbc/aiminy_project/DannyNewData2/SampleID_INFO_ChIP_new_Danny.csv","/nethome/axy148/_visualization/sorted_bam_files_2.txt","~/BamCoverage")'

useBamCoverage <- function(input.sample.file,input.bam.file,output.dir)
{
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$y
  
  # output.dir.name = dirname(input.sample.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  temp3 = output.dir
  
  # cmd9 = 'ngs.plot.r -G' cmd10 = '-R' cmd11 = '-C' cmd12 = '-O' cmd13 = '-T'
  # cmd14 = '-L' cmd15 = '-RR' cmd16 = '-CD' cmd17= '-GO'
  
  
  
  cellInfo.run <- lapply(1:length(cellInfo), function(u,cellInfo, 
                                                      temp3)
  {
    
    x.name = cellInfo[[u]]$name
    
    es <- cellInfo[[u]]$es
    
    x.input <- es[es$Type_TF == "Input", ]$file.name
    
    x.sample <- es[es$Type_TF != "Input", ]
    
    #print(x.sample)
    #print(x.input)
    
    x.run <- apply(x.sample, 1, function(x, x.input, temp3)
    {
      
      y <- x
      
      ID <- y[1]
      Type_Cell <- y[2]
      Type_TF <- y[3]
      Cell_TF <- y[4]
      file.name <- y[5]
      xx <- file.name
      xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      # ~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCompare  --bamfile1 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-03_S11_R1.marked_sorted.bam --bamfile2 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-17_S1_R1.marked_sorted.bam --binSize 25 --ratio log2 -o ~/BamCompare/log2ratio_2017-03-02-03_S11_R1.marked_sorted.bam_vs_2017-03-02-17_S1_R1.marked_sorted.bam.bw 
      
      #bamCoverage -b reads.bam -o coverage.bw
      
      #bamCoverage --bam a.bam -o a.SeqDepthNorm.bw \
      #--binSize 10
      #--normalizeTo1x 2150570000
      #--ignoreForNormalization chrX
      #--extendReads
      
      cmd1 <- paste("~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCoverage --bam",xx,sep=" ")
      cmd2 <- "--binSize 25 --normalizeTo1x 2451960000 --ignoreForNormalization chrX"
      cmd3 <- "-o"
      
      cmd4 <- paste(cmd1,cmd2,cmd3,sep=" ") 
      
      cmd5 <- file.path(output.dir,paste0("To1x_",basename(as.character(xx)),"_vs_","no_input",".bw"))
      
      
      cmd6 <- paste(cmd4,cmd5,sep=" ") 
      
      cmd6
      
      #cat(cmd6, "\n")
      #cat("\n")
    }, x.input, temp3)
    
    x.run
    
  }, cellInfo,temp3)
  
  
  names(cellInfo.run) = unlist(lapply(cellInfo, function(u)
  {
    u$name
  }))
  
  zzz <- unlist(cellInfo.run)
  
  lapply(1:length(zzz), function(u, zzz)
  {
    
    cat(as.character(zzz[u][[1]]), "\n")
    cat("\n")
    
    system(as.character(zzz[u][[1]]))
    
  }, zzz)
  # # dir.name=temp3 dir.name=reformatPath(dir.name)
  # 
  # file.name = file.path(temp3, dir(temp3, recursive = TRUE))
  # 
  # file.name.2 <- as.list(file.name)
  # 
  # 
  # # names(file.name.2) = unlist(lapply(file.name.2, function(u) { u$name }))
  # 
  # zzz <- unlist(file.name.2)
  # 
  # lapply(1:length(zzz), function(u, zzz)
  # {
  #   
  #   dir.name = dirname(zzz[u][[1]])
  #   file_name = file_path_sans_ext(basename(zzz[u][[1]]))
  #   
  #   cmd = paste("ngs.plot.r -G hg19 -R tss -C", zzz[u][[1]], "-O", file.path(dir.name, 
  #                                                                            paste0(file_name, ".tss")), "-T", file_name, "-L 4000 -RR 1 -CD 1 -GO total", 
  #               sep = " ")
  #   
  #   
  #   
  #   # system(as.character(zzz[u][[1]])) job.name = paste0('bamPlot.', u)
  #   # cmd.pegasus = usePegasus(job.option, Wall.time = '72:00',cores = 32,Memory
  #   # = 16000,span.ptile = 16,job.name) cmd2 = paste(cmd.pegasus,cmd,sep = ' ')
  #   
  #   cmd2 = cmd
  #   cat(cmd2, "\n")
  #   cat("\n")
  #   system(cmd2)
  # }, zzz)
  # 
  # 
  # re <- list(cellInforun = cellInfo.run, zzz = zzz)
  # 
  # # AnntationUsingChipSeeker(temp3, 'peaks.bed', temp3, DD = 5000)
  
  #return(re)
  
}

#' output.file.dir="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/heatmap/Peak2Bed"
#' #mcf7=mcf2
#' re<-getTargetGene4Ab(mcf2,"yes","p27",output.file.dir)
#' re<-getTargetGene4Ab(mcf2,"yes","cJun",output.file.dir)
#' 
getTargetGene4Ab<-function(mcf7,Mergereplicates=c("yes","no"),Ab,output.file.dir){
  
  output.file.dir = file.path(output.file.dir,Ab)
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir,recursive = TRUE)
  }
  
  temp<-mcf7
  
  sampID.v<-colnames(temp$class) 
  sampID.v.2<-unlist(lapply(1:length(sampID.v),function(u,sampID.v){
    
    x=sampID.v[u]
    
    y=x
    y
  },sampID.v))
  
  
  colnames(temp$class)<-sampID.v.2
  temp$class[1,]<-sampID.v.2
  
  #temp<-dba(temp,mask =c(1,2,13,14,11,12,15,16))
  
  #Merge the replicates of each set 
  if(Mergereplicates=="yes"){
    temp2<-dba.peakset(temp,consensus = -DBA_REPLICATE)
    #temp22<-dba(temp2,mask =c(9,16,17:23))
    print(temp2)
    ##need to set the flexiable number identified from data sets
    t <- temp2
    temp22<-dba(t,mask =which(t$class[which(rownames(t$class)=="Replicate"),]=="1-2"))
  }else{
    temp22<-temp
  }
  
  print(temp22)
  #temp22 <- re
  
   Tissue1<-temp22$class[2,]
   Tissue2<-unique(temp22$class[2,])
   
   TF<-unique(temp22$class[3,])
   TF.n<-length(TF)
    
   for(i in 1:length(Tissue2)){
     
     #for(j in 1:length(TF)){
     p <- which(temp22$class[2,]==Tissue2[i]&temp22$class[3,]==Ab)
     cat(Tissue2[i]," ",Ab," ",p,"\n")
     if(length(p)!=0){
     #as.data.frame(dba.peakset(temp22,peaks=1,bRetrieve=TRUE))
     dba.peakset(temp22,peaks=as.integer(p),bRetrieve=TRUE, writeFile = file.path(output.file.dir,paste0(Tissue2[i],"_",Ab,".bed")))
    }
     
   }
   
   AnntationUsingChipSeeker3(output.file.dir,".bed",output.file.dir
                            ,txdb="hg19",DD=5000)
   
#   AnntationUsingChipSeeker2
  # temp3=file.path(output.file.dir,"Venn")
  # 
  # if(!dir.exists(temp3))
  # {
  #   dir.create(temp3)
  # }
  # 
  # for(i in 1:length(Tissue2)){
  #   po<-which(Tissue1 %in% Tissue2[i])
  #   
  #   print(po)
  #   
  #   if(length(po)==2)
  #   {
  #     png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po],collapse = "-vs-"),".png")))
  #     dba.plotVenn(temp22,mask=po,main=Tissue2[i])
  #     dev.off()
  #     
  #   }else if(length(po)==4){
  #     po1<-po[c(1,3)]
  #     po2<-po[c(2,4)]
  #     
  #     png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po1],collapse = "-vs-"),".png")))
  #     dba.plotVenn(temp22,mask=po1,main=Tissue2[i])
  #     dev.off()
  #     
  #     png(file.path(temp3,paste0(paste0(colnames(temp22$class)[po2],collapse = "-vs-"),".png")))
  #     dba.plotVenn(temp22,mask=po2,main=Tissue2[i])
  #     dev.off()
  #     
  #   }else
  #   {
  #     cat(paste0("For ",Tissue2[i],": Only one peak profile for ",TF.n," TFs\n"))
  #   }
  # }
  # 
  # p.common<-lapply(1:length(Tissue2),function(u,Tissue1,Tissue2,temp22){
  #   
  #   po<-which(Tissue1 %in% Tissue2[u])
  #   
  #   if(length(po)==2)
  #   {
  #     common.peaks<-dba.overlap(temp22,mask=po)
  #     y<-common.peaks$inAll
  #   }else if(length(po)==4){
  #     po1<-po[c(1,3)]
  #     po2<-po[c(2,4)]
  #     
  #     common.peaks.1<-dba.overlap(temp22,mask=po1)
  #     y1<-common.peaks.1$inAll
  #     
  #     common.peaks.2<-dba.overlap(temp22,mask=po2)
  #     y2<-common.peaks.2$inAll
  #     
  #     y<-list(y1=y1,y2=y2)
  #     
  #     names(y)[1]<-paste0(colnames(temp22$class)[po1],collapse = "-vs-")
  #     names(y)[2]<-paste0(colnames(temp22$class)[po2],collapse = "-vs-")
  #     
  #   }else{
  #     y<-NULL}
  #   y
  # },Tissue1,Tissue2,temp22)
  # 
  # names(p.common)<-Tissue2
  # 
  # p.common<-unlist(p.common,recursive = F)
  # 
  # p.common<-p.common[lapply(p.common,length) > 0]
  # 
  # if(!dir.exists(file.path(output.file.dir,"common_peaks_bed")))
  # {
  #   dir.create(file.path(output.file.dir,"common_peaks_bed"))
  #   dir.create(file.path(output.file.dir,"common_peaks_bed","ucsc"))
  #   dir.create(file.path(output.file.dir,"common_peaks_bed","igv"))
  # }
  # 
  # #output common peaks to bed files
  # 
  # lapply(1:length(p.common),function(u,p.common,output.file.dir){
  #   x=p.common[[u]]
  #   
  #   x_name=names(p.common)[u]
  #   
  #   df <- data.frame(seqnames=seqnames(x),
  #                    #starts=start(x)-1,
  #                    starts=start(x),
  #                    ends=end(x),
  #                    names=c(rep(".", length(x))),
  #                    scores=elementMetadata(x)[,1],
  #                    strands=strand(x))
  #   
  #   #assign strand
  #   df.str <- data.frame(seqnames=seqnames(x),
  #                        #starts=start(x)-1,
  #                        starts=start(x),
  #                        ends=end(x),
  #                        names=c(rep(".", length(x))),
  #                        scores=elementMetadata(x)[,1],
  #                        strands=c(rep(".", length(x))))
  #   
  #   df.str.1<-df.str[-grep("random",df.str$seqnames),]
  #   
  #   df.str.2<-df.str.1
  #   
  #   df.str.3<-df.str.2[-grep("chrUn",df.str.2$seqnames),]
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed",paste0(x_name,"_cp_with_header.bed")),
  #               col.names=TRUE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed","ucsc",paste0(x_name,"_4_ucsc.bed")),
  #               col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed",paste0(x_name,"_common_peaks.bed")),
  #               col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  #   write.table(df,file=file.path(output.file.dir,"common_peaks_bed","igv",paste0(x_name,"_4_igv.bed")),
  #               col.names=FALSE,row.names = FALSE,quote=FALSE,sep="\t")
  #   
  # },p.common,output.file.dir)
  # 
  # AnntationUsingChipSeeker(file.path(output.file.dir,"common_peaks_bed","igv"),"bed",file.path(output.file.dir,"common_peaks_bed")
  #                          ,txdb="hg19",DD=5000,distanceToTSS_cutoff=10000)
  # 
  # return(p.common)
}

#bsub -P bbc -J "RunSppR" -o %J.RunSppR.log -e %J.RunSppR.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::useRunSppR("~/SampleID_INFO_ChIP_new_Danny.csv","~/sort_bam","/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_data_QC")'
#
#R/lib64/R/library/ChipSeq/extdata/zhao_data.csv 
#
#bsub -P bbc -J "RunSppR" -o %J.RunSppR.log -e %J.RunSppR.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::useRunSppR("R/lib64/R/library/ChipSeq/extdata/zhao_data.csv","~/sort_bam","/scratch/projects/bbc/aiminy_project/zhao_ChipSeq_data_QC")'
#
#bsub -P bbc -J "RunSppR" -o %J.RunSppR.log -e %J.RunSppR.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::useRunSppR("~/SampleID_INFO_ChIP_new_Danny.csv","/projects/scratch/bbc/Project/Danny_chip2/BigWig","/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_data_QC_2")'
#
#
#
useRunSppR <- function(input.sample.file,input.bam.file.dir,output.file.dir){
  
  #bam.file.sample.label <- mapBam2Sample2(input.sample.file,input.bam.file.dir)
  
  bam.file.sample.label <- mapBam2Sample3(input.sample.file,input.bam.file.dir)
  
  if(!dir.exists(output.file.dir)){dir.create(output.file.dir,recursive = TRUE)}
  #/projects/ctsi/bbc/aimin/annotation/hg19_gene.bed
  
  bam.file.list <- as.character(bam.file.sample.label$file.bam)
  samplesLabel <- as.character(bam.file.sample.label$sampleLabel)
  
  lapply(1:length(bam.file.list),function(u,bam.file.list,samplesLabel,output.file.dir){
    
    cmd = paste("Rscript ~/phantompeakqualtools/run_spp.R",
                paste0("-c=",bam.file.list[u]),
                paste0("-savp=",file.path(output.file.dir,paste0(samplesLabel[u],"_QC.pdf"))),
                paste0("-out=",file.path(output.file.dir,paste0(samplesLabel[u],"_QC_metrix.txt"))),sep=" ")
    
    system(cmd)
    
  },bam.file.list,samplesLabel,output.file.dir)

}

#R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::submitJob4plotHeatMapUsedeepTools("~/BamCompare","/projects/ctsi/bbc/aimin/annotation","hg19_gene.bed","/scratch/projects/bbc/aiminy_project/Danny_ChipSeq_heatmap")'

submitJob4useRunSppR <- function(input.sample.file,input.bam.file.dir,output.file.dir){
  
  #Sys.setenv(JAVA_HOME='/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server')
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  job.name <- "QC"
  
  Rfun1 <- 'library(ChipSeq);re <- ChipSeq:::useRunSppR('
  
  Rinput <- paste0('\\"',input.sample.file,'\\",',
                   '\\"',input.bam.file.dir,'\\",',
                   '\\"',output.file.dir,'\\"')
  Rfun2 <- ')'
  
  Rfun <-paste0(Rfun1,Rinput,Rfun2)
  
  cmd.gff <- DoGs:::createBsubJobArrayRfun(Rfun,job.name,wait.job.name=NULL)
  
  system(cmd.gff)
  
}

#input.bw.dir="~/BamCompare"
#input.sample.file="~/SampleID_INFO_ChIP_new_Danny.csv"
#
#mapBw3Sample(input.sample.file,input.bw.dir)
#
mapBam2Sample <- function(input.sample.file,input.bam.dir) {
  
  file.1 <- list.files(input.bam.dir,pattern=".bam$",all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  sample.file <- fread(input.sample.file)
  
  file.2<-cbind(unlist(lapply(file.1,function(u){x<-basename(u);p1 <- regexpr("\\_", x);p2 <- regexpr("\\.", x);xx <- substr(x,p1+1,p2-1)})),file.1)
  
  colnames(file.2)=c("ID","file.bam")
  file.3 <- merge(file.2,sample.file,by="ID",sort=F)
  sampleLabel= paste(gsub(" ", "-", file.3$Type_Cell), file.3$Type_TF, sep = "-")
  sampleLabel=gsub("MDA-MB-","",sampleLabel)
  file.4 <- cbind(file.3,sampleLabel)
  file.5 <- file.4[,c(2,6)]
  file.5
}

mapBam2Sample2 <- function(input.sample.file,input.bam.dir) {
  
  file.1 <- list.files(input.bam.dir,pattern=".bam$",all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  sample.file <- fread(input.sample.file)
  
  file.2<-cbind(unlist(lapply(file.1,function(u){x<-basename(u);p2 <- regexpr("\\.", x);xx <- substr(x,1,p2-1)})),file.1)
  
  colnames(file.2)=c("ID","file.bam")
  file.3 <- merge(file.2,sample.file,by="ID",sort=F)
  sampleLabel= paste(gsub(" ", "-", file.3$Type_Cell), file.3$Type_TF, sep = "-")
  sampleLabel=gsub("MDA-MB-","",sampleLabel)
  file.4 <- cbind(file.3,sampleLabel)
  file.5 <- file.4[,c(2,6)]
  file.5
}

mapBam2Sample3 <- function(input.sample.file,input.bam.dir) {
  
  file.1 <- list.files(input.bam.dir,pattern=".bam$",all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  sample.file <- fread(input.sample.file)
  
  file.2<-cbind(unlist(lapply(file.1,function(u){x<-basename(u);xx <- tools::file_path_sans_ext(x)})),file.1)
  
  colnames(file.2)=c("ID","file.bam")
  file.3 <- merge(file.2,sample.file,by="ID",sort=F)
  sampleLabel= paste(gsub(" ", "-", file.3$Type_Cell), file.3$Type_TF, sep = "-")
  sampleLabel=gsub("MDA-MB-","",sampleLabel)
  file.4 <- cbind(file.3,sampleLabel)
  file.5 <- file.4[,c(2,6)]
  file.5
}

#' dir.name="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/Bba2Bed"
#' input.file.pattern=".bed"
#' out.dir.name="~/Dropbox (BBSR)/BBSR Team Folder/Aimin_Yan/ChipSeq/AnnotationNew4DBA"
#' txdb="hg19"
#' DD=5000
#' 
#' AnntationUsingChipSeeker3(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000, AP=c("Promoter","Intron"))
#'
#' res.promoter <- AnntationUsingChipSeeker3(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000,AP=c("Promoter"))
#' 
#' AnntationUsingChipSeeker3(dir.name,input.file.pattern,out.dir.name,txdb=txdb,DD,distanceToTSS_cutoff=5000,AP=c("Intron"))
#' 

AnotationUsingChipSeeker3 <- function(dir.name,input.file.pattern,out.dir.name,txdb=c("hg19","hg38"),DD,distanceToTSS_cutoff=NULL,assignGenomicAnnotation=TRUE,AP=c("Promoter", "5UTR", "3UTR", "Exon", "Intron","Downstream", "Intergenic")) {
  
  re<-ParserReadFiles(dir.name,input.file.pattern)
  
  re.bed<-re$input

  re.peaks.only.bed.2 <- re.bed

  txdb<-match.arg(txdb)
  
  switch (txdb,
          hg38 = {
            cat("Use hg38\n")
            txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
          },
          {
            cat("Use hg19\n") 
            txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
          }
  )
  
  APpath <- paste(AP,collapse = "_")
  
  temp3=file.path(out.dir.name,APpath)
  
  if(!dir.exists(temp3)){dir.create(temp3,recursive = TRUE)}
  
  d=DD
  
  peaks.anno.list <- lapply(1:length(re.peaks.only.bed.2),function(u,re.peaks.only.bed.2,d){
    
    peaks=readPeakFile(re.peaks.only.bed.2[[u]],as="data.frame")
    
    print(head(peaks))
    
    if(dim(peaks)[1]>0){
    
    peakAnno <- annotatePeak(re.peaks.only.bed.2[[u]], tssRegion=c(-d, d),
                             TxDb=txdb,assignGenomicAnnotation=assignGenomicAnnotation,genomicAnnotationPriority=AP,annoDb="org.Hs.eg.db")
    
    dropAnnoM <- function (csAnno, distanceToTSS_cutoff) 
    {
      idx <- which(abs(mcols(csAnno@anno)[["distanceToTSS"]]) < 
                     distanceToTSS_cutoff)
      csAnno@anno <- csAnno@anno[idx]
      csAnno@peakNum <- length(idx)
      if (csAnno@hasGenomicAnnotation) {
        csAnno@annoStat <- ChIPseeker:::getGenomicAnnoStat(csAnno@anno)
        csAnno@detailGenomicAnnotation = csAnno@detailGenomicAnnotation[idx, 
                                                                        ]
      }
      csAnno
    }
    
    if(!is.null(distanceToTSS_cutoff)){
    peakAnno <- dropAnnoM(peakAnno,distanceToTSS_cutoff = distanceToTSS_cutoff)
    }else
    {
    peakAnno <- peakAnno 
    }
    
    x_name=names(re.peaks.only.bed.2)[u]
    x_name=tools::file_path_sans_ext(x_name)
    x_name=tools::file_path_sans_ext(x_name)
    
    cat(x_name,"\n")
    png(file.path(temp3,paste0(x_name,"_",d,".png")))
    plotAnnoPie(peakAnno)
    dev.off()
    
    peaks.anno=as.data.frame(peakAnno)
    
    print(head(peaks.anno))
    
    print(colnames(peaks.anno))
    write.table(peaks.anno,file=file.path(temp3,paste0(x_name,"_",d,".xls")),
                row.names = FALSE,quote=FALSE,sep="\t")

    peaks.anno
    }
    
  },re.peaks.only.bed.2,d)
  
}

# R -e 'library(PathwaySplice);library(ChipSeq);ChipSeq:::bashJob4generateBed4HeatMap2("~/cJun","~/","cJun")'

bashJob4generateBed4HeatMap2 <- function(input.bam.file.dir,out.dir.name,Ab){
  
  #Sys.setenv(JAVA_HOME='/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server')
  
  if (!dir.exists(out.dir.name))
  {
    dir.create(out.dir.name, recursive = TRUE)
  }
  
  job.name <- "generateGeneBed"
  
  Rfun1 <- 'library(ChipSeq);re <- ChipSeq:::generateBed4HeatMap2('
  
  
  Rinput <- paste0('\\"',input.bam.file.dir,'\\",',
                   '\\"',out.dir.name,'\\",',
                   '\\"',Ab,'\\"')

  Rfun2 <- ')'
  
  Rfun <-paste0(Rfun1,Rinput,Rfun2)
  
  #cmd.java.1="module load java/1.8.0_60"
  #cmd.java.1="R CMD javareconf -e"
  # cmd.java.2="export LD_LIBRARY_PATH=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre/lib/amd64/server:$LD_LIBRARY_PATH"
  
  #cmd.java ='export JAVA_HOME="/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre"'
  
  cmd.gff <- PathwaySplice:::createBsubJobArrayRfun(Rfun,job.name,wait.job.name=NULL)
  
  #cmd2=paste(cmd.java.2,cmd.gff,sep=";")
  
  system(cmd.gff)
  
}

# /deepTools-1.5/bin/bamCompare --bamfile1 ChIP.bam --bamfile2 Input.bam \
# --binSize 25 --fragmentLength 200 --missingDataAsZero no \
# --ratio log2 --scaleFactorsMethod SES -o log2ratio_ChIP_vs_Input.bw

#' bsub -P bbc -J "bamCompare" -o %J.bamCompare.log -e %J.Compare.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::useBamCompare2("/nethome/axy148/R/lib64/R/library/ChipSeq/extdata/zhao_data.csv","/nethome/axy148/_visualization/sort_bam.txt","~/BamCompareZhao")'

useBamCompare2 <- function(input.sample.file,input.bam.file,output.dir)
{
  
  #sampleF <- read.table(input.sample.file,header = FALSE)
  #colnames(sampleF) <- c("ID","SampleName")
  
  #bam <- prepareBamFile(input.bam.file,file.pattern)
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$y
  # output.dir.name = dirname(input.sample.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  temp3 = output.dir
  
  # cmd9 = 'ngs.plot.r -G' cmd10 = '-R' cmd11 = '-C' cmd12 = '-O' cmd13 = '-T'
  # cmd14 = '-L' cmd15 = '-RR' cmd16 = '-CD' cmd17= '-GO'
  
  
  
  cellInfo.run <- lapply(1:length(cellInfo), function(u,cellInfo, 
                                                      temp3)
  {
    
    x.name = cellInfo[[u]]$name
    
    es <- cellInfo[[u]]$es
    
    x.input <- es[es$Type_TF == "Input", ]$file.name
    
    x.sample <- es[es$Type_TF != "Input", ]
    
    #print(x.sample)
    #print(x.input)
    
    x.run <- apply(x.sample, 1, function(x, x.input, temp3)
    {
      
      y <- x
      
      ID <- y[1]
      Type_Cell <- y[2]
      Type_TF <- y[3]
      Cell_TF <- y[4]
      file.name <- y[5]
      xx <- file.name
      xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      # ~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCompare  --bamfile1 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-03_S11_R1.marked_sorted.bam --bamfile2 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-17_S1_R1.marked_sorted.bam --binSize 25 --ratio log2 -o ~/BamCompare/log2ratio_2017-03-02-03_S11_R1.marked_sorted.bam_vs_2017-03-02-17_S1_R1.marked_sorted.bam.bw 
      
      cmd1 <- paste("~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCompare --bamfile1",xx,"--bamfile2",x.input,sep=" ")
      cmd2 <- "--binSize 25"
      cmd3 <- "--ratio log2 -o"
      
      cmd4 <- paste(cmd1,cmd2,cmd3,sep=" ") 
      
      cmd5 <- file.path(output.dir,paste0("log2ratio_",basename(as.character(xx)),"_vs_",basename(as.character(x.input)),".bw"))
      
      
      cmd6 <- paste(cmd4,cmd5,sep=" ") 
      
      cmd6
      
      #cat(cmd6, "\n")
      #cat("\n")
    }, x.input, temp3)
    
    x.run
    
  }, cellInfo,temp3)
  
  
  names(cellInfo.run) = unlist(lapply(cellInfo, function(u)
  {
    u$name
  }))
  
  system("module unload python/2.7.3")
  
  zzz <- unlist(cellInfo.run)
  
  lapply(1:length(zzz), function(u, zzz)
  {
    
    cat(as.character(zzz[u][[1]]), "\n")
    cat("\n")
    
    system(as.character(zzz[u][[1]]))
    
  }, zzz)
  # # dir.name=temp3 dir.name=reformatPath(dir.name)
  # 
  # file.name = file.path(temp3, dir(temp3, recursive = TRUE))
  # 
  # file.name.2 <- as.list(file.name)
  # 
  # 
  # # names(file.name.2) = unlist(lapply(file.name.2, function(u) { u$name }))
  # 
  # zzz <- unlist(file.name.2)
  # 
  # lapply(1:length(zzz), function(u, zzz)
  # {
  #   
  #   dir.name = dirname(zzz[u][[1]])
  #   file_name = file_path_sans_ext(basename(zzz[u][[1]]))
  #   
  #   cmd = paste("ngs.plot.r -G hg19 -R tss -C", zzz[u][[1]], "-O", file.path(dir.name, 
  #                                                                            paste0(file_name, ".tss")), "-T", file_name, "-L 4000 -RR 1 -CD 1 -GO total", 
  #               sep = " ")
  #   
  #   
  #   
  #   # system(as.character(zzz[u][[1]])) job.name = paste0('bamPlot.', u)
  #   # cmd.pegasus = usePegasus(job.option, Wall.time = '72:00',cores = 32,Memory
  #   # = 16000,span.ptile = 16,job.name) cmd2 = paste(cmd.pegasus,cmd,sep = ' ')
  #   
  #   cmd2 = cmd
  #   cat(cmd2, "\n")
  #   cat("\n")
  #   system(cmd2)
  # }, zzz)
  # 
  # 
  # re <- list(cellInforun = cellInfo.run, zzz = zzz)
  # 
  # # AnntationUsingChipSeeker(temp3, 'peaks.bed', temp3, DD = 5000)
  
  #return(re)
  
}

# bsub -P bbc -J "zhaoCJun" -o %J.zhaoCJun.log -e %J.zhaoCJun.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::ngs()'

ngs <- function(){
  
system("ngs.plot.r -G hg19 -R tss -C ~/zhao_config.txt -O zhao.tss -L 4000")

}

# bsub -P bbc -J "zhaoCJun" -o %J.zhaoCJun.log -e %J.zhaoCJun.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::ngs2()'

ngs2 <- function(config.file,distance.around.tss,output.file.dir){
  
  if (!dir.exists(output.file.dir))
  {
    dir.create(output.file.dir, recursive = TRUE)
  }
  
  cmd=paste("ngs.plot.r -G hg19 -R tss -C",config.file,"-O",output.file.dir,"-L",distance.around.tss,"-CO white:white:red", collapse = " ")
  
  system(cmd)
  
}

doAlignment<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

filterSam<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  cmd2='samtools view -Sh /home/someusername/myFastqFile.sam | grep -e "^@" -e "XM:i:[012][^0-9]" | grep -v "XS:i:" > /home/someusername/myFastqFile.sam.filtered.sam'
  
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

convertSam2Bam<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  cmd3="samtools view -S -b /home/someusername/myFastqFile.sam.filtered.sam > /home/someusername/myFastqFile.sam.filtered.sam.bam"
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

sortBam<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  cmd4="samtools sort /home/someusername/myFastqFile.sam.filtered.sam.bam /home/someusername/myFastqFile.sam.filtered.sam.bam.sorted"
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

removePCRduplicates<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  
  cmd5="samtools rmdup -s /home/someusername/myFastqFile.sam.filtered.sam.bam.sorted.bam /home/someusername/myFastqFile.sam.filtered.sam.bam.sorted.bam.nodup.bam"
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

indexBAM<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  
  cmd6="samtools index /home/someusername/myFastqFile.sam.filtered.sam.bam.sorted.bam.nodup.bam"
  
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

bam2Bed<-function(input.fastq.dir,bowtie2Indeces,output.dir){
  re <- parserreadfiles(input.fastq.dir, "fastq")
  res <- re$input
  index <- system('echo $LSB_JOBINDEX',intern = TRUE)
  cmd7="bedtools bamtobed -i /home/someusername/myFastqFile.sam.filtered.sam.bam.sorted.bam.nodup.bam > /home/someusername/myFastqFile.sam.filtered.sam.bam.sorted.bam.nodup.bam.bed"
  cmd0="bowtie2 -x"
  "/home/someusername/bowtie2Indeces/somegenome -U /home/someusername/myFastqFile.fastq -S /home/someusername/myFastqFile.sam"
  u <- as.integer(index)
  path_name = dirname(res[[u]])
  file_name = file_path_sans_ext(basename(res[[u]]))
  cmd1 <- paste(cmd0,bowtie2Indeces,"-U",res[[u]],"-S",file.path(output.dir,paste0(file_name,".sam")), sep = " ")
  cmd2 <- cmd1
  system(cmd2)
  cat(cmd2,"\n")
}

getBamReady<-function(input.fastq.dir,bowtie2Indecesinput,sample.num,output.bam.dir){

Rfun1 <- 'library(ChipSeq);library(DoGs);re <- ChipSeq:::doAlignment('
Rinput <- paste0('\\"',input.fastq.dir,'\\",',
                 '\\"',bowtie2Indecesinput,'\\",',
                 '\\"',output.dir,'\\"')
Rfun2 <- ')'
Rfun <-paste0(Rfun1,Rinput,Rfun2)
alignment <- createBsubJobArrayRfun(Rfun,paste0("alignment[1-",sample.num,"]",NULL))
system(alignment)
}

getEffectOfRmDup<-function(){
  not.rm <- read.table("~/Dropbox (BBSR)/Aimin_project/Research/ChipSeq/Danny_ChipSeq_data_QC/qc_16.txt")
  rm <- read.table("~/Dropbox (BBSR)/Aimin_project/Research/ChipSeq/Danny_ChipSeq_data_QC_2/16_samples_QC.txt")
  res <- as.data.table(rbind(cbind(rep("not_rm",length(not.rm[,11])),not.rm[,11]),cbind(rep("rm",length(rm[,11])),rm[,11])))
  boxplot(as.numeric(res$V2)~res$V1)
}


#' sbatch -P bbc -J "zhaoCJun" -o %J.zhaoCJun.log -e %J.zhaoCJun.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::runIDR()'

#' sbatch -p hipri test-job.sh
#' sbatch -p hipri test-job.sh
#' squeue -u steven.miamiuniv


runIDR <- function()
{
cmd="idr --samples /home/steven.miamiuniv/Softwares/idr-2.0.2/tests/data/peak1 /home/steven.miamiuniv/Softwares/idr-2.0.2/tests/data/peak2 --plot testIDR"
system(cmd)

}

# bsub -P bbc -J "zhaoCJun" -o %J.zhaoCJun.log -e %J.zhaoCJun.err -W 72:00 -n 32 -q parallel -R 'rusage[mem= 16000 ] span[ptile= 16 ]' -u aimin.yan@med.miami.edu R -e 'library(ChipSeq);re <- ChipSeq:::peakCallAndAnnotationWithoutInput("/scratch/projects/bbc/aiminy_project/7_27_2017_bam/","/scratch/projects/bbc/aiminy_project/macs2_call","hs","macs2",0.0001)'
# 
peakCallAndAnnotationWithoutInput <- function(input.file.dir,output.file.dir,genome = "hs",peakcaller = c("macs14", "macs2"), peakPvalue) {
  
  genome <- match.arg(genome)
  
  cmd10 <- paste("-f BAM", "-g", genome, "-n", sep = " ")
  
  switch(peakcaller, macs2 = {
    
    PATH1 = Sys.getenv("PATH")
    
    macs2_Lib = file.path("/nethome/axy148/NGS_tools/MACS/bin/")
    
    Sys.setenv(PATH = paste0(macs2_Lib, ":", PATH1))
    
    cmd1 <- Sys.which("macs2")[[1]]
    
    cat(cmd1, "\n")
    
    cmd9 = paste(cmd1, "callpeak -t", sep = " ")
    
    cmd11 <- paste("-p", peakPvalue, sep = " ")
    
  }, {
    cmd9 = "macs14 -t "
    
    cmd11 <- paste("-m 6,18 --bw=200 -p", peakPvalue, sep = " ")
    
  })
  
  re<-ParserReadFiles(input.file.dir,"bam")
  
  file.name.2<-re$input
  #output.dir.name=re$output
  
  temp3=file.path(output.file.dir,"PeakCall")
  
  if(!dir.exists(temp3)){dir.create(temp3,recursive = TRUE)}
  
  re.out<-file.name.2
  
  #cmd9="macs14 -t "
  #cmd10="-f BAM -g hs -n "
  #cmd11=" -m 6,18 --bw=200 -p 0.00001"
  
  lapply(1:length(re.out),function(u,re.out,temp3){
    
    x=re.out[[u]]
    x_name=names(re.out)[u]
    
    cmd12=paste(cmd9,x,cmd10,file.path(temp3,paste0(x_name,"_hs_1.00e-05_macs")),cmd11,sep=" ")
    
    print(cmd12)
    system(cmd12, intern = TRUE, ignore.stderr = TRUE)
    
    #re=read.table(u,header=FALSE)
    #  re<-as.character(re[,1])
    #  #colnames(re)=c("Count","GeneName")
    #  re
  },re.out,temp3)
  
  #AnnotatePeak2(paste0(temp3,"/"),"*macs142_peaks.bed",7,paste0(output.dir.name,"PeakAnnotation_at_",temp2),genome="Hs")
  
  #AnnotatePeak3(paste0(temp3,"/"),paste0(output.dir.name,"_PeakAnnotation"),
  #              genome="Hs")
  
  #BamFileSortIndexVisualization2(re,genome)
  
}

#input <- "~/nodup.bam.txt"
#output <- "~/"
#
# R -e 'library(ChipSeq);library(ChipSeq);ChipSeq:::parseToSampleInfo("/projects/scratch/bbc/Project/Danny_chip3/Filtered_bam","*.bam$","~/Danny_chip3","sample_infor_Danny_chip3.txt","Danny_chip3")'
# 
parseToSampleInfo <- function(input.dir,input.pattern,output.dir,output.file,label){
  
  file.1 <- list.files(input.dir,pattern=input.pattern, all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  s <- file.1
  ss <- basename(as.character(s))
  sss <- tools::file_path_sans_ext(ss)
  ssss <- tools::file_path_sans_ext(sss)
  
  sm.info <- cbind.data.frame(sss,rep(label,length(sss)),ssss,rep("Link",length(sss)))
  
  colnames(sm.info) <- c("ID","Type_Cell","Type_TF","Link")
  
  if(!dir.exists(output.dir)){dir.create(output.dir,recursive = TRUE)}
  
  write.table(sm.info,file=file.path(output.dir,output.file),row.names = FALSE,quote=FALSE,sep=",")
  
}

# ChipSeq:::getAb2inputPair("/Volumes/Bioinformatics$/Aimin_project/Danny_chip3.txt")
# 
getAb2inputPair <- function(input.infor.file)
{
  
  f <- read.table(input.infor.file,header = FALSE)
  
  colnames(f)=c("Ab","Input")
  
  print(f)
  
  return(f)
}
 
#  ChipSeq:::getBwUseBamCompare("/Volumes/Bioinformatics$/Aimin_project/Danny_chip3.txt","~")
#  
getBwUseBamCompare <- function(input.infor.file,bl_file,output.dir)
{
  x.sample <- getAb2inputPair(input.infor.file)
    
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
    x.run <- apply(x.sample, 1, function(x,output.dir)
    {
      
      print(t(x))
      
      x <- as.data.frame(t(x))
      xx <- x$Ab
      xx.input <- x$Input
      
      #xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      cmd1 <- paste("bamCompare --bamfile1",xx,"--bamfile2",xx.input,sep=" ")
      cmd2 <- "--binSize 25"
      #cmd3 <- "--ratio log2 -o"
      cmd3 <- "--ratio subtract --normalizeTo1x 2451960000 --ignoreForNormalization chrX -o"
      cmd3 <- "--ratio subtract --normalizeTo1x 2451960000 --ignoreForNormalization chrX --blackListFileName"
      cmd3 <- paste(cmd3,bl_file,"-o",sep=" ")
      cmd4 <- paste(cmd1,cmd2,cmd3,sep=" ") 
      
      cmd5 <- file.path(output.dir,paste0("log2ratio_",basename(as.character(xx)),"_vs_",basename(as.character(xx.input)),".bw"))
      
      cmd6 <- paste(cmd4,cmd5,sep=" ") 
      
      cmd6
      
      print(cmd6)
      
      system(cmd6)
      
    }, output.dir)
    

}

# R -e 'library(ChipSeq)::peakCall2("/Volumes/Bioinformatics$/Aimin_project/Danny_chip3.txt","hs","~/","macs2",0.00001)'
# 
peakCall2 <- function(input.infor.file,genome = c("Hs","hs", "HS", "hS"),output.dir,peakcaller = c("macs14","macs2"), peakPvalue)
{
  x.sample <- getAb2inputPair(input.infor.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  peakcaller <- match.arg(peakcaller)

  genome <- match.arg(genome)

  genome <- tolower(genome)
  
  cmd10 <- paste("-f BAM", "-g", genome, "-n", sep = " ")
  
  switch(peakcaller, macs2 = {
    
    cmd1 <- "macs2"
    
    cat(cmd1, "\n")
    
    cmd9 = paste(cmd1, "callpeak -t", sep = " ")
    
    cmd11 <- paste("-p", peakPvalue, sep = " ")
    
  }, {
    cmd9 = "macs14 -t "
    
    cmd11 <- paste("-p", peakPvalue, sep = " ")
    
  })
  
  macs.run <- apply(x.sample,1,function(u, output.dir)
  {
    x <- as.data.frame(t(u))
    xx <- x$Ab
    xx.input <- x$Input
    
    x.name <- paste(peakcaller,basename(as.character(xx)),basename(as.character(xx.input)),genome,peakPvalue,sep="_")
    
    cmd12 = paste(cmd9, xx, "-c", xx.input, cmd10, file.path(output.dir, x.name), cmd11, sep = " ")
      
    print(cmd12)
      
    system(cmd12)
    
    },output.dir)

  #AnntationUsingChipSeeker(output.dir, "peaks.bed", temp3, DD = 5000)
  
#  return(re)
  
}

useBamCoverage <- function(input.sample.file,input.bam.file,output.dir)
{
  
  re <- GetSampleInfo(input.sample.file, input.bam.file)
  
  cellInfo <- re$y
  
  # output.dir.name = dirname(input.sample.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  temp3 = output.dir
  
  # cmd9 = 'ngs.plot.r -G' cmd10 = '-R' cmd11 = '-C' cmd12 = '-O' cmd13 = '-T'
  # cmd14 = '-L' cmd15 = '-RR' cmd16 = '-CD' cmd17= '-GO'
  
  
  
  cellInfo.run <- lapply(1:length(cellInfo), function(u,cellInfo, 
                                                      temp3)
  {
    
    x.name = cellInfo[[u]]$name
    
    es <- cellInfo[[u]]$es
    
    x.input <- es[es$Type_TF == "Input", ]$file.name
    
    x.sample <- es[es$Type_TF != "Input", ]
    
    #print(x.sample)
    #print(x.input)
    
    x.run <- apply(x.sample, 1, function(x, x.input, temp3)
    {
      
      y <- x
      
      ID <- y[1]
      Type_Cell <- y[2]
      Type_TF <- y[3]
      Cell_TF <- y[4]
      file.name <- y[5]
      xx <- file.name
      xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
      
      # ~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCompare  --bamfile1 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-03_S11_R1.marked_sorted.bam --bamfile2 /scratch/projects/bbc/aiminy_project/DannyNewNgsPlot/2017-03-02-17_S1_R1.marked_sorted.bam --binSize 25 --ratio log2 -o ~/BamCompare/log2ratio_2017-03-02-03_S11_R1.marked_sorted.bam_vs_2017-03-02-17_S1_R1.marked_sorted.bam.bw 
      
      #bamCoverage -b reads.bam -o coverage.bw
      
      #bamCoverage --bam a.bam -o a.SeqDepthNorm.bw \
      #--binSize 10
      #--normalizeTo1x 2150570000
      #--ignoreForNormalization chrX
      #--extendReads
      
      cmd1 <- paste("~/python/Python-2.7.11/python  ~/NGS_tools/deepTools/bin/bamCoverage --bam",xx,sep=" ")
      cmd2 <- "--binSize 25 --normalizeTo1x 2451960000 --ignoreForNormalization chrX"
      cmd3 <- "-o"
      
      cmd4 <- paste(cmd1,cmd2,cmd3,sep=" ") 
      
      cmd5 <- file.path(output.dir,paste0("To1x_",basename(as.character(xx)),"_vs_","no_input",".bw"))
      
      
      cmd6 <- paste(cmd4,cmd5,sep=" ") 
      
      cmd6
      
      #cat(cmd6, "\n")
      #cat("\n")
    }, x.input, temp3)
    
    x.run
    
  }, cellInfo,temp3)
  
  
  names(cellInfo.run) = unlist(lapply(cellInfo, function(u)
  {
    u$name
  }))
  
  zzz <- unlist(cellInfo.run)
  
  lapply(1:length(zzz), function(u, zzz)
  {
    
    cat(as.character(zzz[u][[1]]), "\n")
    cat("\n")
    
    system(as.character(zzz[u][[1]]))
    
  }, zzz)
  # # dir.name=temp3 dir.name=reformatPath(dir.name)
  # 
  # file.name = file.path(temp3, dir(temp3, recursive = TRUE))
  # 
  # file.name.2 <- as.list(file.name)
  # 
  # 
  # # names(file.name.2) = unlist(lapply(file.name.2, function(u) { u$name }))
  # 
  # zzz <- unlist(file.name.2)
  # 
  # lapply(1:length(zzz), function(u, zzz)
  # {
  #   
  #   dir.name = dirname(zzz[u][[1]])
  #   file_name = file_path_sans_ext(basename(zzz[u][[1]]))
  #   
  #   cmd = paste("ngs.plot.r -G hg19 -R tss -C", zzz[u][[1]], "-O", file.path(dir.name, 
  #                                                                            paste0(file_name, ".tss")), "-T", file_name, "-L 4000 -RR 1 -CD 1 -GO total", 
  #               sep = " ")
  #   
  #   
  #   
  #   # system(as.character(zzz[u][[1]])) job.name = paste0('bamPlot.', u)
  #   # cmd.pegasus = usePegasus(job.option, Wall.time = '72:00',cores = 32,Memory
  #   # = 16000,span.ptile = 16,job.name) cmd2 = paste(cmd.pegasus,cmd,sep = ' ')
  #   
  #   cmd2 = cmd
  #   cat(cmd2, "\n")
  #   cat("\n")
  #   system(cmd2)
  # }, zzz)
  # 
  # 
  # re <- list(cellInforun = cellInfo.run, zzz = zzz)
  # 
  # # AnntationUsingChipSeeker(temp3, 'peaks.bed', temp3, DD = 5000)
  
  #return(re)
  
}

getBwUseBamCoverage2 <- function(input.infor.file,bl_file,output.dir)
{
  x.sample <- getAb2inputPair(input.infor.file)
  
  if (!dir.exists(output.dir))
  {
    dir.create(output.dir, recursive = TRUE)
  }
  
  x.run <- apply(x.sample, 1, function(x,output.dir)
  {
    
    print(t(x))
    
    x <- as.data.frame(t(x))
    xx <- x$Ab
    xx.input <- x$Input
    
    #xx.name = paste(ID, gsub(" ", "-", Type_Cell), Type_TF, sep = "-")
    
    cmd1 <- paste("bamCoverage --bam",xx,sep=" ")
    cmd2 <- "--binSize 25 --normalizeTo1x 2451960000 --ignoreForNormalization chrX --blackListFileName"
    cmd2 <- paste(cmd2,bl_file,sep=" ")
    cmd3 <- "-o"
    
    
    
    #cmd1 <- paste("bamCompare --bamfile1",xx,"--bamfile2",xx.input,sep=" ")
    #cmd2 <- "--binSize 25"
    #cmd3 <- "--ratio log2 -o"
    
    cmd4 <- paste(cmd1,cmd2,cmd3,sep=" ") 
    
    cmd5 <- file.path(output.dir,paste0("Un_norma",basename(as.character(xx)),".bw"))
    
    cmd6 <- paste(cmd4,cmd5,sep=" ") 
    
    cmd6
    
    print(cmd6)
    
    system(cmd6)
    
  }, output.dir)
  
  
}

getSummitSequence<-function(dir.name,input.file.pattern,genome,out.dir.name){
  
  if (!dir.exists(out.dir.name))
  {
    dir.create(out.dir.name, recursive = TRUE)
  }
  
  file.1 <- list.files(dir.name,pattern=input.file.pattern,all.files = TRUE,full.names = TRUE,recursive = TRUE,include.dirs = TRUE)
  
  print(file.1)
  
  re.out<-lapply(file.1,function(u){
    
    #print(names(u))
    uu=u
    re=read.table(uu,header=TRUE)
    
    print(head(re))
    
    colnames(re)[c(7,9)]=c("-10*LOG10(pvalue)","FDR(%)")
    temp<-re
    temp$start<-temp$start-1
    temp$end<-temp$end-1
    temp$summit<-temp$abs_summit-1
    temp
    
    summitPeak<-temp$start+temp$summit
    
    temp2<-temp
    
    temp2$start<-summitPeak-49
    temp2$end<-summitPeak+50
    temp2$summit<-summitPeak
    temp2$length<-temp2$end-temp2$start+1
    temp2
    rownames(temp2)<-paste0("MACS_peak_",rownames(temp2))
    temp3<-toGRanges(temp2)
    names(temp3)<-rownames(temp2)
    
    
    if(genome == "mm10")
    {
    dd.GRCm39.mm10<-toGRanges(EnsDb.Mmusculus.v75)
    genome(temp3)<-genome(dd.GRCm39.mm10)
    seqlevels(dd.GRCm39.mm10,force=TRUE) <- seqlevels(temp3)
    seqinfo(temp3)<-seqinfo(dd.GRCm39.mm10)
    temp3.trimmed<-trim(temp3, use.names=TRUE)
    genome.mm10<-getBSgenome("BSgenome.Mmusculus.UCSC.mm10")
    genome(temp3.trimmed)<-"mm10"
    }
    
    if(genome == "hg19"){
      
      dd.GRCm39.mm10<-toGRanges(EnsDb.Hsapiens.v75)
      genome(temp3)<-genome(dd.GRCm39.mm10)
      seqlevels(dd.GRCm39.mm10,force=TRUE) <- seqlevels(temp3)
      seqinfo(temp3)<-seqinfo(dd.GRCm39.mm10)
      temp3.trimmed<-trim(temp3, use.names=TRUE)
      genome.mm10<-getBSgenome("BSgenome.Hsapiens.UCSC.hg19")
      genome(temp3.trimmed)<-"hg19"
      
    }
    
    
    seq.temp3<-getAllPeakSequence(temp3.trimmed,genome=genome.mm10)
    
    write2FASTA(seq.temp3, file.path(out.dir.name,paste0(basename(u),".fa")))
    
    #re2<-list(originalPeak=re,down1Peak=temp,aroundSummit100Peak=temp2,GR=temp3)
    #re2
  })
  
  #sample.name<-sapply(strsplit(names(file.name.2),split="_peaks_"),"[[",1)
  
  #names(re.out)<-sample.name
  
   test  
  #return(re.out)
}
aiminy/ChipSeq documentation built on May 12, 2019, 3:37 a.m.