annexeFunc/performAnalysis.R

######################################
########### Annex functions ##########
######################################

peaksLoading<-function(x){
    # Laoding files based on files extension

    if(grepl(x=x,pattern=".bed")& !grepl(x=x,pattern=".gff")){

          x<-read.table(x, stringsAsFactors=F)
        if(length(grep(x=x[,1], pattern="chr"))==0){
            x[,1]<-paste0("chr",x[,1])
            x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,2],x[,3]))
        }else{
            x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,2],x[,3]))
        }

    }else if(grepl(x=x, pattern=".Rda")){
        x<-get(load(x))
    }else if(grepl(x=x, pattern=".gff3")){
          x<-read.table(x, skip=30,stringsAsFactors=F)
          if(length(grep(x=x[,1], pattern="chr"))==0){
              x[,1]<-paste0("chr",x[,1])
              x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,4],x[,5]))
          }else{
              x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,4],x[,5]))
          }

    } else if(grepl(x=x, pattern=".gff")){
        x<-read.table(x, skip=30,stringsAsFactors=F)
        if(length(grep(x=x[,1], pattern="chr"))==0){
            x[,1]<-paste0("chr",x[,1])
            x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,4],x[,5]))
        }else{
            x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,4],x[,5]))
        }
    } else{
        x<-read.table(x, header=T,stringsAsFactors=F)
        if(length(grep(x=x[,1], pattern="chr"))==0){
            x[,1]<-paste0("chr",x[,1])
            x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,2],x[,3]))
        }else{
             x<-GRanges(seqnames=as.character(x[,1]),range=IRanges(x[,2],x[,3]))
        }
    }
    return(x)
}




######################################
########## Perform analysis ##########
######################################

# Single Run analysis - looping should be done in bash so you can run thing fatser


performAnalysis<-function(TFList,ChIP,DNASequenceSet,
                          Access=NULL,setSequence=NULL,
                          reduce=NULL,peaks=NULL,tileSize=20000, cores=1,
                          filename=NULL,OP=NULL,noiseFilter="sigmoid",method="pearsonMean",normValues=NULL,withValidation=T){


  print("Loading DNA Access")
if(!is.null(Access) & class(Access)!="GRanges"){
        Access<-get(load(Access))
        name <- as.character(seqnames(Access))
        Access <- GRanges(seqnames=name,
        range= IRanges(start= start(Access), end=end(Access)))
}
### extracting relavalnt data and loading it

PO<-parameterOptions(noiseFilter=noiseFilter,stepSize=50)

# DNASequenceSet
print("Loading DNA Seq")
  if(class(DNASequenceSet)=="character" ){
    DNASequenceSet<-get(load(DNASequenceSet))
  }else if(class(DNASequenceSet)=="BSgenome"){
    DNASequenceSet<-getSeq(DNASequenceSet)
  }
### Loading TF PFM ###
print("Loading PFM")
  if(class(TFList)!="list" & length(TFList)!=2){
    stop(paste0(deparse(substitute(TFList))," is not a List of 2 elements"))
  }
  if(grepl(".Rda",TFList[[1]])){
    PFM<-get(load(TFList[[1]]))
    GPP<-genomicProfiles(PFM=PFM,PFMFormat=TFList[[2]],
    BPFrequency=DNASequenceSet,PWMThreshold=0.7)
  } else{
    GPP<-genomicProfiles(PFM=TFList[[1]],PFMFormat=TFList[[2]],
    BPFrequency=DNASequenceSet,PWMThreshold=0.7)
  }


## precessing ChIP seq
  chipProfile <- processingChIP(profile=ChIP,loci=setSequence, reduce=reduce,
                              peaks=NULL, chromatinState=NULL,
                              parameterOptions=PO,cores=cores,normValues=normValues)
if(withValidation){
## up for sone validation
###############################################################################
###############################################################################
#TrainScore<-scores(chipProfile)[grepl("chr2L",names(scores(chipProfile))) | grepl("chr2R",names(scores(chipProfile)))]
#ValidationScore<-scores(chipProfile)[grepl("chr3L",names(scores(chipProfile))) | grepl("chr3R",names(scores(chipProfile)))]

#Trainloci<-loci(chipProfile)[grepl("chr2L",names(loci(chipProfile))) | grepl("chr2R",names(loci(chipProfile)))]
#Validationloci<-loci(chipProfile)[grepl("chr3L",names(loci(chipProfile))) | grepl("chr3R",names(loci(chipProfile)))]
###############################################################################
###############################################################################

TrainScore<-scores(chipProfile)[1:10]
ValidationScore<-scores(chipProfile)

Trainloci<-loci(chipProfile)[1:10]
Validationloci<-loci(chipProfile)


.scores(chipProfile)<-TrainScore
.loci(chipProfile)<-Trainloci


# compute optimal
  optimal <- computeOptimal(GPP,DNASequenceSet,chipProfile,Access,PO,cores=cores)


    ### Computing Optimal Parameters ###

    if(!is.null(filename)){
        save(optimal,file=paste0(filename,noiseFilter,"_OptimalOutputTraining.Rda"))
        save(chipProfile, file=paste0(filename,noiseFilter,"_ChIPTraining.Rda"))
    } else {
        save(optimal,file=paste0(noiseFilter,"optimalOutputTraining.Rda"))
        save(chipProfile, file=paste0(noiseFilter,"ChIPTraining.Rda"))
    }

    ## optimal param for validation
    ## let assume you have more than one

    for(meth in method){

    param<-optimal[[1]][[1]][[meth]]

    lambda<-param[1]
    bm<-param[2]

    GPP<-genomicProfiles(PFM=TFList[[1]],PFMFormat=TFList[[2]],
    BPFrequency=DNASequenceSet,PWMThreshold=0.7,lambdaPWM=lambda,boundMolecules=bm,stepSize=50)
    gw<-computeGenomeWideScores(GPP,DNASequenceSet,Access,cores=cores)
    .scores(chipProfile)<-ValidationScore
    .loci(chipProfile)<-Validationloci
    pwm<-computePWMScore(gw,DNASequenceSet,chipProfile,Access,cores=cores)
    occup<-computeOccupancy(pwm)
    chip<-computeChIPProfile(occup,chipProfile,cores=cores)
    gof<-profileAccuracyEstimate(chip,chipProfile,cores=cores)
    optimalList<-list("Occupancy"=occup,"ChIPProfile"=chip,"Gof"=gof)

    if(!is.null(filename)){
        save(optimalList,file=paste0(filename,noiseFilter,"_",meth,"OptimalOutputValidation.Rda"))
        save(chipProfile, file=paste0(filename,noiseFilter,"_",meth,"ChIPValidation.Rda"))
    } else {
        save(optimalList,file=paste0(noiseFilter,"optimalOutputValidation.Rda"))
        save(chipProfile, file=paste0(noiseFilter,"ChIPValidation.Rda"))
    }

  }


  }else{
    chipProfile <- processingChIP(profile=ChIP,loci=setSequence, reduce=reduce,
                                peaks=NULL, chromatinState=NULL,
                                parameterOptions=PO,cores=cores,normValues=normValues)


  optimal <- computeOptimal(GPP,DNASequenceSet,chipProfile,Access,PO,cores=cores)

 method=method[1]
  ### Computing Optimal Parameters ###
  if(!is.null(filename)){
      save(optimal,file=paste0(filename,noiseFilter,"_",method,"OptimalOutput.Rda"))
      save(chipProfile, file=paste0(filename,noiseFilter,"_",method,"ChIP.Rda"))
  } else {
      save(optimal,file=paste0(noiseFilter,"optimalOutput.Rda"))
     save(chipProfile, file=paste0(noiseFilter,"ChIP.Rda"))
  }


  }
}
patrickCNMartin/ChIPanalyserDev documentation built on Nov. 14, 2019, 4:39 p.m.