R/tags.r

#!/usr/bin/env R
#
# This file is part of peakAnalysis,
# http://github.com/alexjgriffith/alpha-score/, 
# and is Copyright (C) University of Ottawa, 2015. It is Licensed under 
# the three-clause BSD License; see LICENSE.txt.
# Author : Alexander Griffith
# Contact: griffitaj@gmail.com

#' testZero
#'
#' Checks if the result of subtracting width from value is zero
#'
#' @param value integer or double which cannot be less than 0
#' @param width integer or double
#' @return a range value+width value-width where value-width >0
#' @template authorTemplate
#' @export
testZero<-function(value,width){
    shift<-value-width
    if(shift>=0)
        return (c(value-width,value+width))
    else{
        return (c(value-width-shift,value+width-shift))}}

#' outputFile
#'
#' generates the final bed format layout <chr><start><end><tag1-tag2...-tagn>
#' for internal use with \code{\link{tags()}}
#' @param peaks  Two column data type generated by \code{\link{tags()}}
#' @param chrdata Sorted input bed files all as elements as character
#' @param width The width which will be outputed
#' @template authorTemplate
outputFile<-function(peaks,chrdata,chrcats,fileOrder,width=150,scores=FALSE){
    output<-mapply(function(i1,i2,width=300){
        summit<-as.character(testZero(floor(mean(as.double(chrdata[i1:i2,5]))),width) )
        filevalues<-lapply(chrdata[i1:i2,10],function(x){chrcats[which(sub("_[[:digit:]]*$","",x,perl=TRUE)==fileOrder)]})
        files<-do.call(paste,c(as.list(sort(unique(as.character(unlist(filevalues))))),sep="-"))
        chro<-chrdata[i1,1]
        c(chro,summit,files)
    },peaks[1,],peaks[2,],MoreArgs=list(width=width))
    data<-data.frame(t(output))    
    colnames(data)<-c("chro","start","end","tags")
    data
    #apply(data,1,function(x) gregexpr(x[4],)
    }

#' tags
#'
#' Recives a list of files and catagories and generates a unified set of peaks
#'
#' @param filelist c(file1,file2,file3)
#' @param cats c(cat1,cat2,cat3) ; each cat relates to a single file in order
#' @param overlapWidth The width used to genearte the overal
#' @param outputWidth +/- value from the estimated peak summit (twice that of \code{\link{overlapWidth}})
#' @export
#' @template authorTemplate
tags<-function(fileList,cats,overlapWidth,outputWidth,macsCutoff=0){
    chrcats<-as.character(unlist(cats))
    temp<-as.character(do.call(rbind,lapply(fileList,read.table,header=TRUE,nrow=1))$name)
    fileOrder<-unlist(lapply(temp ,function(x){substr(x,1,nchar(x)-2)}))
    rm(temp)
    dataset<-do.call(rbind,lapply(fileList,read.table,header=TRUE))
    dataset<-dataset[with(dataset,order(chr,abs_summit)),]
    idex<-(as.numeric(as.character(dataset$X.log10.pvalue.)) >macsCutoff)
    dataset<-dataset[idex,]
    chrdata<-as.matrix(dataset)
    l<-nrow(dataset)
    t1<-dataset[1:l-1,]
    t2<-dataset[2:l,]
    #b<-mapply(all,chr=t1$chr==t2$chr , summit=abs(t1$abs_summit-t2$abs_summit)<overlapWidth)
    b<-(t1$chr==t2$chr & abs(t1$abs_summit-t2$abs_summit)<overlapWidth)
    prePeaks<-unlist(lapply(which(b==FALSE),function(x){c(x,x+1)}))
    peaks<-matrix(c(1,prePeaks[1:length(prePeaks)-1]),nrow=2)    
    outputFile(peaks,chrdata,chrcats,fileOrder,outputWidth)}

#' testTags
#'
#' A qick test
#' 
#' @template authorTemplate
testTags<-function(file="test.bed"){
    cats<-read.table("/home/griffita/Dropbox/UTX-Alex/jan/catagories")
    prefix<-"/mnt/brand01-00/mbrand_analysis//peaks/october/"
    suffix<-"/combined_mock_peaks.xls"
    fileList<-apply(cats,1,function(x){paste(prefix,x,suffix,sep="")})
    data<-tags(fileList,cats,600,150)
    write.table(data,file,quote=FALSE,row.names=FALSE,col.names = FALSE)}
alexjgriffith/mulcal documentation built on May 10, 2019, 8:53 a.m.