#!/usr/bin/env R
#
# This file is part of CCCA,
# http://github.com/alexjgriffith/CCCA/,
# and is Copyright (C) University of Ottawa, 2015. It is Licensed under
# the three-clause BSD License; see LICENSE.txt.
# Author : Alexander Griffith
# Contact: griffitaj@gmail.com
#' loadBedFile
#'
#' Calls file_length and read_bed from hg19Height.c and returns a 3 column bed file
#' @param file the location of the bed file of interest
#' @return data.frame $chro $start $end
#' @export
#' @template authorTemplate
#' @examples
#' data<-loadBedFile(file="filelocation")
loadBedFile<-function(file){
file<-normalizePath(file)
if(!file.exists(file)){
sprintf("Can't find file %s.",file)}
else{
fileLength<-as.integer(.C("file_length",file,stringLength=integer(1))[[2]])
results<-.C("read_bed",file,chro=character(fileLength),start=integer(fileLength),end=integer(fileLength))
data.frame(chro=results$chro,start=results$start,end=results$end)
}}
#' convertChroms
#'
#' Calls valueChromosome from hg19Height.c which converts a chromosme rank to a string
#' @seealso For the reverse see \code{\link{rankChroms}}
#' @param lis a list of chromosmes to
#' @template authorTemplate
#' @export
convertChroms<-function(lis){
n<-length(lis)
.C("valueChromosomes",character(n),as.integer(n), lis)[[1]]}
#' @export
getChroms<-function(file,n=100){
file<-as.character(normalizePath(file))
if(!file.exists(file)){
stop(paste("Can't find file",file))
}
else{
return(Filter(function(x) x!="", .C("getChroms",file,choms=character(n))$choms))}
}
#' rankChroms
#'
#' Calls rankChromosome from hg19Height.c which converts a chromosme string to its equivelent rank
#' @seealso For the reverse see \code{\link{convertChroms}}
#' @template authorTemplate
#' @export
rankChroms<-function(lis){
n<-length(lis)
.C("rankChromosomes",as.character(lis),as.integer(n), integer(n))[[3]]}
#' getPileUp
#'
#' A minimal wrapper for the pileup function from hg19Height.c
#' returns a integer vector of computed read pileups
#' @param file The raw data file of interest
#' @param bed The preloaded bed infromation including bed$start and bed$end
#' @param chroms a list of chromosomes whos string values have been repalced with ranks
#' @param peakLength The length of the bed data provided
#' @export
getPileUp<-function(file,bed,chroms,peakLength){
start<-as.integer(as.character(bed$start))
end<-as.integer(as.character(bed$end))
peaknum<-as.integer(peakLength)
score<-as.integer(rep(0,peakLength))
#print(sapply(list(start,end,score,chroms),length))
#print(file)
#list(chrom=chroms,start=start,end=end,peaknum=peaknum,score=score)
results<-.C("pileup",file,chrom=chroms,start=start,end=end,peaknum=peaknum,score=score)
results$score
#NULL
}
#' @export
getPeakDensity<-function(file,bed,chroms,peakLength,width){
start<-as.integer(as.character(bed$start))
end<-as.integer(as.character(bed$end))
peaknum<-as.integer(peakLength)
score<-as.integer(rep(0,peakLength*width))
results<-.C("peakDensity",file,chrom=chroms,start=start,end=end,peaknum=peaknum,score=score)
t(matrix(results$score,ncol=peakLength))
}
#' @export
peakDensity<-function(data,rawdata,n=0,width,verbose=FALSE,clust=NULL){
for(file in rawdata){
if(! file.exists(file)){
stop("Can't find file ",file,".")
}
if(verbose)
print(paste("# Raw data file ",file," was found.",sep=""))
}
peakLength<-length(data$chr)
chroms<-as.character(data$chr)
if(n>0){
if(is.null(clust))
cs<-makeForkCluster(n,renice=0)
else
cs<-clust
ret<-parLapply(cs,rawdata,getPeakDensity,data,chroms,peakLength,width)
if(is.null(clust))
stopCluster(cs)
}else{
ret<-lapply(rawdata,getPileUp,data,chroms,peakLength,width)}
ret}
#' PileUp
#'
#' Generates a pile up matrix from a unified set of peaks and a list of raw data sets
#' @template authorTemplate
#' @param data A preloaded bed data.frame which includes slots $chro $start $end
#' @param rawdata a list of raw data files
#' @param n the number of nodes to use. If 0 then the parrallel package is not used
#' @examples
#' # Initialize catagories, files containing raw data
#' cats<-read.table("/home/griffita/Dropbox/UTX-Alex/jan/catagories")
#' prefix<-"/mnt/brand01-00/mbrand_analysis/data_sets/"
#' suffix<-"_sorted.bed"
#' rawdata<-apply(cats,1,function(x){paste(prefix,x,"/",x,suffix,sep="")})
#' # Apply pileUp to peaks found using MACS
#' data<-hg19Sort(loadBedFile(file))
#' # score<-pileUp(data,rawdata,n=22)
#' # cluster the data sets based on the read hights of the peaks
#' # temp<-cor(score)
#' rownames(temp)<-t(cats)
#' colnames(temp)<-t(cats)
#' pdf("test.pdf")
#' plot(hclust(dist(temp)),hang=-1)
#' # Rather than using peaks we can do global analyis
#' # this relies on breaking the genome into bins
#' # human.hg19.genome provides the start and stop point for each genome
#' # bin size desides how large the regions should be
#' binSize=10000
#' regions<-do.call(rbind,
#' apply(read.table("/data/binaries/BEDTools/genomes/human.hg19.genome")[1:24,],1,
#' function(x,step) {y<-seq(1,as.numeric(x[2]),step);
#' cbind(as.character(x[1]),as.character(y),as.character(y+step))} ,binSize))
#' data<-hg19Sort(data.frame(chro=regions[,1],
#' start=as.integer(regions[,2]),
#' end=as.integer(regions[,3])))
#' #score<-pileUp(data,rawdata,n=22)
#' @export
pileUp<-function(data,rawdata,n=0,verbose=FALSE,clust=NULL){
for(file in rawdata){
if(! file.exists(file)){
stop("Can't find file ",file,".")
}
if(verbose)
print(paste("# Raw data file ",file," was found.",sep=""))
}
peakLength<-length(data$chr)
chroms<-as.character(data$chr)
if(n>0){
if(is.null(clust))
cs<-makeForkCluster(n,renice=0)
else
cs<-clust
ret<-matrix(unlist(parLapply(cs,rawdata,getPileUp,data,chroms,peakLength)),nrow=peakLength)
if(is.null(clust))
stopCluster(cs)
}else{
ret<-matrix(unlist(lapply(rawdata,getPileUp,data,chroms,peakLength)),nrow=peakLength)}
ret}
#' hg19Sort
#'
#' Reorders the chro factor in data to that which is outputed by BWA.
#' This allows for the sorting step to be skipped, however if another chromasome is used then a new order must be defined.
#'
#' @param data [chr,start,end]
#' @return The same members of data but sorted on chr and start
#' @template authorTemplate
#' @export
hg19Sort<-function(data){
neworder<-Filter(function(x) x %in% levels(data$chro), strsplit("chr1 chr2 chr3 chr4 chr5 chr6 chr7 chrX chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr20 chrY chr19 chr22 chr21 chrM" ," ")[[1]])
data$chro<-factor(data$chro,neworder)
data<-data[with(data,order(chro,start)),]
data
}
#' Chrom Genome Sort
#'
#' Returns a function that reorders the peak data based on the chromosome order provided
#' @param chromOrder A list of chromosomes to be found in the data
#' @examples
#' sortedChroms<-strsplit("chr1 chr2 chr3 chr4 chr5 chr6 chr7 chrX
#' chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17
#' chr18 chr20 chrY chr19 chr22 chr21 chrM" ," ")[[1]]
#' hg19Sort<-chromGenomeSort(sortedChroms)
#' bedData<-loadBedData("test.bed")
#' sortedBedData<-hg19Sort(bedData)
#' @export
chromGenomeSort<-function(chromOrder){
rf<-function(data){
ls<-levels(data$chro)
neworder<-Filter(function(x) {x %in% ls}, chromOrder)
data$chro<-factor(data$chro,neworder)
data<-data[with(data,order(chro,start)),]
data
}
return(rf)
}
#' getData
#'
#' loads data from file returns a sorted set with a height matrix
#' @export
#' @template authorTemplate
#' @param file peak file
#' @param rawdata a list of raw data files
#' @param n the number of nodes used default=0
getData<-function(file,rawdata,n=0){
data<-hg19Sort(loadBedFile(file))
score<-pileUp(data,rawdata,n=n)
c(data=data,score=score)}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.