#!/usr/bin/env R
#
# This file is part of peakAnalysis,
# http://github.com/alexjgriffith/alpha-score/,
# and is Copyright (C) University of Ottawa, 2015. It is Licensed under
# the three-clause BSD License; see LICENSE.txt.
# Author : Alexander Griffith
# Contact: griffitaj@gmail.com
#
#' Genomic Regions
#'
#' Implements the Stanford GREAT genomic region generation
#' @export
genomeDomains<-function(chrom,tss,strand,proxUp,proxDown,distal,n=length(chrom)){
extendsMax<-function(n,chrom,basalDomains,tss,distal){
withinChrom<-(chrom[n]==chrom)
a<-basalDomains[withinChrom,1]-tss[n]
ap<-a[a> basalDomains[n,2]-tss[n] & a<distal]
ab<-a[a>0 & a< basalDomains[n,2]-tss[n]]
if(length(ab))
lm<-basalDomains[n,2]
else if(length(ap))
lm<-tss[n]+min(ap)
else
lm<-tss[n]+distal
b<-tss[n]-basalDomains[withinChrom,2]
bp<-b[ b> tss[n]-basalDomains[n,1] & b<distal]
bb<-b[b>0 & b< tss[n]-basalDomains[n,1]]
if(length(bb))
um<-basalDomains[n,1]
else if(length(bp>0))
um<-tss[n]-min(bp)
else
um<-tss[n]-distal
geneDomain<-c(um,lm)
geneDomain
}
determ<-function(csl,gene,fun){
if(as.character(fun)=="+")
psl<-c(csl,as.character(gene))
else{
psl<-csl[which(csl != as.character(gene))]}
psl
}
basalDomains<-t(apply(cbind(tss-strand*proxUp,tss+strand*proxDown),1,sort))
genomeDomains<-do.call(rbind,lapply(seq(n),extendsMax,chrom,basalDomains,tss,distal))
return(genomeDomains)
}
#' Genomic Regions
#' @export
genomicRegions<-function(
a,chrom,
chroms=unique(chrom)){
beg<-list()
n=0
b<-cbind(c(as.numeric(a[,1]),as.numeric(a[,2])),seq(length(a[,1])),unlist(lapply(c(0,1),function(x)rep(x,length(a[,1])))))
for(cro in chroms){
region=(chrom==cro)
k<-b[region,]
c<-k[order(k[,1]),]
buffer<-c(c[1,2])
reg<-list()
start<-c[1,1]
for(i in seq(2,length(which(region))*2)){
reg<-append(reg,list(c(cro,start,c[i,1],buffer)))
if(c[i,3]==0){
buffer<-c(buffer,c[i,2])
}else{
buffer<-buffer[c[i,2]!=buffer]}
start<-c[i,1]
}
n<-n+1
beg<-append(beg,reg[unlist(lapply(reg,function(x) length(x)>3))])
#beg<-append(beg,reg)
}
return(beg)
}
#' Peak Gene Regions
#'
#' compare peak locations to gene regions generated by genomicRegions
#' Return a list of genes which corospond to the overlaped gene regions
#' @examples
#' heightFile<-"~/Dropbox/UTX-Alex/jan/combined_heights.bed"
#' geneList<-read.delim(paste(fileLocation,"hg19.RefSeqGenes.csv",sep=""))
#' data<-loadHeightFile(heightFile)$data
#' reg<-mapply(function(pc,loc)buildRegions(data,pc,loc)[,1],
#' list(1,1,3,c(3,5),c(3,7),c(3,7),7),
#' list("top","bottom","top",c("top","top"),c("top","bottom"),c("top","bottom"),"top"))
#' chrom<-as.character(geneList$chrom)
#' tss<-as.numeric(geneList$txStart)
#' start<-Sys.time()
#' a<-genomicRegions(chrom,tss,5000,1000,50000)
#' Sys.time()-start
#' #
#' jurkGenes<-peakGeneRegions(bedData[reg[,2],],a,geneList)
#' filenames<-lapply(cbind("erythroid","t-all","ecfc","other","hspc","meka","diff"),paste, "-genes.txt",sep="")
#' for (i in seq(7)){
#' genes<-peakGeneRegions(bedData[reg[,i],],a,geneList)
#' write.table(genes,filenames[[i]],quote=FALSE,col.names=FALSE,row.names=FALSE)}
#' @export
peakGeneRegions<-function(bedData,genes,geneList){
aChr<-as.character(lapply(genes,"[[",1))
aStart<-as.numeric(lapply(genes,"[[",2))
aEnd<-as.numeric(lapply(genes,"[[",3))
peaks<-apply(bedData[,2:3],1,mean)
pc<-"chr0"
chroms<-(aChr==pc)
cStart<-aStart[chroms]
cEnd<-aEnd[chroms]
locs<-c()
for (i in seq(length(bedData[,1]))){
if(pc!=as.character(bedData[i,1])){
pc<-as.character(bedData[i,1])
chroms<-which((aChr==pc))
cStart<-aStart[chroms]
cEnd<-aEnd[chroms]
}
gene<-chroms[which(peaks[i]>cStart &peaks[i]<cEnd)]
locs<-c(locs,gene)
}
geneList[
as.numeric(unique(unlist(lapply(genes[unique(locs)],function(x) {x[4:length(x)]}))))
]
#locs
}
#' Nearest Gene
#' @examples
#' heightFile<-"~/Dropbox/UTX-Alex/jan/combined_heights.bed"
#' data<-loadHeightFile(heightFile)$data
#' geneList<-read.delim(paste(fileLocation,"hg19.RefSeqGenes.csv",sep=""))
#' reg<-mapply(function(pc,loc)buildRegions(data,pc,loc)[,1],
#' list(1,1,3,c(3,5),c(3,7),c(3,7),7),
#' list("top","bottom","top",c("top","top"),c("top","bottom"),c("top","bottom"),"top"))
#' a<-geneAssocFlipAux(geneList,bedData,c(50000,500,0,0))
#' names<-mapply(function(a,b) paste(a,b,sep="-"), as.character(geneList$name),as.character(geneList$name2))
#' filenames<-lapply(cbind("erythroid","t-all","ecfc","other","hspc","meka","diff"),paste, "-genes.txt",sep="")
#' for (i in seq(7)){
#' print(filenames[[i]])
#' filename<-filenames[[i]]
#' sequence<-reg[,i]
#' b<-lapply(a,function(x,y) list(x[[1]],x[[2]][which(x[[2]]%in%y)]) ,which(sequence))
#' c<-nearestGene(b,sequence,as.numeric(geneList$txStart),names,bedData)
#' write.table(unique(unlist(c)),filename ,col.names = FALSE,row.names=FALSE,quote=FALSE)
#' }
#' sequence<-ascore(data,1,"top",6)
#' filename<-"erythroid-6-genes.txt"
#' @export
nearestGene<-function(b,sequence,geneStart,name,peak){
c<-do.call(rbind,lapply(b,function(x) cbind(x[[1]],(function(x)if(length(x>0)) x else 0)(x[[2]]))))
sequence2<-intersect(which(sequence),unique(c[,2]))
lapply(sequence2, function(n) name[getMin(abs(geneStart[c[which(c[,2]==n),1]]-peak[n,2]))])
}
#' Rotate List
#' @export
rotateList<-function(b,range){
c<-do.call(rbind,lapply(b,function(x) cbind(x[[1]],(function(x)if(length(x>0)) x else 0)(x[[2]]))))
lapply(do.call(seq,as.list(range)), function(n) c[which(c[,2]==n),1])
}
#' Get Min
#' will return the single value if there is only a single value in the list
#' @export
getMin<-function(x){
if(length(x)>1)
which.min(x)
else
1 #unlist(x)
}
#' Gene Assoc Flip Aux
#' @export
geneAssocFlipAux<-function(geneList,point,bounds){
peak<-(as.numeric(point[,2])+as.numeric(point[,3]))/2
tss<-as.numeric(geneList$txStart)
ess<-as.numeric(geneList$txEnd)
chrom<-as.character(geneList$chrom)
name<-as.numeric(rownames(geneList))#as.character(geneList$name)
lapply(seq(length(name)),function(x) geneAssocFlip(name[x],tss[x],ess[x],chrom[x],bedData,peak,c(50000,0,0,0)))
}
#' Gene Assoc Flip
#' @export
geneAssocFlip<-function(name,tss,ess,chrom,point,peak,bounds){
a<-which(chrom==as.character(point[,1]))
b<-a[which(tss-bounds[1]<peak[a])]
r<-b[which(ess+bounds[3]>peak[b])]
e<-r[c(which(tss-bounds[2]>peak[r]),
which(ess+bounds[4]<peak[r]))]
list(name,e)}
#' Gene Assoc
#' @export
geneAssoc<-function(point,geneList,bounds){
# Currently Returns the enhancer locations
peak<-(as.numeric(point[[2]])+as.numeric(point[[3]]))/2
tss<-geneList$txStart
ess<-geneList$txEnd
chrom<-geneList$chrom
a<-which(chrom==as.character(point[[1]]))
b<-a[which(tss[a]-bounds[1]<peak)]
r<-b[which(ess[b]+bounds[3]>peak)]
e<-r[c(which(tss[r]-bounds[2]>peak),which(ess[r]+bounds[4]<peak))]
e}
#' lene Assoc
#' parallel gene assoc
#' @export
leneAssoc<-function(point,geneList,bounds){
peak<-(as.numeric(point[[2]])+as.numeric(point[[3]]))/2
tss<-geneList$txStart
ess<-geneList$txEnd
chrom<-geneList$chrom
a<-which(chrom==as.character(point[[1]]))
b<-a[which(tss[a]-bounds[1]<peak)]
r<-b[which(ess[b]+bounds[3]>peak)]
locations<-r[c(which(tss[r]-bounds[2]>peak),which(ess[r]+bounds[4]<peak))]
t<-geneList$txStart[unlist(locations)]
gene<-as.character(geneList$name[locations[order(abs(t-peak))]])
if(identical(gene,character(0))){"None"}else(gene)}
#' Min Gene
#' @export
minGene<-function(x,y){
x[order(abs(x-y))]}
#' Gene Association
#'
#' Assosiate each ChIP enrinchement region with the closest peak within a bound defined by the user
#'
#' @param bedData three column data.frame
#' @param geneList a data.frame loaded without a header from the refseq gene data base
#' @param bounds c(A,B,C,D) Region 1 = (tss-A,tss+B) Region 2 = (ess-C ess+D)
#' @param n Number of nodes to use.
#'
#' @examples
#' fileLocation="~/Dropbox/UTX-Alex/jan/"
#' bedData<-read.delim(paste(fileLocation,"combined_sorted.bed",sep=""),header=0)
#' geneList<-read.delim(paste(fileLocation,"hg19.RefSeqGenes.csv",sep=""))
#' genes<-geneAssociation(bedData,geneList, c(50000,0,0,0))
#' write.table(cbind(bedData,unlist(t(lapply(genes, function(x) {if(identical(x,character(0))){"None"} else{x}})))) ,"combined_tagged_genes.bed" ,col.names=FALSE,row.names=FALSE,quote=FALSE,sep="\t")
#'
#' @export
#' @template authorTemplate
geneAssociation<-function(bedData,geneList,bounds,n=FALSE){
if(is.logical(n)){
peak<-(as.numeric(bedData[,2])+as.numeric(bedData[,3]))/2
locations<-apply(bedData,1,geneAssoc,geneList,bounds)
t<-lapply(locations,function(locations) as.character(geneList$name2)[unlist(locations)])
tssGenes<-Map(minGene,t,peak)
a<-lapply( tssGenes, function(x){as.character(geneList$name2[x])})}
else{
cs<-makeForkCluster(n,renice=0)
a<-parApply(cs,bedData,1,leneAssoc,geneList=geneList,bounds=bounds)
stopCluster(cs)}
return (a)}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.