# provide extremaS matrix, and x = c(x1,x2), two areas to search between,
# will return the lowest minima between those two extrema.
findDivs = function(ind, extremaS) {
x1=ind[1]
x2=ind[2]
res = which(extremaS[x1:x2,3] == min(extremaS[x1:x2,3]))
return(x1+res[1]-1);
}
#Given some data and a quantile, fit a gamma distribution to the data
#And return the value of quantile for that paramaterized distribution.
fitGammaCutoff = function(d, quant=0.9) {
v = var (d);
m = mean(d);
theta = v/m;
kappa = m/theta;
cutoff = qgamma(quant, shape=kappa, scale=theta)
return(cutoff);
}
#####################################################
#Define Broad peaks
#####################################################
defineBroadPeaks = function(chrom, densityEstimate, windowCoords, scratchDir, looseQuantile,windowStep) {
cat("Defining broad peaks...\t");
peakCalls = Rle(densityEstimate>looseQuantile)
#restrict peak calls to only those that span a certain distance without dropping below the threshold.
rTrueCumLen = cumsum(runLength(peakCalls))
choose = (which(runValue(peakCalls) & runLength(peakCalls) > 100/windowStep)-1)
if (length(choose) <1 ) { #found no broad peaks
return(NULL);
}
#pick out the starts and ends of the peaks we've selected
peakStarts = rTrueCumLen[choose]
peakEnds = rTrueCumLen[choose]+runLength(peakCalls)[choose+1]
length(peakStarts) #how many peaks ?
options(scipen=15) #don't print scientific notation
highScores = sapply(mapply(seq, peakStarts, peakEnds), function(x) { return(max(densityEstimate[x])); } )
#convert scores to the UCSC bed format 1-1000 scale.
highScores = round( (highScores/max(highScores)) * 1000 ); #signif(highScores,4))
write(file=paste(scratchDir, "/", chrom, ".peaks.b.bed", sep=""), rbind(chrom, windowCoords[peakStarts], windowCoords[peakEnds], 1:length(peakStarts), highScores, ncol=5, sep="\t"); toc();
peakCoords = cbind(peakStarts, peakEnds)
return(peakCoords);
}
#####################################################
#Define Narrow peaks
#####################################################
defineNarrowPeaks = function(chrom, densityEstimate, windowCoords, scratchDir, peakCoords) {
cat("Defining narrow peaks...\t",chrom);
peakList = apply(peakCoords, 1, findPeaks, densityEstimate, windowCoords)
peaks = do.call(rbind, peakList)
#convert scores to the UCSC bed format 1-1000 scale.
peaks[,3] = round( ( peaks[,3]/max( peaks[,3])) * 1000 );
write(file=paste(scratchDir, "/", chrom, ".peaks.dips.bed", sep=""), rbind(chrom, windowCoords[peaks[,1]], windowCoords[peaks[,2]], 1:nrow(peaks), peaks[,3]), ncol=5, sep="\t"); toc();
}
pkFig = function(extremaS) {
val = densityEstimates2[peakStart:peakEnd]
par(mfrow=c(3,1), mar=c(2,3,2,1))
plot(val, type="l", lwd=2, main="signal")
abline(v=deriv1Zeros, col=rgb(.6,.6,.9))
abline(v=tips, col="magenta");
points(extremaS[extremaS[,2]==-1,1], extremaS[extremaS[,2]==-1,3], bg="red", pch=25, cex=2)
points(extremaS[extremaS[,2]==1,1], extremaS[extremaS[,2]==1,3], bg="darkgreen", pch=24, cex=2)
points(divMatrix, rep(max(peakValues),length(divMatrix)), lwd=2)
plot(deriv1, type="l", col="blue", main="1st derivative")
abline(h=0, col=rgb(.8,.8,.8))
abline(v=deriv1Zeros, col=rgb(.6,.6,.9))
plot(deriv2, type="l", col="red", main="2nd derivative")
abline(h=0, col=rgb(.8,.8,.8))
abline(v=deriv1Zeros, col=rgb(.6,.6,.9))
#abline(v=deriv1Zeros[deriv2[deriv1Zeros] <=0], col="cyan")
}
killdip = function(zen, extremaS) {
if (nrow(extremaS) <4) { return(NULL); }
if (zen == 2) { return(3); }
if (zen == nrow(extremaS)-1 ) { return( nrow(extremaS)-2); }
#if (extremaS[zen+2,3] > extremaS[zen-2,3]) { return (zen+1); } #merge this peak with the larger neighboring peak
if (abs(extremaS[zen+2,1]-extremaS[zen,1]) < abs(extremaS[zen-2,1]-extremaS[zen,1])) { return (zen+1); }
return(zen-1);
}
symdiff = function(x) {
d = diff(c(x, rep(x[length(x)], 1)) , lag=1) +
diff(c(x, rep(x[length(x)], 2)) , lag=2) +
diff(c(x, rep(x[length(x)], 3)) , lag=3) +
diff(c(rep(x[1], 1), x) , lag=1) +
diff(c(rep(x[1], 2), x) , lag=2) +
diff(c(rep(x[1], 3), x) , lag=3);
return(d/6);
}
#Function takes a vector with coordinates of the start and end of a region,
#and returns a vector of the tips in that region
#depending on some tweakable parameters.
showfig=TRUE;
findPeaks = function(broadPeakCoords, densityEstimate, windowCoords, showfig=FALSE) {
#cat(broadPeakCoords[1], "\t"); #debug information
peakStart = broadPeakCoords[1];
peakEnd = broadPeakCoords[2];
peakWidth = windowCoords[peakEnd] - windowCoords[peakStart]
peakValues = densityEstimate[peakStart:peakEnd]
mpv = mean(peakValues)
deriv1 = symdiff(peakValues) #first derivative of signal
deriv2 = symdiff(deriv1) #second derivative of signal
signalRle = Rle(peakValues >= 0)
deriv1Rle = Rle(deriv1 >= 0)
deriv1Zeros = cumsum(runLength(deriv1Rle))
tips = deriv1Zeros[deriv2[deriv1Zeros] < -mpv*.01]; #peakValues[deriv1Zeros] > strictQuantile &
dips = c(1,na.omit(deriv1Zeros[deriv2[deriv1Zeros] > mpv*.01]), length(peakValues))
if (length(tips) == 0) { return(NULL); }
#build a matrix with all the information about the tips and dips:
extrema = rbind(cbind(na.omit(tips), 1, peakValues[tips], deriv1[tips], deriv2[tips]), cbind(na.omit(dips), -1, peakValues[dips], deriv1[dips], deriv2[dips]))
extremaS = extrema[order(extrema[,1]),]
#In a legitimate matrix, the order will go: dip, tip, dip, tip, dip...etc.
#If there are two dips or tips in a row, we should delete one.
badExtrema = which(runLength(Rle(extremaS[,2])) > 1)
if (length(badExtrema) > 0) {
#two or more of the same extrema in a row: we should only keep one, but which one? The best one.
cs = cumsum(runLength(Rle(extremaS[,2])))
runs = mapply(seq, c(0, cs[-length(cs)])+1, cs, SIMPLIFY=FALSE)
todelete = lapply(runs, function(x) { #keep only the highest tip or the lowest dip.
keep = which(extremaS[x,3] == abs(max(extremaS[x,3] * extremaS[x,2])))[1];
return(setdiff(x, x[keep]));
})
#eliminate bad extrema
extremaS = extremaS[-unlist(todelete), ]
#reconstitute the dips and tips vectors with the new extrema.
dips = extremaS[extremaS[,2] == -1,1]
tips = extremaS[extremaS[,2] == 1,1]
}
#Now, we want to only keep the tips that have sufficient peakStrength,
#which measures not the total peak height, but how much it rises above its local dips.
extremaDeriv = diff(extremaS[,3])
peakRise = (abs(extremaDeriv [ which (extremaS[,2] == 1)-1]) + abs(extremaDeriv [ which (extremaS[,2] == 1)]))/2
peakStrength = peakRise / peakValues[tips]
#delete any tip with low peakStrength
#each tip has two dips, we want to delete one of them. Which one? Perhaps the furthest away;
#this is what function killdip() does.
delete = peakStrength < .5
i = order(peakStrength)[1]
while (peakStrength[i] < .5) {
if (length(peakStrength) == 1) { return(NULL); }
#cat(peakStrength[i]); #debug information
dipToDelete = killdip(which(extremaS[,2]==1)[i],extremaS)
extremaS = extremaS[-c(which(extremaS[,2]==1)[i], dipToDelete), ]
extremaDeriv = diff(extremaS[,3])
dips = extremaS[extremaS[,2] == -1,1]
tips = extremaS[extremaS[,2] == 1,1]
peakRise = (abs(extremaDeriv [ which (extremaS[,2] == 1)-1]) + abs(extremaDeriv [ which (extremaS[,2] == 1)]))/2
peakStrength = peakRise / peakValues[tips]
i = order(peakStrength)[1]
}
#now take the final set of peaks and convert it back to correct coordinates.
divMatrix = cbind(dips[-length(dips)], dips[-1])
peaksFound = peakStart + divMatrix
if(showfig) { pkFig(extremaS); }
return(cbind(peaksFound, peakValues[tips]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.