orderAllLinkageGroups.func <- function(linkageGroupList,
strandStateMatrix,
strandFreqMatrix,
strandReadCount,
whichLG=NULL,
saveOrdered=NULL,
orderCall='greedy',
randomAttempts=75,
nProcesses = 1,
verbose=TRUE)
{
combineZeroDistContigs <- function(linkageStrands, rawStrandTable, lg)
{
#Filter out borderline calls:
contigNames <- rownames(linkageStrands)
rawStrandTable <- rawStrandTable[contigNames, ]
linkageStrands[which(abs(rawStrandTable) > 0.2 & abs(rawStrandTable) < 0.1 ) ] <- NA
linkageMat <- linkageStrands
linkageStrands <- data.frame(linkageStrands)
linkageStrands <- data.frame(lapply(linkageStrands, function(x) factor(x, levels=c(1,2,3))))
rownames(linkageStrands) <- contigNames
linkRows <- apply(linkageStrands, 1, function(x) length(which(is.na(x))) != ncol(linkageStrands))
linkCols <- apply(linkageStrands, 2, function(x) length(which(is.na(x))) != nrow(linkageStrands))
linkageStrands <- linkageStrands[linkRows , linkCols]
linkageMat <- linkageMat[linkRows, linkCols]
##Combine zero dist contigs:
strandDist <- as.matrix(daisy(linkageStrands))
mergedContigs <- list()
beenMerged <- vector()
mergedStrands <- matrix(nrow=nrow(linkageStrands), ncol=ncol(linkageStrands))
groupCount <- 1
for(contig in rownames(linkageStrands))
{
#Only merge contigs not already pulled in by other merges:
if(!contig %in% beenMerged)
{
toMerge <- which(strandDist[contig,] == 0)
#And don't pull in contigs that are present in toMerge if they've already been asigned
toMerge <- toMerge[!names(toMerge) %in% beenMerged]
beenMerged <- append(beenMerged, names(toMerge))
mergedContigs[[paste('LG', lg, '.', groupCount, sep='')]] <- names(toMerge)
mergedStrands[groupCount,] <- linkageMat[contig,]
groupCount <- groupCount +1
}
}
colnames(mergedStrands) <- colnames(linkageMat)
mergedStrands <- mergedStrands[rowSums(is.na(mergedStrands)) != ncol(mergedStrands),]
orderedContigMatrix <- matrix(c(unlist(lapply(1:length(mergedContigs),
function(x) rep(names(mergedContigs[x]),
length(mergedContigs[[x]]) ))),
unlist(mergedContigs)), ncol=2)
contigsByLG <- sapply(orderedContigMatrix[,1], function(x){strsplit(x, '[.]')[[1]]})
contigStarts <- sub('-.*', '', sub('.*:', '', orderedContigMatrix[,2]))
orderedContigMatrix <- orderedContigMatrix[order(contigsByLG[1,], as.numeric(contigsByLG[2,] ), as.numeric(contigStarts)),]
orderedContigMatrix <- ContigOrdering(orderedContigMatrix)
if(!is(mergedStrands, 'matrix'))
{
mergedStrands <- t(as.matrix(mergedStrands))
rownames(mergedStrands) <- names(mergedContigs)
mergedStrands <- StrandStateMatrix(mergedStrands)
}else{
rownames(mergedStrands) <- names(mergedContigs)
mergedStrands <- StrandStateMatrix(mergedStrands)
}
return(list(mergedStrands=mergedStrands, contigKey=orderedContigMatrix))
}
if(is.null(whichLG)){whichLG=seq_len(length(linkageGroupList))}
orderedGroups <- matrix(nrow=0, ncol=2)
plotGroups <- vector("list", length(whichLG))
for(lg in seq_len(length(whichLG)))
{
if(verbose){message(paste('-> Ordering fragments in LG', whichLG[lg], sep=""))}
if(length(linkageGroupList[[whichLG[lg]]]) > 1)
{
linkageGroup <- linkageGroupList[[whichLG[lg]]]
linkageGroupReadTable <- strandStateMatrix[linkageGroup,]
zeroGroups <- combineZeroDistContigs(linkageGroupReadTable, strandFreqMatrix, whichLG[lg])
#Make a contig Weight vector
ordMat <- cbind(zeroGroups[[2]], apply(strandReadCount[which(rownames(strandReadCount) %in% zeroGroups[[2]][,2] ),] , 1, median))
#Then make a LG weight by taking the sum of all contigs within that LG, and order the linkageGroupTable based on the deepest LG
uniqueZeros <- unique(ordMat[,1])
orderZeros <-sapply(uniqueZeros, function(x) sum(as.numeric(ordMat[which(ordMat[,1] == x),3])))
orderZeros <- names(sort(orderZeros, decreasing=TRUE))
#solves issue that subsetting one value matrix coreces it to a vector and causes crash
if(nrow(zeroGroups[[1]]) > 1)
{
linkageGroupReadTable <- zeroGroups[[1]][orderZeros,]
}else{
linkageGroupReadTable <- zeroGroups[[1]]
}
#Choose which ordering method to use
if(orderCall == 'greedy')
{
outOfOrder <- orderContigsGreedy(linkageGroupReadTable, randomAttempts=randomAttempts,nProcesses = nProcesses,verbose=verbose)
}else if (orderCall == 'TSP')
{
outOfOrder <- orderContigsTSP(linkageGroupReadTable, reps=randomAttempts)
}else{
stop('###### WARNING! orderCall parameter not recognized! No ordering Performed. ######')
}
mergedGroups <- matrix(nrow=0, ncol=2)
for(gp in seq_len(length(outOfOrder[[1]]))) {
mergedGroups <- rbind(mergedGroups, ordMat[which(ordMat[,1] == outOfOrder[[1]][gp]),1:2] )
}
orderedGroups <- rbind(orderedGroups, mergedGroups)
plotGroups[[lg]] <- StrandStateMatrix(outOfOrder[[2]])
if(!(is.null(saveOrdered)))
{
#find fragment names from lg
chromosome <- sort(table(sapply(seq_len(length(linkageGroupList[[whichLG[lg]]])), function(x) strsplit(linkageGroupList[[whichLG[lg]]][x] ,':')[[1]][1])), decreasing=TRUE)
#find predominant fragment name
chromosome <- round(chromosome[1]/length(linkageGroupList[[whichLG[lg]]])*100, digits=1)
plotFrame <- data.frame(outOfOrder[[2]])
plotFrame <- data.frame(lapply(plotFrame, function(x) factor(x, levels=c(1,2,3))))
rownames(plotFrame) <- rownames(outOfOrder[[2]])
similarLinkageStrands <- as.matrix(1-daisy(plotFrame))
diag(similarLinkageStrands) <- 1
if(nrow(similarLinkageStrands) > 1)
{
suppressWarnings(heatmap.2(similarLinkageStrands,
breaks= (seq(0:11)-1)/11,
Rowv=NA,
Colv=NA,
dendrogram="none",
revC=TRUE,
col=colorRampPalette(c('#a50026','#ffffbf','#313695'))(11),
trace='none',
main=paste(orderCall, '-ordered LG', whichLG[lg], '\n main fragment: ', names(chromosome), ' (', chromosome, '%)', sep="")))
}
}
}else{
groupName <- paste('LG', whichLG[lg], '.1', sep='')
linkageGroup <- linkageGroupList[[whichLG[lg]]]
orderedGroups <- rbind(orderedGroups, c(groupName, linkageGroup))
linkageGroupReadTable <- t(as.matrix(strandStateMatrix[linkageGroup,]))
rownames(linkageGroupReadTable) <- groupName
plotGroups[[which(whichLG == whichLG[lg])]] <- StrandStateMatrix(linkageGroupReadTable)
}
}
rownames(orderedGroups) <- NULL
colnames(orderedGroups) <- c('LG', 'contig')
rownames(orderedGroups) <- sapply(seq_len(nrow(orderedGroups)), function(x) strsplit(orderedGroups[x,], "[.]")[[1]][1])
orderedGroups <- ContigOrdering(orderedGroups)
for(lg in whichLG)
{
lgName <- paste("LG", lg, sep="")
orderMyLGs <- orderedGroups[which(rownames(orderedGroups) == lgName),2]
names(orderMyLGs) <- NULL
linkageGroupList[[lg]] <- orderMyLGs
}
plotGroups <- StrandStateList(plotGroups, names=paste('LG', whichLG, ' StrandStateMatrix', sep=''))
linkageGroupList <- LinkageGroupList(linkageGroupList, names=paste('LG', seq_len(length(linkageGroupList)), sep=''))
return(list(orderedGroups, plotGroups, linkageGroupList))
}
####################################################################################################
#' Function to call contig ordering algorithms iteratively across each linkage group element
#' @useDynLib contiBAIT
#' @param linkageGroupList list of vectors, each specifying which contigs belong in which linkage group (product of clusterContigs)
#' @param strandStateMatrix table of strand calls for all contigs (product of preprocessStrandTable)
#' @param strandFreqMatrix table of W:C read proportions (used for QC) (product of strandSeqFreqTable[[1]])
#' @param strandReadCount table of read counts (product of strandSeqFreqTable[[2]])
#' @param whichLG vector of integers specifying the element(s) of linkageGroupList to be ordered (i.e. which specific linkage groups to try to order). Default is all LGs.
#' @param saveOrdered Will return a pdf of heatmaps for each linkage group; String entered becomes the fileName (default is saveOrderedPDF=FALSE)
#' @param orderCall currently either 'greedy' for greedy algorithm or 'TSP' for travelling salesperson alogrithm (default is 'greedy')
#' @param verbose Pringts messages to the terminal. Default is TRUE
#' @param randomAttempts itenger specifying number of iterations of either the Monte Carlo or the 2-opt TSP solver to identify the best ordering. Default is 75, but higher values (eg 1000) make more sense for the TSP.
#' @param nProcesses number of processes to attempt ordering in parallel
#' @aliases orderAllLinkageGroups orderAllLinkageGroups,orderAllLinkageGroups-LinkageGroupList-StrandStateMatrix-StrandFreqMatrix-StrandReadMatrix-method
#' @rdname orderAllLinkageGroups
#'
#' @return a list containing a ContigOrdering object consisting of ordered contigs with linkage group names, a StrandStateList consisting of StrandStateMatrices for each LG, and a
#' LinkageGroupList consisting of reordered contigs within each list element.
#'
#' @example inst/examples/orderAllLinkageGroups.R
#' @export
#' @include AllClasses.R
#' @import Rcpp
#' @import BH
#' @importFrom cluster daisy
#' @importFrom gplots heatmap.2
####################################################################################################
setMethod('orderAllLinkageGroups',
signature = signature(linkageGroupList='LinkageGroupList', strandStateMatrix= 'StrandStateMatrix', strandFreqMatrix='StrandFreqMatrix', strandReadCount='StrandReadMatrix'),
definition = orderAllLinkageGroups.func
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.