# SPAN-TCR
# Alex Xu
# alex.m.xu@gmail.com
#' Comparison function between two sets of TCRs generated by SPANTCR. Compatible between two paired sets, or two single gene sets.
#'
#' @param baseset CDR3Breakdown object 1
#' @param compset CDR3Breakdown object 2
#' @param limit Cumulative distance at which to stop comparing CDR3s
#' @param ticks Number of ticks to use, default 100
#' @param functionmatrix Matrix of amino acid distance metrics, default modified BLOSUM
#' @param fulloutput T/F if all TCRs in compset should be saved
#'
#' @return List containing the cumulative distance between each TCR of base set and either the top closest k-mers in compset or all k-mers
#' @export
#'
#' @examples \dontrun{ComparisonFunction(SPANTCROutput, SPANTCROutput, 100, FineBinTicks, BLOSUMCappedDiff, F)}
ComparisonFunction <- function(baseset, compset, limit=100, ticks=FineBinTicks, functionmatrix=BLOSUMCappedDiff, fulloutput=F)
{
if(baseset@Metadata$FileType!=compset@Metadata$FileType){
stop("Incorrect CDR3Breakdown objects")
}
else{
if(baseset@Metadata$FileType!="Paired"){
baseCDR3s <- baseset@CloneData$CDR3
compCDR3s <- compset@CloneData$CDR3
} else {
baseCDR3s <- paste(baseset@CloneData$TRACDR3, baseset@CloneData$TRBCDR3, sep="/")
compCDR3s <- paste(compset@CloneData$TRACDR3, compset@CloneData$TRBCDR3, sep="/")
}
AllElements <- unique(c(baseset@Elements, compset@Elements))
AllRelMatrix <- sapply(AllElements, function(baseelement)
unlist(sapply(AllElements, function(compelement)
sum(functionmatrix[cbind(match(strsplit(baseelement,NULL)[[1]],AminoAcidFilter[["AA"]]),
match(strsplit(compelement,NULL)[[1]],AminoAcidFilter[["AA"]]))]))), USE.NAMES = T)
BaseOriginsMasterOrder <- sort(unique(baseset@Output$Origin))
BaseData <- baseset@Output[order(baseset@Output$Origin),]
CompData <- compset@Output[order(compset@Output$Origin),]
# Global order to be used for comp origins
CompOriginsMasterOrder <- sort(unique(compset@Output$Origin))
# Error limit
# Initialize distances between baseoriginsmasterorder and comporiginsmasterorder
InitialDistancePer <- rep(0, length(CompOriginsMasterOrder))
CumulativeDistanceMatrix <- lapply(BaseOriginsMasterOrder, function(bybase) InitialDistancePer)
CumulativeDistances <- data.table::data.table(matrix(data=0, nrow=length(CompOriginsMasterOrder),
ncol=length(BaseOriginsMasterOrder),
dimnames = list(compCDR3s[CompOriginsMasterOrder],
baseCDR3s[BaseOriginsMasterOrder])))
BlankMatrix <- CumulativeDistanceMatrix
# Blank memory to start TRUE means ok, FALSE means exceeded error limit
InitialMemory <- rep(TRUE, length(CompOriginsMasterOrder))
memory <- lapply(BaseOriginsMasterOrder, function(bybase) InitialMemory)
# Indexes of good memories
indexmemory <- lapply(memory, function(index) which(index))
CompSearchAvailable <- CompOriginsMasterOrder
goodmemories <- lapply(indexmemory, function(index) which(as.numeric(as.character(CompSearchAvailable)) %in% index))
TickWidth <- 0.01
newfailindex <- vector("numeric")
NoMatchList <- vector("numeric")
NoMatchPoints <- vector("numeric")
for(tick in ticks)
{
EmptyOriginsMatrix <- lapply(BaseOriginsMasterOrder, function(bybase) InitialDistancePer)
if(length(NoMatchList!=0)){EmptyOriginsMatrix <- EmptyOriginsMatrix[-NoMatchList]}
# Everything is based on temporary order unless specified
# remove failures from search data
BaseData <- BaseData[!(Origin %in% NoMatchList)]
# bases set of data
BaseSearch <- BaseData[dplyr::near(Tick,tick)]
# narrow list of base origins used if there are no matches
# BaseOriginsMasterOrder <- unique(BaseSearch$Origin)
# comparison set of data
CompSearch <- CompData[dplyr::near(Tick,tick)]
# probability of base windows appearing (delta function)
# BaseSearchWeights <- BaseSearch$Probability
# windows that are found in base random order
BaseSearchWindows <- BaseSearch$Window
# corresponding base origins
BaseSearchOrigins <- BaseSearch$Origin
# probability of base windows appearing w/ weight (divide by base probability later)
BaseSearchWeights <- BaseSearch$WeightProbability
# given BaseOriginsMasterOrder order, where do these origins contribute window(s)
BaseSearchOriginIndex <- lapply(unique(BaseSearchOrigins), function(origin) which(BaseSearch$Origin %in% origin))
# given BaseOriginsMasterOrder order, how much does each window contribute to origin
BaseSearchOriginRelWeight <- lapply(unique(BaseSearchOrigins), function(origin) BaseSearchWeights[BaseSearch$Origin==origin]/sum(
BaseSearchWeights[BaseSearch$Origin==origin]))
indexmemoriesgood <- which(sapply(indexmemory, length)!=0)
EmptyCompSearch <- lapply(BaseSearch$Window, function(bybase) rep(0, length(CompSearch$Window)))
# comparison windows possible
CompSearchWindows <- CompSearch$Window
# comparison origins existing
CompSearchAvailable <- CompSearch$Origin
# for each base origin, which comparison windows come from available comparison windows
CompSearchOriginMemory <- lapply(indexmemory, function(index) which(as.numeric(as.character(CompSearchAvailable)) %in% index))
# for each base window, which comparison windows come from available comparison windows
CompSearchWindowMemory <- lapply(BaseSearchOrigins, function(origin) CompSearchOriginMemory[[match(origin, indexmemoriesgood)]])
# for each base window, find list of compatible comparison windows by index
CompSearchIndexMemorybyOrigin <- lapply(BaseSearchOrigins, function(origin) indexmemory[[origin]])
# given CompOriginsMasterOrder order, where do these origins contribute window(s)
CompSearchOriginsIndex <- lapply(CompOriginsMasterOrder, function(origin) which(CompSearch$Origin %in% origin))
# list data to match bases to still compatible comparison windows
BaseSearchWindowsCompatible <- mapply(list, BaseSearchWindows, CompSearchWindowMemory, EmptyCompSearch, SIMPLIFY=F)
# distances between base window and all remaining compatible comparison windows, random order for both
BasetoCompDistance <- lapply(BaseSearchWindowsCompatible, function(BaseWindow1CompWindowsCompatible2Empty3)
{BaseWindow1CompWindowsCompatible2Empty3[[3]][BaseWindow1CompWindowsCompatible2Empty3[[2]]] <- AllRelMatrix[cbind(rep(match(BaseWindow1CompWindowsCompatible2Empty3[[1]], rownames(AllRelMatrix)),
length(BaseWindow1CompWindowsCompatible2Empty3[[2]])),
match(CompSearchWindows[BaseWindow1CompWindowsCompatible2Empty3[[2]]],
colnames(AllRelMatrix)))]*TickWidth
BaseWindow1CompWindowsCompatible2Empty3[[3]]})#*TickWidth
# list data to combine distances when a comparison contributes multiple windows
BaseCompDistanceIndexMemoryCompatibleOrigins <- mapply(list, BasetoCompDistance, CompSearchIndexMemorybyOrigin, SIMPLIFY=F)
#
DistanceAddedCompressedbyCompOrigin <- lapply(BaseCompDistanceIndexMemoryCompatibleOrigins,
function(Distances1CompatibleOrigins2) sapply(CompSearchOriginsIndex[Distances1CompatibleOrigins2[[2]]],
function(index) min(Distances1CompatibleOrigins2[[1]][index])))
# for each base origin, find the additional distance added per comp origin, to transform to base Master Order
DistancesbySharedBaseOrigin <- lapply(BaseSearchOriginIndex, function(search) DistanceAddedCompressedbyCompOrigin[search])
# for each base origin, pair distances with their weights
DistancesbySharedBaseOriginWithRelWeights <- mapply(list, DistancesbySharedBaseOrigin, BaseSearchOriginRelWeight, SIMPLIFY=F)
# match up each distance with relative weight within list
DistancesbySharedBaseMatchedWithRelWeightinList <- lapply(DistancesbySharedBaseOriginWithRelWeights, function(Distances1Weights2) Map(list, Distances1Weights2[[1]], Distances1Weights2[[2]]))
# extract weighted distance for each base origin
DistanceBetweenBasesMasterOrderCompatibleComps <- lapply(DistancesbySharedBaseMatchedWithRelWeightinList, function(ByBase) Reduce("+", lapply(ByBase, function(Distance1Weight2) mapply("*", Distance1Weight2[[1]], Distance1Weight2[[2]]))))
# BaseSearchMasterOrderRelativeWeight <- mapply(list,BaseSearchOriginIndex, BaseSearchOriginRelWeight, SIMPLIFY=F)
# find new distances from base origins to each compatible comparison origin
if(length(NoMatchList)!=0){DistanceBetweenBasesAvailableComps <- mapply(list, DistanceBetweenBasesMasterOrderCompatibleComps, indexmemory[-NoMatchList], EmptyOriginsMatrix, SIMPLIFY=F)} else {DistanceBetweenBasesAvailableComps <- mapply(list, DistanceBetweenBasesMasterOrderCompatibleComps, indexmemory, EmptyOriginsMatrix, SIMPLIFY=F)}
# if(length(NoMatchList)!=0){DistanceBetweenBasesAvailableComps <- mapply(list, DistanceBetweenBasesMasterOrderCompatibleComps, indexmemory[-NoMatchList], EmptyOriginsMatrix, SIMPLIFY=F)}
CumulativeDistanceAdded <- lapply(DistanceBetweenBasesAvailableComps,
function(Distances1AvailableComps2EmptyMatrix3) {
Distances1AvailableComps2EmptyMatrix3[[3]][Distances1AvailableComps2EmptyMatrix3[[2]]] <- Distances1AvailableComps2EmptyMatrix3[[1]]
Distances1AvailableComps2EmptyMatrix3[[3]]})
CumulativeDistanceBase <- BlankMatrix
if(length(NoMatchList!=0)){CumulativeDistanceBase[-NoMatchList] <- CumulativeDistanceAdded} else {CumulativeDistanceBase <- CumulativeDistanceAdded}
CumulativeDistanceMatrix <- mapply("+",CumulativeDistanceMatrix, CumulativeDistanceBase, SIMPLIFY=F)
# for each base origin, which comparison origins are still valid
memory <- lapply(CumulativeDistanceMatrix, function(windows) windows<limit)
# for each base origin, obtain indexing of available comparison windows
indexmemory <- lapply(memory, function(index) which(index))
# detect no match origins
newfailindex <- which(sapply(indexmemory, function(x) length(x)==0))
# detect new failures
# failrecord <- c(failrecord)
# record no match origins
NoMatchList <- c(NoMatchList,newfailindex)
# record when no matches remained
NoMatchPoints <- c(NoMatchPoints,rep(tick, length(newfailindex)))
# if(length(NoMatchList!=0)){CumulativeDistanceMatrix <- CumulativeDistanceMatrix[-newfailindex]}
cat(paste0(tick, "/ "))
}
CumulativeDistanceComparisons <- data.table::data.table(BaseCDR3 = baseCDR3s,
Top5CompIndex = lapply(CumulativeDistanceMatrix, function(scorelist) order(scorelist)[1:5]),
Top5CompScore = lapply(CumulativeDistanceMatrix, function(scorelist) sort(scorelist)[1:5]))
if (fulloutput) {
CumulativeDistanceComparisons[, FullData := CumulativeDistanceMatrix]
}
CumulativeDistanceComparisons[, Top5Chain := lapply(Top5CompIndex, function(topindex) compCDR3s[topindex])]
CumulativeDistanceComparisons[, BestChain := sapply(Top5Chain, function(x) x[[1]])]
CumulativeDistanceComparisons[, BestScore := sapply(Top5CompScore, function(x) x[[1]])]
CumulativeDistanceComparisons
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.