## This is the master function for now.
NB <- function(DT, Prior, Likelihood_MV, Training, Explanatory, tolerance){
## Explanatory doesn't do anything so far
## Barcode is always the explanatory variable
## 'Training' refers to the prediction columns.
for(i in seq_along(DT)){
## For each testing/training DT, add tolerance to Q1/Q3
DT[[i]][, library.Q1.min := as.numeric(library.Q1-tolerance)]
DT[[i]][, library.Q1.max := as.numeric(library.Q1+tolerance)]
DT[[i]][, library.Q3.min := as.numeric(library.Q3-tolerance)]
DT[[i]][, library.Q3.max := as.numeric(library.Q3+tolerance)]
Likelihood_MV[[i]][, library.Q1.min := as.numeric(library.Q1-tolerance)]
Likelihood_MV[[i]][, library.Q1.max := as.numeric(library.Q1+tolerance)]
Likelihood_MV[[i]][, library.Q3.min := as.numeric(library.Q3-tolerance)]
Likelihood_MV[[i]][, library.Q3.max := as.numeric(library.Q3+tolerance)]
## Rename 'Barcode' to actual barcode
setnames(Likelihood_MV[[i]], "Barcode", "Training.Barcode", skip_absent=TRUE)
## Find all training DT rows that match testing rows based on Q1/Q3
setkey(DT[[i]],
library.Q1.min, library.Q1.max,
library.Q3.min, library.Q3.max)
setkey(Likelihood_MV[[i]],
library.Q1.min, library.Q1.max,
library.Q3.min, library.Q3.max)
overlap <- foverlaps(DT[[i]], Likelihood_MV[[i]], type="any")
## Remove any duplicate columns starting with 'i'
selectCol <- grep("^i.", colnames(overlap))
overlap <- overlap[, !selectCol, with=F]
overlap[, c("library.Q1.min", "library.Q1.max",
"library.Q3.min", "library.Q3.max") := NULL]
## Compute Gaussian likelihood
overlap[, Likelihood := (
(1/(sqrt(2*pi*Variance))) * exp( (-1/2) * ( (value-Mean)^2) / (Variance))
)]
overlap[, c("Mean", "Variance") := NULL]
setnames(overlap, length(overlap), names(DT)[i])
if(i==1){
setnames(overlap, "value", "Retention.Time.value")
DT_return <- overlap
} else{
overlap[, "value" := NULL]
DT_return <- merge(DT_return, overlap, all=T)
}
}
## Merge 'Prior' with 'DT_return'. Only left join because we want to match
## priors with 'DT_return' entries
DT_return <- merge(DT_return, Prior,
by = c("library.Q1", "library.Q3", "Barcode", "matrix"),
all.x = T)
## Remove extra rows from merged 'Prior'
DT_return <- DT_return[is.na(run) == F]
rm(Prior)
## Calculate posterior probability (minus normalizing denominator, the marginal likelihood)
## Converting likelihoods and prior to a matrix for row product
mat_return <- as.matrix(DT_return)
## Find column names of training variables
names <- which(colnames(mat_return) %in% Training)
prior <- which(colnames(mat_return) %in% "prior")
## Calculate row product
mat_return <- mat_return[, c(names, prior)]
mode(mat_return) = "numeric"
pp <- rowProds(mat_return, na.rm=TRUE)
## Add posterior probability to data table
DT_return[, "posterior" := pp]
rm(mat_return)
## Re-order columns with Barcode & in the front
## iGraph assumes edges are the first 2 columns
extract <- grep("Barcode", colnames(DT_return))
temp <- DT_return[, extract, with=FALSE]
DT_return[, c(extract) := NULL]
DT_return <- cbind(temp, DT_return)
## Maximum weighted bipartite matching for each run
## Only one possible testing (run) barcode can match a single training barcode
## NAs can be produced if an MRM peak doesn't match anything in the training library
## Get length of unique runs (not including NA)
MWBMrange <- sum(!is.na(DT_return[, unique(run)]))
DT_return2 <- vector(mode="list", length=MWBMrange)
for(j in 1:MWBMrange){
## Making temporary data table copies to prevent overwriting via assign
## by reference
tempDT2 <- DT_return[run == unique(run)[j]]
tempDT <- copy(tempDT2)
## Create iGraph object for maximum weighted bipartite matching
## Vertices are 'Barcode' and 'Training.Barcode'
## Prefixes added to Barcode columns to distinguish them for MWBM
tempDT[, Barcode := paste("Run.", Barcode, sep="")]
tempDT[, Training.Barcode := paste("Lib.", Training.Barcode, sep="")]
g <- graph.data.frame(tempDT, directed = T)
## Figure out which vertices should be FALSE (column 1) or TRUE (column 2)
Vin <- which(names(V(g)) %in% tempDT[,Barcode])
Vin <- cbind(Vin, FALSE)
Vout <- which(!(1:length(V(g)) %in% Vin))
Vout <- cbind(Vout, TRUE)
Vall <- rbind(Vin, Vout)
Vall <- Vall[order(Vall[,1]), 2]
V(g)$type <- as.logical(Vall)
## Edge weights are posterior probabilities of classifier
E(g)$weight <- as.numeric(tempDT[,posterior])
MWBM <- max_bipartite_match(g)
## Only interested in matching 'Barcode' to 'Training.Barcode'
myFilter <- tempDT[,Barcode]
MWBM <- MWBM$matching[which(names(MWBM$matching) %in% myFilter)]
MWBM <- data.table(Barcode = names(MWBM),
Training.Barcode = as.vector(MWBM))
## Remove prefixes in both Barcode/Training.Barcode columns
MWBM[, Barcode := substring(Barcode, 5)]
MWBM[, Training.Barcode := substring(Training.Barcode, 5)]
## Only keep row in 'DT_return' corresponding to MWBM
setkey(DT_return, "Barcode", "Training.Barcode")
setkey(MWBM, "Barcode", "Training.Barcode")
DT_return2[[j]] <- DT_return[run == unique(run)[j]][MWBM, nomatch=0]
}
## Convert list to a data table by rows
DT_return2 <- do.call(rbind, DT_return2)
## Figure out which label to assign based on MWBM
setkeyv(DT_return, colnames(DT_return))
setkeyv(DT_return2, colnames(DT_return2))
DT_return3 <- DT_return[DT_return2]
DT_return4 <- DT_return[!DT_return2]
DT_return3[, "NoClass" := ifelse(is.na(posterior) == T | posterior == 0, TRUE, FALSE)]
DT_return3[, "MWBM" := ifelse(is.na(NoClass) == F, TRUE, FALSE)]
DT_return4[, "NoClass" := NA]
DT_return4[, "MWBM" := F]
DT_return <- rbind(DT_return3, DT_return4)
rm(DT_return2, DT_return3, DT_return4, MWBM, myFilter, g, tempDT)
## "Correct" barcode assignment based on MWBM
DT_return[, "Actual_choice" := ifelse(Barcode == Training.Barcode, TRUE, FALSE)]
DT_correct <- DT_return[MWBM == T & Actual_choice == T & is.na(NoClass) == F]
## Incorrect barcode assignment
DT_incorrect <- DT_return[MWBM == T & Actual_choice == F & NoClass == F]
## No assignment
DT_noclass <- DT_return[NoClass == T | is.na(NoClass) == T]
return(list(complete=DT_return,
correct=DT_correct,
incorrect=DT_incorrect,
noclass=DT_noclass))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.