Nothing
#' Compare DVI approaches
#'
#' Compare the efficiency of different computational approaches to DVI.
#'
#' The following methods are available for comparison, through the `methods`
#' parameter:
#'
#' 1. Sequential, without LR updates
#'
#' 2. Sequential, with LR updates
#'
#' 3. Sequential (undisputed) + joint (remaining). Always return the most likely
#' solution(s).
#'
#' 4. Joint - brute force. Always return the most likely solution(s).
#'
#' 5. Like 3, but return winner(s) only if LR > `threshold`; otherwise the empty
#' assignment.
#'
#' 6. Like 4, but return winner(s) only if LR > `threshold`; otherwise the empty
#' assignment.
#'
#' @param dvi A `dviData` object, typically created with [dviData()].
#' @param true A character of the same length as `dvi$pm`, with the true
#' solution, e.g., `true = c("M2", "M3", "*")` if the truth is V1 = M2, V2 =
#' M3 and V3 unmatched.
#' @param refs Character vector with names of the reference individuals. By
#' default the typed members of `dvi$am`.
#' @param methods A subset of the numbers 1,2,3,4,5,6.
#' @param markers If `simulate = FALSE`: A vector indicating which markers
#' should be used.
#' @param threshold An LR threshold passed on to the sequential methods.
#' @param simulate A logical, indicating if simulations should be performed.
#' @param db A frequency database used for simulation, e.g.,
#' `forrel::NorwegianFrequencies`. By default the frequencies attached to
#' `dvi$am` are used.
#' @param Nsim A positive integer; the number of simulations.
#' @param returnSims A logical: If TRUE, the simulated data are returned without
#' any DVI comparison.
#' @param seed A seed for the random number generator, or NULL.
#' @param numCores The number of cores used in parallelisation. Default: 1.
#' @param verbose A logical.
#'
#' @return A list of solution frequencies for each method, and a vector of true
#' positive rates for each method.
#'
#' @examples
#'
#' refs = "R1"
#' db = forrel::NorwegianFrequencies[1:3]
#'
#' # True solution
#' true = c("M1", "M2", "M3")
#'
#' # Run comparison
#' \donttest{
#' # dviCompare(example1, refs, true = true, db = db, Nsim = 2, seed = 123)
#' }
#'
#' # Alternatively, simulations can be done first...
#' sims = dviCompare(example1, refs, true = true, simulate = TRUE,
#' db = db, Nsim = 2, seed = 123, returnSims = TRUE)
#'
#' # ... and computations after:
#' \donttest{
#' # dviCompare(sims, refs, true = true, simulate = FALSE)
#' }
#'
#' @importFrom forrel profileSim
#' @importFrom parallel clusterSetRNGStream
#' @export
dviCompare = function(dvi, true, refs = typedMembers(am), methods = 1:6,
markers = NULL, threshold = 1, simulate = TRUE,
db = getFreqDatabase(am), Nsim = 1, returnSims = FALSE,
seed = NULL, numCores = 1, verbose = TRUE) {
st = Sys.time()
refs = as.character(refs)
true = as.character(true)
if(verbose) {
if(simulate)
print(dvi, printMax = 10)
else
print(dvi[[1]], printMax = 10)
cat("\nParameters for DVI comparison:\n")
cat(" True solution:", toString(true), "\n")
cat(" Simulate data:", simulate, "\n")
cat(" Number of sims:", if(simulate) Nsim else length(dvi$pm), "\n")
cat(" Reference IDs:", toString(refs))
cat(" LR threshold:", threshold, "\n\n")
}
if(simulate) {
dvi = consolidateDVI(dvi)
pm = dvi$pm
am = dvi$am
missing = as.character(dvi$missing)
vics = names(pm)
isMatch = true != "*"
stopifnot(length(true) == length(vics),
all(true[isMatch] %in% missing),
all(getSex(am, true[isMatch]) == getSex(pm)[isMatch]))
if(!is.null(seed))
set.seed(seed)
am = setMarkers(am, locusAttributes = db)
pm = setMarkers(pm, locusAttributes = db)
# Simulate AM
AMsims = profileSim(am, N = Nsim, ids = c(refs, true[isMatch]))
# Simulate the unrelated victims
PMsims = forrel::profileSim(pm, N = Nsim, ids = vics[!isMatch])
# For the true matches, transfer from MP to vics
PMsims = lapply(1:Nsim, function(i)
transferMarkers(from = AMsims[[i]], to = PMsims[[i]],
idsFrom = true[isMatch], idsTo = vics[isMatch], erase = FALSE))
# Remove data from missing
AMsims = lapply(AMsims, function(s) setAlleles(s, missing, alleles = 0))
# Collect into list of dviData objects
dviSims = lapply(1:Nsim, function(i) dviData(pm = PMsims[[i]], am = AMsims[[i]], missing = missing))
# Return sims
if(returnSims)
return(dviSims)
}
else {
dviSims = dvi
dvi1 = dvi[[1]]
vics = names(dvi1$pm)
stopifnot(length(true) == length(vics),
all(true %in% c(dvi1$missing, "*")),
setequal(refs, typedMembers(dvi1$am)))
#PMsims = pm
#AMsims = am
if(!is.null(markers)) {
stop2("Marker selection not implemented yet.")
#PMsims = lapply(PMsims, selectMarkers, markers)
#AMsims = lapply(AMsims, selectMarkers, markers)
}
}
N = length(dviSims)
# Setup parallelisation
if(paral <- (numCores > 1)) {
cl = makeCluster(numCores)
if(verbose)
cat("Using", length(cl), "cores\n")
on.exit(stopCluster(cl))
clusterEvalQ(cl, library(dvir))
# clusterExport(cl, c("missing"), envir = environment())
clusterSetRNGStream(cl, iseed = sample.int(1e6,1))
}
# DVI functions (just to reduce typing)
fun1 = function(i) sequentialDVI(dviSims[[i]], threshold = threshold, updateLR = FALSE, check = FALSE, verbose = FALSE)$matches
fun2 = function(i) sequentialDVI(dviSims[[i]], threshold = threshold, updateLR = TRUE, check = FALSE, verbose = FALSE)$matches
fun3 = function(i) top(jointDVI(dviSims[[i]], undisputed = TRUE, threshold = threshold, check = FALSE, verbose = FALSE))
fun4 = function(i) top(jointDVI(dviSims[[i]], undisputed = FALSE, check = FALSE, verbose = FALSE))
fun5 = function(i) top(jointDVI(dviSims[[i]], undisputed = TRUE, threshold = threshold, check = FALSE, verbose = FALSE), threshold)
fun6 = function(i) top(jointDVI(dviSims[[i]], undisputed = FALSE, check = FALSE, verbose = FALSE), threshold)
# Initialise list of results
res = list()
# Approach 1: Sequential - naive
if(1 %in% methods) {
method1 = if(paral) parLapply(cl, 1:N, fun1) else lapply(1:N, fun1)
res$method1 = summar(method1)
if(verbose) print(res['method1'])
}
# Approach 2: Sequential - with LR update
if(2 %in% methods) {
method2 = if(paral) parLapply(cl, 1:N, fun2) else lapply(1:N, fun2)
res$method2 = summar(method2)
if(verbose) print(res['method2'])
}
# Approach 3: Sequential undisputed + joint
if(3 %in% methods) {
method3 = if(paral) parLapply(cl, 1:N, fun3) else lapply(1:N, fun3);
res$method3 = summar(method3)
if(verbose) print(res['method3'])
}
# Approach 4: Joint
if(4 %in% methods) {
method4 = if(paral) parLapply(cl, 1:N, fun4) else lapply(1:N, fun4)
res$method4 = summar(method4)
if(verbose) print(res['method4'])
}
# Approach 5: Sequential undisputed + joint + threshold (LR < thresh => *-*-*)
if(5 %in% methods) {
method5 = if(paral) parLapply(cl, 1:N, fun5) else lapply(1:N, fun5);
res$method5 = summar(method5)
if(verbose) print(res['method5'])
}
# Approach 6: Joint + threshold (LR < thresh => *-*-*)
if(6 %in% methods) {
method6 = if(paral) parLapply(cl, 1:N, fun6) else lapply(1:N, fun6)
res$method6 = summar(method6)
if(verbose) print(res['method6'])
}
# True positive rates
TPR = sapply(res, function(r) as.numeric(r[paste(true, collapse = "-")]))
TPR[is.na(TPR)] = 0
res$TPR = TPR
if(verbose)
cat("Total time used:", format(Sys.time() - st, digits = 3), "\n")
res
}
# Utility for summarising list of solutions
#' @importFrom stats aggregate
summar = function(x) {
sols = lapply(x, apply, 1, paste, collapse = "-")
L = lengths(sols)
df = data.frame(sol = unlist(sols), wei = rep(1/L, L))
agg = aggregate(wei ~ sol, data = df, FUN = sum)
freqs = agg$wei/length(x)
names(freqs) = agg$sol
sort(freqs, decreasing = T)
}
# Utility for including ties in output of jointDVI()
top = function(res, thresh = 1) {
nvic = ncol(res) - 3
if(res$LR[1] < thresh) return(rbind(rep("*", nvic)))
mx = res$LR == res$LR[1]
res[mx, seq_len(nvic), drop = FALSE]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.