Nothing
################################################################################
# TODO LIST
# TODO: ...
################################################################################
# CHANGE LOG (last 20 changes)
# 24.08.2018: Removed unused variables.
# 06.08.2017: Added audit trail.
# 20.07.2016: Added attributes to result.
# 20.07.2016: Added new option 'list.all' to include missing samples in result.
# 28.08.2015: Added importFrom
# 18.06.2014: First version.
#' @title Calculate Concordance.
#'
#' @description
#' Calculates concordance and discordance for profiles in multiple datasets.
#'
#' @details Takes a list of datasets as input. It is assumed that each unique
#' sample name represent a result originating from the same source DNA and
#' thus is expected to give identical DNA profiles. The function first compare
#' the profiles for each sample across datasets and lists discordant results.
#' Then it performs a pair-wise comparison and compiles a concordance table.
#' The tables are returned as two data frames in a list.
#' NB! Typing and PCR artefacts (spikes, off-ladder peaks, stutters etc.)
#' must be removed before analysis.
#' NB! It is expected that the unique set of marker names across a dataset is
#' present in each sample for that dataset (a missing marker is a discordance).
#'
#' @param data list of data frames in 'slim' format with at least columns
#' 'Sample.Name', 'Marker', and 'Allele'.
#' @param kit.name character vector for DNA typing kit names in same order and
#' of same lengths as data sets in 'data' list. Default is NA in which case
#' they will be numbered.
#' @param no.marker character vector for string when marker is missing.
#' @param no.sample character vector for string when sample is missing.
#' @param delimeter character to separate the alleles in a genotype.
#' Default is comma e.g '12,16'.
#' @param list.all logical TRUE to return missing samples.
#' @param debug logical indicating printing debug information.
#'
#' @return list of data.frames (discordance table, and pair-wise comparison).
#'
#' @export
#'
#' @importFrom utils str combn
#'
calculateConcordance <- function(data, kit.name = NA, no.marker = "NO MARKER",
no.sample = "NO SAMPLE",
delimeter = ",", list.all = FALSE, debug = FALSE) {
if (debug) {
print(paste("IN:", match.call()[[1]]))
print("Parameters:")
print("data")
print(str(data))
print("kit.name")
print(kit.name)
print("no.marker")
print(no.marker)
print("no.sample")
print(no.sample)
print("delimeter")
print(delimeter)
}
# CHECK DATA ----------------------------------------------------------------
# Check each dataset in list.
for (d in seq(along = data)) {
if (!"Sample.Name" %in% names(data[[d]])) {
stop("All datasets in 'data' must contain a column 'Sample.Name'.",
call. = TRUE
)
}
# Check dataset.
if (!"Marker" %in% names(data[[d]])) {
stop("All datasets in 'data' must contain a column 'Marker'.",
call. = TRUE
)
}
# Check dataset.
if (!"Allele" %in% names(data[[d]])) {
stop("All datasets in 'data' must contain a column 'Allele'.",
call. = TRUE
)
}
# Check if slim format.
if (sum(grepl("Allele", names(data[[d]]))) > 1) {
stop("'data' must be in 'slim' format.",
call. = TRUE
)
}
}
# Check kit.name vector.
if (all(is.na(kit.name))) {
# Create default names.
kit.name <- paste("Kit", seq(along = data), sep = ".")
} else if (length(kit.name) != length(data)) {
stop("'kit.name' must be of equal length as number of datasets.",
call. = TRUE
)
}
# Check parameter.
if (!is.character(no.sample)) {
stop("'no.sample' must be of type character.",
call. = TRUE
)
}
# Check parameter.
if (!is.character(no.marker)) {
stop("'no.marker' must be of type character.",
call. = TRUE
)
}
# Check parameter.
if (!is.character(delimeter)) {
stop("'delimeter' must be of type character.",
call. = TRUE
)
}
# PREPARE -----------------------------------------------------------------
# Initiate variables.
sampleList <- list() # List for logical sample vectors.
markerList <- list() # List for logical marker vectors.
resAlleleList <- list() # List for result.
resInfoList <- list() # List for result.
sampleNames <- NULL # Unique sample names.
markerNames <- NULL # Unique marker names.
# Loop over all datasets.
for (d in seq(along = data)) {
# Get all unique sample names and marker names.
sampleNames <- unique(c(sampleNames, data[[d]]$Sample.Name))
markerNames <- unique(c(markerNames, data[[d]]$Marker))
}
if (debug) {
print("sampleNames:")
print(sampleNames)
print("markerNames:")
print(markerNames)
}
# Add logical vectors for each dataset.
for (d in seq(along = data)) {
# Add pre-allocated vectors.
sampleList[[d]] <- vector(mode = "logical", length = length(sampleNames))
markerList[[d]] <- vector(mode = "logical", length = length(markerNames))
}
# CALCULATE -----------------------------------------------------------------
# 1) A list of all disconcordant results across datasets.
# Loop over all sample names.
for (s in seq(along = sampleNames)) {
# Progress.
message(paste("Calculate concordance for: ", sampleNames[s],
" (", s, " of ", length(sampleNames), ").",
sep = ""
))
# Loop over all marker names.
for (m in seq(along = markerNames)) {
# Create list.
alleleSet <- list()
# Loop over all data sets. (start with 1 to handle only 1 dataset in list.)
for (d in seq(along = data)) {
# Check if sample and add logical value.
if (any(data[[d]]$Sample.Name == sampleNames[s])) {
sampleList[[d]][s] <- TRUE
} else {
sampleList[[d]][s] <- FALSE
}
# Check if marker and add logical value.
if (any(data[[d]]$Marker == markerNames[m])) {
markerList[[d]][m] <- TRUE
} else {
markerList[[d]][m] <- FALSE
}
# Check if marker.
if (!(markerList[[d]][m] & sampleList[[d]][s])) {
if (!sampleList[[d]][s]) {
# Sample does not exist.
alleleSet[[d]] <- no.sample
} else if (!markerList[[d]][m]) {
# Marker does not exist.
alleleSet[[d]] <- no.marker
}
} else {
# Get current alleles from dataset.
alleleSet[[d]] <- data[[d]][data[[d]]$Sample.Name == sampleNames[s] & data[[d]]$Marker == markerNames[m], "Allele"]
}
}
if (list.all) {
# Check for discordant results excluding if only difference is missing
# marker (but including missing sample).
tmp <- alleleSet[alleleSet != no.marker]
discordance <- !length(unique(tmp)) == 1
} else {
# Check for discordant results excluding differences caused by missing
# sample or missing marker.
tmp <- alleleSet[alleleSet != no.sample & alleleSet != no.marker]
discordance <- !length(unique(tmp)) == 1
}
if (discordance) {
# Create result vectors.
resAlleleVec <- NULL
resInfoVec <- NULL
# Loop through all datasets.
for (d in seq(along = data)) {
resAlleleVec <- c(resAlleleVec, paste(alleleSet[[d]], collapse = delimeter))
}
# Add data to result vector.
resInfoVec <- c(sampleNames[s], markerNames[m])
# Add current marker to result list.
resAlleleList[[length(resAlleleList) + 1]] <- resAlleleVec
resInfoList[[length(resInfoList) + 1]] <- resInfoVec
}
}
}
# Convert to matrix.
if (length(resAlleleList) > 0) {
resAlleleM <- matrix(unlist(resAlleleList), byrow = TRUE, nrow = length(resAlleleList))
resInfoM <- matrix(unlist(resInfoList), byrow = TRUE, nrow = length(resInfoList))
# Make a data.frame:
res1 <- data.frame(cbind(resInfoM, resAlleleM), stringsAsFactors = FALSE)
names(res1) <- c("Sample.Name", "Marker", kit.name)
} else {
# Make a data.frame:
res1 <- data.frame("NO DISCORDANCE", stringsAsFactors = FALSE)
}
# Update audit trail.
res1 <- auditTrail(obj = res1, f.call = match.call(), package = "strvalidator")
# CALCULATE -----------------------------------------------------------------
# 2) A pair-wise comparison.
# Make pair-wise combinations.
kitComb <- combn(kit.name, 2)
iComb <- combn(seq(along = kit.name), 2)
nComb <- ncol(kitComb)
# Initiate vectors.
compareKit <- as.vector(mode = "any", nComb)
commonSamples <- as.vector(mode = "any", nComb)
commonLoci <- as.vector(mode = "any", nComb)
allelesTested <- as.vector(mode = "any", nComb)
discordantAlleles <- as.vector(mode = "any", nComb)
concordanceRate <- as.vector(mode = "any", nComb)
# Loop through all combinations.
for (i in 1:nComb) {
# Current combination.
compareKit[i] <- paste(kitComb[, i], collapse = " vs. ")
# Progress.
message(paste("Compare ", compareKit[i],
" (", i, " of ", nComb, ").",
sep = ""
))
# Number of common samples.
commonSamples[i] <- sum(sampleList[[iComb[1, i]]] & sampleList[[iComb[2, i]]])
# Number of common loci.
commonLoci[i] <- sum(markerList[[iComb[1, i]]] & markerList[[iComb[2, i]]])
# Number of alleles tested.
allelesTested[i] <- 2 * commonSamples[i] * commonLoci[i]
# Find number of discordant results.
# Initiate variable.
sumDiscordances <- 0
# Loop through each row of dscordant result.
for (r in seq(along = resAlleleList)) {
# Get alleles for current kits.
k1 <- resAlleleList[[r]][iComb[1, i]]
k2 <- resAlleleList[[r]][iComb[2, i]]
# Split into alleles.
k1 <- strsplit(k1, delimeter)
k2 <- strsplit(k2, delimeter)
# Only add if sample...
if (!(no.sample %in% k1 | no.sample %in% k2)) {
# ...and marker exist in both datasets.
if (!(no.marker %in% k1 | no.marker %in% k2)) {
# Compare alleles and count differences.
sumDiscordances <- sumDiscordances + sum(!k1 %in% k2)
}
}
}
# Number of discrodances.
discordantAlleles[i] <- sumDiscordances
# Concordance rate.
concordanceRate[i] <- 100 * (allelesTested[i] - discordantAlleles[i]) / allelesTested[i]
}
# Create dataframe.
res2 <- data.frame(
Kits = compareKit,
Samples = commonSamples,
Loci = commonLoci,
Alleles = allelesTested,
Discordances = discordantAlleles,
Concordance = concordanceRate,
stringsAsFactors = FALSE
)
# Update audit trail.
res2 <- auditTrail(obj = res2, f.call = match.call(), package = "strvalidator")
# Return list of the two dataframes.
res <- list(res1, res2)
if (debug) {
print(paste("EXIT:", match.call()[[1]]))
}
# Return result.
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.