#' estimate per-comparison error rates
#'
#'
#'
#' @param gmaData the gmaData object containing your baseline populations and potential descendents. This
#' input is created by \code{createGmaInput}
#' @param relationship the relationship you want to test for: "ssGP" - single sided grandparentage
#' (a pair of either two maternal grandparents OR two paternal grandparents); "sP" - single parent inference
#' @param llrToTest a vector of llr thresholds to estimate error rates for
#' @param N the number of Monte Carlo samples to take to estimate the error rates (ignored for stratified methods)
#' @param seed a positive integer to use as a seed for random number generation.
#' @param itersperMI the number of iterations per Mendelian incompatibility, in order of 0, 1, ...
#' (ignored for non-stratified methods)
#' @param errorType the type of error estimate to make. Options for relationship = "ssGP": "falseNegative",
#' "Unrel", "True_GAunt", "True_Unrel", "True_HGAunt", "True_GpCous", "GAunt_Unrel",
#' "HGAunt_Unrel", "GpCous_Unrel", "GAunt", "GAunt_HGAunt", "Gaunt_GpCous", "HGAunt",
#' "HGAunt_GpCous", "GpCous". Options for relationship "sP": "falseNegative",
#' "Unrel", "Aunt", "HalfAunt", "ParCous". The "pairwise" option is experimental and should
#' not be used.
#' @param MIexcludeProb the maximum probability of exclusion for a true relationship due to
#' Mendelian incompatibilities. If \code{0}, then no filtering is performed
#' based on Mendelian incompatibilities.
#' @param maxMissingGenos the maximum number of missing genotypes a sample can have before you would
#' choose to omit it from analysis. Default is 10\% of the total number of loci, rounded up
#' @param method strat for stratified, IS for importance sampling - currently only available for ssGP. Do not use method = "test",
#' this is currently for internal testing and will be removed.
#'
#' @export
falseGrandma <- function(gmaData, relationship = c("ssGP", "sP"),
llrToTest, N = 10000, seed = sample.int(.Machine$integer.max, 1), itersPerMI = NULL,
errorType = c("falseNegative", "pairwise", "Unrel", "Aunt", "HalfAunt", "ParCous",
"True_GAunt", "True_Unrel", "True_HGAunt", "True_GpCous",
"GAunt_Unrel", "HGAunt_Unrel", "GpCous_Unrel", "GAunt", "GAunt_HGAunt",
"Gaunt_GpCous", "HGAunt", "HGAunt_GpCous", "GpCous"),
MIexcludeProb = .0001, maxMissingGenos = NULL,
method = c("strat", "IS", "test")){
method <- match.arg(method)
rel <- match.arg(relationship)
tRel <- match.arg(errorType)
if(seed < 0) {
warning("seed must not be negative, setting seed to 1")
seed <- 1
}
if(is.null(maxMissingGenos)) maxMissingGenos <- ceiling(.1 * length(gmaData$genotypeKey))
if(maxMissingGenos %% 1 != 0) stop("maxMissingGenos must be an integer")
if(method == "IS" && rel != "ssGP") stop("method of IS is only an option for ssGP")
useUnsamp <- FALSE
skipBaseline <- c() # so unsampled pops aren't tested as a baseline
if(!is.null(gmaData$unsampledPopsParams)){
useUnsamp <- TRUE
if(tRel == "pairwise"){
# need to perform pairwise with unsampled pops
addedUnsampledPopsNames <- paste0("UnsampledPop_", names(gmaData$unsampledPopsParams))
# just in case there is already a pop with that name
index <- 1
while(any(addedUnsampledPopsNames %in% names(gmaData$baselineParams))){
addedUnsampledPopsNames[addedUnsampledPopsNames %in% names(gmaData$baselineParams)] <-
paste0(addedUnsampledPopsNames[addedUnsampledPopsNames %in% names(gmaData$baselineParams)],
"_added_", index)
index <- index + 1
}
rm(index)
addedUPparams <- gmaData$unsampledPopsParams
names(addedUPparams) <- addedUnsampledPopsNames
gmaData$baselineParams <- c(gmaData$baselineParams, addedUPparams)
skipBaseline <- addedUnsampledPopsNames
}
}
# turn pop names into ints for speed and avoid headache of dealing with strings
# 0 index for c++
allPops <- names(gmaData$baselineParams)
popKey <- data.frame(popName = allPops, popInt = 0:(length(allPops)-1), stringsAsFactors = FALSE)
names(gmaData$baselineParams) <- as.character(popKey[match(names(gmaData$baselineParams), popKey[,1]),2])
gmaData$baselineParams <- gmaData$baselineParams[order(as.numeric(names(gmaData$baselineParams)))]
if(length(skipBaseline) > 0) skipBaseline <- popKey[match(skipBaseline, popKey[,1]),2]
skipBaseline <- as.numeric(skipBaseline)
cat("\nPopulations will be evaluated in the following order:\n")
for(i in 1:nrow(popKey)){
cat("\t", i, popKey[i,1], "\n")
}
cat("\n")
if(useUnsamp) names(gmaData$unsampledPopsParams) <- as.character(popKey[match(names(gmaData$unsampledPopsParams), popKey[,1]),2])
# now we have to take into account cases where some (or all) baseline pops don't have "unsampledPopParams"
new <- list()
for(i in 1:length(allPops)){
if(useUnsamp && as.character(i-1) %in% names(gmaData$unsampledPopsParams)){
new[[i]] <- gmaData$unsampledPopsParams[[as.character(i-1)]]
} else {
new[[i]] <- gmaData$baselineParams[[i]]
}
}
gmaData$unsampledPopsParams <- new
# change things to numeric matrices when pass them to c++
gmaData$genotypeKey <- lapply(gmaData$genotypeKey, as.matrix)
ssGP_err_rels <- c("Unrel", "True_GAunt", "True_Unrel", "True_HGAunt", "True_GpCous",
"GAunt_Unrel", "HGAunt_Unrel", "GpCous_Unrel", "GAunt", "GAunt_HGAunt",
"Gaunt_GpCous", "HGAunt", "HGAunt_GpCous", "GpCous")
if(rel == "ssGP"){
if(tRel %in% c(ssGP_err_rels, "pairwise") && method == "strat"){
if(is.null(itersPerMI)) stop("itersPerMI must be input for this option.")
if(any((itersPerMI %% 1) != 0)) stop("all itersPerMI must be integers")
if(any(itersPerMI == 1)) warning("some itersPerMI are 1, SD will be undefined.")
if(any(itersPerMI < 0)) stop("some itersPerMI are negative.")
}
if(tRel == "pairwise"){
errResults <- strat_otherPopERRORssGP(gmaData$baselineParams, gmaData$unsampledPopsParams,
gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest, itersPerMI,
round(seed), skipBaseline, MIexcludeProb,
maxMissingGenos)
} else {
if(method == "IS"){
if(tRel == "Unrel"){
errResults <- list(ERRORssGP(gmaData$baselineParams, gmaData$unsampledPopsParams,
gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest, round(N), round(seed),
MIexcludeProb, maxMissingGenos)
)
} else {
errResults <- list(IS_rel_ERRORssGP(gmaData$baselineParams,
gmaData$unsampledPopsParams, gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest, round(N),
c(0:length(ssGP_err_rels))[which(ssGP_err_rels == tRel)],
round(seed), MIexcludeProb, maxMissingGenos)
)
}
} else if (method == "test"){
errResults <- list(IS_rel_ERRORssGP(gmaData$baselineParams,
gmaData$unsampledPopsParams, gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest, round(N),
c(0:length(ssGP_err_rels))[which(ssGP_err_rels == tRel)],
round(seed), MIexcludeProb, maxMissingGenos)
)
} else if (tRel == "falseNegative"){
# just running the IS function, doesn't really add significant comp time
errResults <- ERRORssGP(gmaData$baselineParams, gmaData$unsampledPopsParams,
gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest, round(N), round(seed),
MIexcludeProb, maxMissingGenos)
errResults <- list(errResults[,c(1,2,5,6)])
} else if (tRel %in% ssGP_err_rels) {
# method is strat, false positive of some sort
errResults <- strat_ERRORssGP(gmaData$baselineParams, gmaData$unsampledPopsParams,
gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest,
itersPerMI,
round(seed), c(0:length(ssGP_err_rels))[which(ssGP_err_rels == tRel)],
MIexcludeProb, maxMissingGenos)
} else {
stop("relationship and error type combination not recognized")
}
}
} else if(rel == "sP"){
if(tRel %in% c("Unrel", "Aunt", "HalfAunt", "ParCous", "pairwise")){
if(is.null(itersPerMI)) stop("itersPerMI must be input for this option.")
if(any((itersPerMI %% 1) != 0)) stop("all itersPerMI must be integers")
if(any(itersPerMI == 1)) warning("some itersPerMI are 1, SD will be undefined.")
if(any(itersPerMI < 0)) stop("some itersPerMI are negative.")
}
if(tRel == "pairwise"){
errResults <- strat_otherPopERRORsP(gmaData$baselineParams,
gmaData$unsampledPopsParams, gmaData$missingParams,
gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest,
itersPerMI,
round(seed), skipBaseline, MIexcludeProb, maxMissingGenos)
} else if(tRel == "falseNegative"){
errResults <- list(falseNeg_ERRORsP(gmaData$baselineParams, gmaData$unsampledPopsParams,
gmaData$missingParams, gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest, round(N), round(seed),
MIexcludeProb, maxMissingGenos)
)
} else if (tRel %in% c("Unrel", "Aunt", "HalfAunt", "ParCous")) {
errResults <- strat_ERRORsP(gmaData$baselineParams,
gmaData$unsampledPopsParams, gmaData$missingParams,
gmaData$genotypeKey,
gmaData$genotypeErrorRates, llrToTest,
itersPerMI,
round(seed), c(0,1,2,3)[which(c("Unrel", "Aunt", "HalfAunt", "ParCous") == tRel)],
MIexcludeProb, maxMissingGenos)
} else {
stop("relationship and error type combination not recognized")
}
} else {
stop("This relationship not set up at this time")
}
# turn pop names back to strings
for(i in 1:length(errResults)){
if(tRel == "pairwise") errResults[[i]][,2] <- popKey[match(errResults[[i]][,2], popKey[,2]),1]
errResults[[i]][,1] <- popKey[match(errResults[[i]][,1], popKey[,2]),1]
}
return(errResults)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.