Nothing
#' For affecteds: Take genetic variant and determine the category of the combo.
#' @param variant Variant for individual. genotypes, phased genotypes, or binary encodings accepted.
assign.a <- function(variant) {
alt.codes <- c("0/1", "1/1", "1", "0|1", "1|0", "1|1")
ref.codes <- c("0/0", "0", "0|0")
if (variant %in% alt.codes) {
return("A.c")
} else if (variant %in% ref.codes) {
return("A.i")
} else {
stop("Incompatible variant value! Supported encodings are: '0' '1' '0/0' '0/1' '0|0' '0|1'")
}
}
#' For unaffecteds: Take a genetic variant and determine the category of the combo.
#' @param variant Variant for individual. genotypes, phased genotypes, or binary encodings accepted.
assign.u <- function(variant) {
alt.codes <- c("0/1", "1/1", "1", "0|1", "1|0", "1|1")
ref.codes <- c("0/0", "0", "0|0")
if (variant %in% alt.codes) {
return("U.i")
} else if (variant %in% ref.codes) {
return("U.c")
} else {
stop("Incompatible variant value! Supported encodings are: '0' '1' '0/0' '0/1' '0|0' '0|1'")
}
}
#' Take a disease status and a genetic variant and determine which category the combo falls in.
#' A.c = Affected individual with ALT variant
#' A.i = Affected individual without ALT variant
#' U.c = Unaffected individual without ALT variant
#' U.i = Unaffected individual with ALT variant
#' If theoretical.max = TRUE the true variant statuses are ignored and all
#' affected/unaffected are assigned A.c and U.c respectively.
#' These encoding can then be used show what a family's max score would be.
#'
#' @param status Disease status of an individual. A = affected, U = unaffected.
#' @param variant Variant for individual. genotypes, phased genotypes, or binary encodings accepted.
#' @param theoretical.max Should the theoretical maxima be returned instead of the observed values?
#' When true, the scoring assumes correct variant-status pair for each individual.
#' Default is FALSE.
#' @return a string
#' @examples
#' assign.status("A", "0/1") == "A.c"
#' assign.status("A", "0|0") == "A.i"
#' assign.status("U", 1) == "U.i"
#' assign.status("U", "0|0") == "U.c"
#' @export
assign.status <- function(status, variant, theoretical.max = FALSE) {
var.err <- "Incompatible variant value! Supported encodings are: '0' '1' '0/0' '0/1' '0|0' '0|1'"
if (status == "A") {
if (theoretical.max) {
return("A.c")
# NOTE - Once in a while 1/0 genotypes crop up; also 0/2 etc. if derived from multi-allelics.
# This edge case not covered at present.
} else {
assign.a(variant)
}
} else if (status == "U") {
if (theoretical.max) {
return("U.c")
} else {
assign.u(variant)
}
} else {
stop("Status must be one of: U or A")
}
}
#' Take the dataframe with variants and status and determine which indivudals
#' are scored correctly and which are scored incorrectly.
#' Assign an A.c, A.i, U.c, U.i, unk
#'
#' Variants can be encoded as binary (0 or 1, genotypes 0/0 or 0/1, or phased genotypes 0|0 0|1).
#' Note the program assumes alt is the disease allele. homozygous alts are allowed.
#'
#' theoretical.max - bool, default is FALSE
#' when TRUE, function encodes the theoretical max,
#' using a dummy perfect associatng variant generated to see what a family could score.
#' TODO - switch to numbers 1-4 and -1?
#' @param indiv.df A dataframe with the format:
#' name status variant
#' MS-5678-1001 A 0/1
#' @param theoretical.max Should the theoretical maxima be returned instead of the observed values?
#' When true, the scoring assumes correct variant-status pair for each individual.
#' Default is FALSE.
#' @return Copy of input dataframe, with dataframe with the status categroies added as a new column "statvar.cat"
#' @export
score.variant.status <- function(indiv.df, theoretical.max = FALSE) {
# when encoding theoretical max, dummy perfect associating variant generated to see what a family could score.
if (theoretical.max) {
indiv.df$statvar.cat <- unlist(lapply(seq_len(nrow(indiv.df)), function(i) {
assign.status(indiv.df$status[[i]], indiv.df$variant[[i]], theoretical.max = TRUE)
}))
} else {
indiv.df$statvar.cat <- unlist(lapply(seq_len(nrow(indiv.df)), function(i) {
assign.status(indiv.df$status[[i]], indiv.df$variant[[i]])
}))
}
return(indiv.df)
}
#' Build dictionary with the relationships falling in the different categories for the query row.
#' @param mat.row A row from a relationship matrix
#' @param name.stat.dict A list with the labelled status/variant combo for each individual.
#' @param drop.unrelated Should unrelated (-1) relationships be dropped? Default = TRUE.
#'
#' @return A list with the categorized relationship/variant information.
#' @export
build.relation.dict <- function(mat.row, name.stat.dict, drop.unrelated = TRUE) {
indiv.rels <- list(
"A.c" = c(),
"A.i" = c(),
"U.c" = c(),
"U.i" = c()
)
for (i in seq_along(mat.row)) {
status.i <- name.stat.dict[[names(mat.row)[[i]]]]
rel.i <- mat.row[[i]]
if (rel.i != -1 || drop.unrelated == FALSE) {
indiv.rels[[status.i]] <- c(indiv.rels[[status.i]], rel.i)
}
}
return(indiv.rels)
}
#' Take the relationship matrix and the encoded statuses of info.
#' For each row, generate the encoded data for scoring.
#' @param relation.mat The relationship matrix for all pairwise combinations of individuals.
#' @param status.df The ID, status, and genotypes for each individual.
#' @param ... Additional arguments to be passed between methods.
#' @return A dictionary with the per-individual relationship lists.
#' One value for each row of the matrix.
#' @export
encode.rows <- function(relation.mat, status.df, ...) {
name.stat.dict <- status.df$statvar.cat
names(name.stat.dict) <- status.df$name
score.dicts <- lapply(seq_len(nrow(relation.mat)), function(i) {
build.relation.dict(relation.mat[i, ], name.stat.dict)
})
names(score.dicts) <- colnames(relation.mat)
return(score.dicts)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.