Nothing
#' Likelihood function for calculation of Pedigree-based autosomal dominant penetrance value.
#' Formula deployed via optimize so as to determine the optimal value.
#'
#' @param K The range of penetrance values to be explored by the optimization function.
#' @param a Count of affected individuals
#' @param b Count of obligate carriers
#' @param c Count of children of either affecteds or carriers, with no children of their own
#' @param d Count of trees of unaffected individuals - specifically, two sequential generations (i.e. a parent and their offspring)
#' @param n Count of the number of second generation progeny in a given tree.
#'
#' @return K Pedigree-based estimation of autosomal dominant penetrance rate.
#' @export
#'
#' @examples
#' K <- optimize(penetrance, c(0,1), 3, 1, 5, 2, 1, maximum=TRUE)$max
penetrance <- function(K, a, b, c, d, n) {
a*log(K) + b*log(1-K) + c*log(2-K) + sum(d*log(2^n + (1-K)*(2-K)^n) - d*(n+1)*log(2))
}
#' Calculation of Identity by descent (IBD).
#'
#' Use the relationship informationfrom the pedigree to
#' estimate of the amount of the genome they have inherited it from a
#' common ancestor without recombination.
#'
#' Can do this for the total potential relatedness
#' in a pedigree (theoretical=TRUE),
#' or for the actual relatedness across collected samples (theoretical=FALSE).
#' For the theoretical=TRUE case, in the unaffected trees,
#' if we have a sample from the parent,
#' then the offspring do not provide any additional information
#' for a max IBD calculation.
#' This means that K does not scale with n.
#'
#' For theoretical=FALSE, sometimes we don’t have the healthy parent in an unaffected tree,
#' and only have a child. In this case, the IBD contribution from the child is 1/4,
#' and since they’re unaffected and therefore are a counter-filter,
#' they would contribute 1-1/4 = 3/4 to the total relatedness.
#' Either the parent is a non-obligate carrier, or is a non-carrier.
#' The probability of the children depends on which of those is true,
#' so we have the additional set of terms in the theoretical=FALSE logic.
#'
#'
#' @param a Count of affected individuals
#' @param b Count of obligate carriers
#' @param c Count of children of either affecteds or carriers, with no children of their own
#' @param d Count of Trees of unaffected individuals - specifically, two sequential generations (i.e. a parent and their offspring)
#' @param n Count of the number of second generation progeny in a given tree.
#' @param K The estimate of penetrance rate.
#' @param theoretical Boolean indicating if the calculation should be
#' theoretical IBD calculation (using only d and k), or if the calculation
#' should use the provided n value.
#'
#'
#' @return pi-hat value. The proportion of genome shared between individuals.
#' @export
#'
#' @examples
#' ibd <- ibd(3, 1, 5, 2, 1, 0.4576484)
ibd <- function(a, b, c, d, n, K, theoretical=TRUE) {
if (theoretical) {
x <- sum(d) * 2^K
} else {
x <- sum(d*(1 - (2^n-1)/2^(n+1))) * 2^K
}
pihat<-(a + b + c * 2^K + x) - 1
return(pihat)
}
#' Score the pedigrees using the pihat values.
#'
#' @param pihat Estimated proportion of genome shared between individuals,
#' from function: ibd.
#'
#' @return The score value.
#' @export
#' @examples
#' s.val <- score(12.61)
score <- function(pihat) {
log(2^pihat)
}
#' Read in the encoded pedigree data file.
#'
#' @param filename name of the file with the data.
#'
#' @return A data frame containing the encoded pedigree information.
#' @export
#'
#' @examples
#' example.pedigree.file <- system.file('extdata/example_pedigree_encoding.tsv',
#' package = 'KinformR')
#' example.pedigree.df <- read.pedigree(example.pedigree.file)
read.pedigree <- function(filename){
h <- read.table(filename, header=TRUE, sep="\t",
check.names=FALSE, colClasses=c("Family"="character"))
return(h)
}
#' Take the encoded information about the pedigrees and calculate penetrance.
#'
#' Determine a value score of families by comparing their relationship structure.
#' More distant relationships between affecteds (e.g. affected cousins)
#' is more valuable that close relationships (e.g. affected siblings)
#' as there is less IBD and therefore a smaller search space.
#'
#'Simplifying assumptions:
#' - Autosomal dominant
#' - No ambiguous statuses
#' - No more than two sequential generations of unknown carrier status
#' (non-obligate carrier vs. non-carrier).
#' Generalized support of arbitrary tree structures gets a lot more
#' complicated, especially for the likelihood function.
#' - Exclude big giant trees of unaffecteds - related to above.
#' Will slightly bias the result toward higher penetrance.
#' - Exclude subjects younger than age of onset
#'
#' @param h A data frame containing the encoded pedigree information
#' @return A data frame containing the theoretical scoring of the power of a
#' family assuming you were able to collect everyone on the simplified pedigree,
#' as well as a current scoreing, examining only those for whom you currently
#' have DNA.
#' @export
#'
#' @examples
#' example.pedigree.file <-system.file('extdata/example_pedigree_encoding.tsv',
#' package = 'KinformR')
#' example.pedigree.df <- read.pedigree(example.pedigree.file)
#' penetrance.df <- score.pedigree(example.pedigree.df)
score.pedigree <- function(h){
family.vec <- c()
penetrance.vec <- c()
max.pihat.vec <- c()
max.score.vec <- c()
current.pihat.vec <- c()
current.score.vec <- c()
for (i in seq_len(nrow(h))) {
family <- h[i,"Family"]
max.a <- h[i, "max_a"]
#Yeezy yeezy what's good, its ya boy
max.b <- h[i, "max_b"]
max.c <- h[i, "max_c"]
max.d <- h[i, "max_d"]
max.n <- h[i, "max_n"]
a.actual <- h[i, "a"]
b.actual <- h[i, "b"]
c.actual <- h[i, "c"]
d.actual <- h[i, "d"]
n.actual <- h[i, "n"]
max.d <- as.numeric(strsplit(as.character(max.d), ",")[[1]])
max.n <- as.numeric(strsplit(as.character(max.n), ",")[[1]])
d.actual <- as.numeric(strsplit(as.character(d.actual), ",")[[1]])
n.actual <- as.numeric(strsplit(as.character(n.actual), ",")[[1]])
K <- optimize(penetrance, c(0,1), max.a, max.b, max.c,
max.d, max.n, maximum=TRUE)$max
max.pihat <- ibd(max.a, max.b, max.c, max.d, max.n, K)
max.score <- score(max.pihat)
current.pihat <- ibd(a.actual, b.actual, c.actual,
d.actual, n.actual, K, FALSE)
current.score <- score(current.pihat)
family.vec <- c(family.vec, family)
penetrance.vec <- c(penetrance.vec, K)
max.pihat.vec <- c(max.pihat.vec, max.pihat)
max.score.vec <- c(max.score.vec, max.score)
current.pihat.vec <- c(current.pihat.vec, current.pihat)
current.score.vec <- c(current.score.vec,current.score)
}
df <- data.frame(family.vec, penetrance.vec, max.pihat.vec, max.score.vec,
current.pihat.vec, current.score.vec)
colnames(df) <- c("family", "penetrance", "max.pi-hat", "max.score",
"current.pi-hat", "current.score")
df$pct.of.max <- df$current.score / df$max.score * 100
df[, -1] <- round(df[, -1], 2)
return(df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.