Nothing
#' Create a pedigree or pedigreeList object
#'
#' @param id Identification variable for individual
#' @param dadid Identification variable for father. Founders' parents should be coded
#' to NA, or another value specified by missid.
#' @param momid Identification variable for mother. Founders' parents should be coded
#' to NA, or another value specified by missid.
#' @param sex Gender of individual noted in `id'. Either character ("male","female",
#' "unknown","terminated") or numeric (1="male", 2="female", 3="unknown", 4="terminated")
#' data is allowed. For character data the string may be truncated, and of arbitrary case.
#' @param affected A variable indicating affection status. A multi-column matrix
#' can be used to give the status with respect to multiple traits. Logical, factor, and
#' integer types are converted to 0/1 representing unaffected and affected, respectively.
#' NAs are considered missing.
#' @param status Censor/Vital status (0="censored", 1="dead")
#' @param relation A matrix with 3 required columns (id1, id2, code) specifying special
#' relationship between pairs of individuals. Codes: 1=Monozygotic twin, 2=Dizygotic twin,
#' 3=Twin of unknown zygosity, 4=Spouse. (The last is necessary in order to place a marriage
#' with no children into the plot.) If famid is given in the call to create pedigrees, then
#' famid needs to be in the last column of the relation matrix. Note for tuples of >= 3 with
#' a mixture of zygosities, the plotting is limited to showing pairwise zygosity of adjacent
#' subjects, so it is only necessary to specify the pairwise zygosity, in the order the subjects
#' are given or appear on the plot.
#' @param famid An optional vector of family identifiers. If it is present the result will
#' contain individual pedigrees for each family in the set, which can be extacted using
#' subscripts. Individuals need to have a unique id \emph{within} family.
#' @param missid The founders are those with no father or mother in the pedigree. The dadid
#' and momid values for these subjects will either be NA or the value of this variable. The
#' default for missid is 0 if the id variable is numeric, and "" (empty string) otherwise.
#' @param x pedigree object in print and subset methods
#' @param max_message_n max number of individuals to list in error messages
#' @param ... optional arguments passed to internal functions
#' @param drop logical, used in subset function for dropping dimensionality
#' @return An object of class \code{pedigree} or \code{pedigreeList} Containing the following items:
#' famid id findex mindex sex affected status relation
#' @author Terry Therneau
#' @name pedigree
#' @export
pedigree <- function(id, dadid, momid,
sex, affected,
status, relation,
famid, missid, max_message_n = 5) {
n <- length(id)
## Code transferred from noweb to markdown vignette.
## Sections from the noweb/vignettes are noted here with
## Doc: Error and Data Checks
pedigree.idcheck(
id = id,
momid = momid,
dadid = dadid,
sex = sex
)
id <- pedigree.idrepair(id = id)
# momid <- pedigree.idrepair(id = momid)
# dadid <- pedigree.idrepair(id = dadid)
sex <- pedigree.sexrepair(sex = sex)
if (missing(missid)) {
missid <- pedigree.makemissingid(id = id)
}
nofather <- (is.na(dadid) | dadid == missid)
nomother <- (is.na(momid) | momid == missid)
if (!missing(famid)) {
if (any(is.na(famid))) stop("The family id cannot contain missing values")
if (is.factor(famid) || is.character(famid)) {
if (length(grep("^ *$", famid)) > 0) {
stop("The family id cannot be a blank or empty string")
}
}
# Make a temporary new id from the family and subject pair
oldid <- id
id <- paste(as.character(famid), as.character(id), sep = "/")
dadid <- paste(as.character(famid), as.character(dadid), sep = "/")
momid <- paste(as.character(famid), as.character(momid), sep = "/")
}
has_famid <- !missing(famid)
if (any(duplicated(id))) {
duplist <- id[duplicated(id)]
msg.n <- min(length(duplist), max_message_n)
stop(paste("Duplicate subject id:", duplist[1:msg.n]))
}
findex <- match(dadid, id, nomatch = 0)
if (any(sex[findex] != "male")) {
who <- unique((id[findex])[sex[findex] != "male"])
msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
stop(paste(
"Id not male, but is a father:",
paste(who[msg.n], collapse = " ")
))
}
if (any(findex == 0 & !nofather)) {
who <- dadid[which(findex == 0 & !nofather)]
msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
stop(paste(
"Value of 'dadid' not found in the id list",
paste(who[msg.n], collapse = " ")
))
}
mindex <- match(momid, id, nomatch = 0)
if (any(sex[mindex] != "female")) {
who <- unique((id[mindex])[sex[mindex] != "female"])
msg.n <- seq_len(min(max_message_n, length(who)))
stop(paste(
"Id not female, but is a mother:",
paste(who[msg.n], collapse = " ")
))
}
if (any(mindex == 0 & !nomother)) {
who <- momid[which(mindex == 0 & !nomother)]
msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
stop(paste(
"Value of 'momid' not found in the id list",
paste(who[msg.n], collapse = " ")
))
}
if (any(mindex == 0 & findex != 0) || any(mindex != 0 & findex == 0)) {
who <- id[which((mindex == 0 & findex != 0) | (mindex != 0 & findex == 0))]
msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
stop(paste(
"Subjects must have both a father and mother, or have neither",
paste(who[msg.n], collapse = " ")
))
}
if (!missing(famid)) {
if (any(famid[mindex] != famid[mindex > 0])) {
who <- (id[mindex > 0])[famid[mindex] != famid[mindex > 0]]
msg.n <- seq_len(min(max_message_n, length(who)))
stop(paste(
"Mother's family != subject's family",
paste(who[msg.n], collapse = " ")
))
}
if (any(famid[findex] != famid[findex > 0])) {
who <- (id[findex > 0])[famid[findex] != famid[findex > 0]]
msg.n <- seq_len(min(max_message_n, length(who)))
stop(paste(
"Father's family != subject's family",
paste(who[msg.n], collapse = " ")
))
}
}
## Doc: Creation of Pedigrees
if (missing(famid)) {
temp <- list(id = id, findex = findex, mindex = mindex, sex = sex)
} else {
temp <- list(
famid = famid, id = oldid, findex = findex, mindex = mindex,
sex = sex
)
}
if (!missing(affected)) {
temp$affected <- pedigree.process_affected(affected, n)
}
if (!missing(status)) {
temp$status <- pedigree.process_status(status, n)
}
if (!missing(relation)) {
temp$relation <- pedigree.process_relation(
relation = relation,
has_famid = has_famid,
famid = if (has_famid) famid else NULL,
id = id,
momid = momid,
dadid = dadid,
sex = sex
)
}
## Doc: Finish
if (missing(famid)) {
class(temp) <- "pedigree"
} else {
class(temp) <- "pedigreeList"
}
temp
}
#' @keywords internal
#' @noRd
pedigree.process_relation <- function(relation,
has_famid,
famid,
id,
momid,
dadid,
sex) {
rel_parsed <- pedigree.parse_relation(relation,
has_famid = has_famid
)
id1 <- rel_parsed$id1
id2 <- rel_parsed$id2
rel_code <- pedigree.coerce_relation_code(rel_parsed$code)
rel_famid <- rel_parsed$famid
# Ensure everyone in the relationship is in the pedigree
if (has_famid) {
key1 <- paste(as.character(rel_famid), as.character(id1), sep = "/")
key2 <- paste(as.character(rel_famid), as.character(id2), sep = "/")
} else {
key1 <- id1
key2 <- id2
}
indx1 <- match(key1, id, nomatch = 0L)
indx2 <- match(key2, id, nomatch = 0L)
if (any(indx1 == 0L | indx2 == 0L)) {
stop("Subjects in relationships that are not in the pedigree")
}
if (any(indx1 == indx2)) {
who <- indx1[indx1 == indx2]
stop(paste("Subject", id[who], "is their own spouse or twin"))
}
# Twin consistency checks
numeric_code <- as.numeric(rel_code)
if (any(numeric_code < 3L)) {
twins <- numeric_code < 3L
if (any(momid[indx1[twins]] != momid[indx2[twins]])) {
stop("Twins found with different mothers")
}
if (any(dadid[indx1[twins]] != dadid[indx2[twins]])) {
stop("Twins found with different fathers")
}
}
# MZ twin sex check
if (any(rel_code == "MZ twin")) {
mztwins <- rel_code == "MZ twin"
if (any(sex[indx1[mztwins]] != sex[indx2[mztwins]])) {
stop("MZ twins with different sexes")
}
}
if (has_famid == TRUE) {
data.frame(
famid = rel_famid,
indx1 = indx1,
indx2 = indx2,
code = rel_code
)
} else {
data.frame(
indx1 = indx1,
indx2 = indx2,
code = rel_code
)
}
}
pedigree.parse_relation <- function(relation, has_famid) {
if (is.matrix(relation)) {
expected_cols <- if (has_famid) 4L else 3L
if (ncol(relation) != expected_cols) {
if (has_famid) {
stop("Relation matrix must have 3 columns + famid")
} else {
stop("Relation matrix must have 3 columns: id1, id2, code")
}
}
id1 <- relation[, 1]
id2 <- relation[, 2]
code <- relation[, 3]
fam <- if (has_famid) relation[, 4] else NULL
} else if (is.data.frame(relation)) {
id1 <- relation$id1
id2 <- relation$id2
code <- relation$code
fam <- if (has_famid) relation$famid else NULL
if (is.null(id1) || is.null(id2) || is.null(code) ||
(has_famid && is.null(fam))) {
if (has_famid) {
stop("Relation data must have id1, id2, code, and family id")
} else {
stop("Relation data frame must have id1, id2, and code")
}
}
} else {
if (has_famid) {
stop("Relation argument must be a matrix or a dataframe")
} else {
stop("Relation argument must be a matrix or a list")
}
}
list(
id1 = id1,
id2 = id2,
code = code,
famid = fam
)
}
#'
#' @keywords internal
#' @noRd
#' @inheritParams pedigree
pedigree.idcheck <- function(id, momid, dadid, sex) {
n <- length(id)
if (length(momid) != n) stop("Mismatched lengths, id and momid")
if (length(dadid) != n) stop("Mismatched lengths, id and dadid")
if (length(sex) != n) stop("Mismatched lengths, id and sex")
if (any(is.na(id))) stop("Missing value for the id variable")
}
#' @keywords internal
#' @noRd
#' @inheritParams pedigree
pedigree.idrepair <- function(id) {
if (!is.numeric(id)) {
id <- as.character(id)
if (length(grep("^ *$", id)) > 0) {
stop("A blank or empty string is not allowed as the id variable")
}
}
id
}
#' @keywords internal
#' @noRd
#' @inheritParams pedigree
pedigree.sexrepair <- function(sex) {
# Allow for character/numeric
# Allow for character/numeric/factor in the sex variable
if (is.factor(sex)) {
sex <- as.character(sex)
}
codes <- c("male", "female", "unknown", "terminated")
if (is.character(sex)) {
# if all numeric strings, convert to numeric
if (length(unique(suppressWarnings(as.numeric(sex)))) > 1 &&
all(is.numeric(unique(suppressWarnings(as.numeric(sex)))), na.rm = TRUE)) {
sex <- as.numeric(sex)
} else {
sex <- charmatch(casefold(sex, upper = FALSE), codes,
nomatch = 3
)
}
}
# assume either 0/1/2/4 = female/male/unknown/term, or 1/2/3/4
# if only 1/2 assume no unknowns
if (min(sex) == 0) {
sex <- sex + 1
}
sex <- ifelse(sex < 1 | sex > 4, 3, sex)
if (all(sex > 2)) {
stop(
"All sex values are labeled as unknown. Please try using config options ",
"to specify male, female, and unknown labels ",
"(code_male, code_female, code_unknown)"
)
} else if (mean(sex == 3) > 0.25) {
warning(
"More than 25% of the sex values are 'unknown'. ",
"Please try using config options to specify male, female, and unknown labels."
)
}
sex <- factor(sex, 1:4, labels = codes)
sex
}
#' @keywords internal
#' @noRd
pedigree.process_status <- function(status, n) {
if (length(status) != n) {
stop("Wrong length for affected")
}
if (is.logical(status)) status <- as.integer(status)
if (any(status != 0L & status != 1L)) {
stop("Invalid status code")
}
status
}
#' @keywords internal
#' @noRd
pedigree.process_affected <- function(affected, n) {
if (is.matrix(affected)) {
if (nrow(affected) != n) stop("Wrong number of rows in affected")
if (is.logical(affected)) affected <- 1L * affected
} else {
if (length(affected) != n) {
stop("Wrong length for affected")
}
if (is.logical(affected)) affected <- as.numeric(affected)
if (is.factor(affected)) affected <- as.numeric(affected) - 1
}
if (max(affected, na.rm = TRUE) > min(affected, na.rm = TRUE)) {
affected <- affected - min(affected, na.rm = TRUE)
}
if (!all(affected == 0 | affected == 1 | is.na(affected))) {
stop("Invalid code for affected status")
}
affected
}
## Doc: Subscripting a pedigree
#' @rdname pedigree
#' @export
"[.pedigreeList" <- function(x, ..., drop = FALSE) {
if (length(list(...)) != 1) stop("Only 1 subscript allowed")
ufam <- unique(x$famid)
if (is.factor(..1) || is.character(..1)) {
indx <- ufam[match(..1, ufam)]
} else {
indx <- ufam[..1]
}
if (any(is.na(indx))) {
stop(paste("Family", (..1[is.na(indx)])[1], "not found"))
}
keep <- which(x$famid %in% indx) # which rows to keep
for (i in c("id", "famid", "sex")) {
x[[i]] <- (x[[i]])[keep]
}
kept.rows <- seq_along(x$findex)[keep]
x$findex <- match(x$findex[keep], kept.rows, nomatch = 0)
x$mindex <- match(x$mindex[keep], kept.rows, nomatch = 0)
# optional components
if (!is.null(x$status)) x$status <- x$status[keep]
if (!is.null(x$affected)) {
if (is.matrix(x$affected)) {
x$affected <- x$affected[keep, , drop = FALSE]
} else {
x$affected <- x$affected[keep]
}
}
if (!is.null(x$relation)) {
keep <- !is.na(match(x$relation$famid, indx))
if (any(keep)) {
x$relation <- x$relation[keep, , drop = FALSE]
## Update twin id indexes
x$relation$indx1 <- match(x$relation$indx1, kept.rows, nomatch = 0)
x$relation$indx2 <- match(x$relation$indx2, kept.rows, nomatch = 0)
## If only one family chosen, remove famid
if (length(indx) == 1) {
x$relation$famid <- NULL
}
} else {
x$relation <- NULL
} # No relations matrix elements for this family
}
if (length(indx) == 1) {
class(x) <- "pedigree"
# only one family chosen
} else {
class(x) <- "pedigreeList"
}
x
}
#' @rdname pedigree
#' @export
"[.pedigree" <- function(x, ..., drop = FALSE) {
if (length(list(...)) != 1) stop("Only 1 subscript allowed")
if (is.character(..1) || is.factor(..1)) {
i <- match(..1, x$id)
} else {
i <- seq_along(x$id)[..1]
}
if (any(is.na(i))) paste("Subject", ..1[which(is.na(i))][1], "not found")
z <- list(
id = x$id[i], findex = match(x$findex[i], i, nomatch = 0),
mindex = match(x$mindex[i], i, nomatch = 0),
sex = x$sex[i]
)
if (!is.null(x$affected)) {
if (is.matrix(x$affected)) {
z$affected <- x$affected[i, , drop = FALSE]
} else {
z$affected <- x$affected[i]
}
}
if (!is.null(x$famid)) z$famid <- x$famid[i]
if (!is.null(x$relation)) {
indx1 <- match(x$relation$indx1, i, nomatch = 0)
indx2 <- match(x$relation$indx2, i, nomatch = 0)
keep <- (indx1 > 0 & indx2 > 0) # keep only if both id's are kept
if (any(keep)) {
z$relation <- x$relation[keep, , drop = FALSE]
z$relation$indx1 <- indx1[keep]
z$relation$indx2 <- indx2[keep]
}
}
if (!is.null(x$hints)) {
temp <- list(order = x$hints$order[i])
if (!is.null(x$hints$spouse)) {
indx1 <- match(x$hints$spouse[, 1], i, nomatch = 0)
indx2 <- match(x$hints$spouse[, 2], i, nomatch = 0)
keep <- (indx1 > 0 & indx2 > 0) # keep only if both id's are kept
if (any(keep)) {
temp$spouse <- cbind(
indx1[keep], indx2[keep],
x$hints$spouse[keep, 3]
)
}
}
z$hints <- temp
}
if (any(z$findex == 0 & z$mindex > 0) || any(z$findex > 0 & z$mindex == 0)) {
stop("A subpedigree cannot choose only one parent of a subject")
}
class(z) <- "pedigree"
z
}
#' @keywords internal
#' @noRd
pedigree.coerce_relation_code <- function(code) {
levels <- c("MZ twin", "DZ twin", "UZ twin", "spouse")
if (is.factor(code)) {
code <- as.character(code)
}
if (is.numeric(code)) {
if (any(code < 1L | code > 4L | is.na(code))) {
stop("Invalid relationship code")
}
factor(code, levels = 1:4, labels = levels)
} else {
idx <- match(code, levels)
if (any(is.na(idx))) {
stop("Invalid relationship code")
}
factor(idx, levels = 1:4, labels = levels)
}
}
#' @rdname pedigree
#' @method print pedigree
print.pedigree <- function(x, ...) {
cat("Pedigree object with", length(x$id), "subjects")
if (!is.null(x$famid)) {
cat(", family id=", x$famid[1], "\n")
} else {
cat("\n")
}
cat("Bit size=", kinship2_bitSize(x)$bitSize, "\n")
}
#' @rdname pedigree
#' @method print pedigreeList
print.pedigreeList <- function(x, ...) {
cat(
"Pedigree list with", length(x$id), "total subjects in",
length(unique(x$famid)), "families\n"
)
}
pedigree.makemissingid <- function(id) {
## Doc: Errors2
if (is.numeric(id)) {
0
} else {
""
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.