Nothing
#' Create a pedigree or pedigreeList object
#'
#' Create a pedigree or pedigreeList object
#'
#' @param id Identification variable for individual
#' @param dadid Identification variable for father. Founders' parents should be coded
#' to NA, or another value specified by missid.
#' @param momid Identification variable for mother. Founders' parents should be coded
#' to NA, or another value specified by missid.
#' @param sex Gender of individual noted in `id'. Either character ("male","female",
#' "unknown","terminated") or numeric (1="male", 2="female", 3="unknown", 4="terminated")
#' data is allowed. For character data the string may be truncated, and of arbitrary case.
#' @param affected A variable indicating affection status. A multi-column matrix
#' can be used to give the status with respect to multiple traits. Logical, factor, and
#' integer types are converted to 0/1 representing unaffected and affected, respectively.
#' NAs are considered missing.
#' @param status Censor/Vital status (0="censored", 1="dead")
#' @param relation A matrix with 3 required columns (id1, id2, code) specifying special
#' relationship between pairs of individuals. Codes: 1=Monozygotic twin, 2=Dizygotic twin,
#' 3=Twin of unknown zygosity, 4=Spouse. (The last is necessary in order to place a marriage
#' with no children into the plot.) If famid is given in the call to create pedigrees, then
#' famid needs to be in the last column of the relation matrix. Note for tuples of >= 3 with
#' a mixture of zygosities, the plotting is limited to showing pairwise zygosity of adjacent
#' subjects, so it is only necessary to specify the pairwise zygosity, in the order the subjects
#' are given or appear on the plot.
#' @param famid An optional vector of family identifiers. If it is present the result will
#' contain individual pedigrees for each family in the set, which can be extacted using
#' subscripts. Individuals need to have a unique id \emph{within} family.
#' @param missid The founders are those with no father or mother in the pedigree. The dadid
#' and momid values for these subjects will either be NA or the value of this variable. The
#' default for missid is 0 if the id variable is numeric, and "" (empty string) otherwise.
#' @param x pedigree object in print and subset methods
#' @param ... optional arguments passed to internal functions
#' @param drop logical, used in subset function for dropping dimensionanlity
#' @return An object of class \code{pedigree} or \code{pedigreeList} Containing the following items:
#' famid id findex mindex sex affected status relation
#' @examples
#' data(minnbreast)
#' bpeds <- with(minnbreast,
#' pedigree(id, fatherid, motherid, sex, affected=proband, famid=famid))
#' bped.id8 <- bpeds['8']
#' print(bped.id8)
#' ## show this pedigree with mixed zygosity quadruplets
#' rel8 <- data.frame(id1=c(137,138,139), id2=c(138,139,140), code=c(1,2,2))
#' bped.id8 <- with(minnbreast[minnbreast$famid==8,],
#' pedigree(id, fatherid, motherid, sex, affected=proband, relation=rel8))
#' print(bped.id8)
#' @author Terry Therneau
#' @seealso \code{\link{kinship}}, \code{\link{plot.pedigree}}, \code{\link{autohint}}
#' @name pedigree
NULL
#> NULL
#' @rdname pedigree
#' @export
pedigree <- function(id, dadid, momid, sex, affected, status, relation,
famid, missid) {
n <- length(id)
## Code transferred from noweb to markdown vignette.
## Sections from the noweb/vignettes are noted here with
## Doc: Error and Data Checks
## Doc: Errors1
if (length(momid) != n) stop("Mismatched lengths, id and momid")
if (length(dadid) != n) stop("Mismatched lengths, id and momid")
if (length(sex ) != n) stop("Mismatched lengths, id and sex")
# Don't allow missing id values
if (any(is.na(id))) stop("Missing value for the id variable")
if (!is.numeric(id)) {
id <- as.character(id)
if (length(grep('^ *$', id)) > 0)
stop("A blank or empty string is not allowed as the id variable")
}
# Allow for character/numeric/factor in the sex variable
if(is.factor(sex))
sex <- as.character(sex)
codes <- c("male","female", "unknown", "terminated")
if(is.character(sex)) sex<- charmatch(casefold(sex, upper = FALSE), codes,
nomatch = 3)
# assume either 0/1/2/4 = female/male/unknown/term, or 1/2/3/4
# if only 1/2 assume no unknowns
if(min(sex) == 0)
sex <- sex + 1
sex <- ifelse(sex < 1 | sex > 4, 3, sex)
if(all(sex > 2))
stop("Invalid values for 'sex'")
else if(mean(sex == 3) > 0.25)
warning("More than 25% of the gender values are 'unknown'")
sex <- factor(sex, 1:4, labels = codes)
## Doc: Errors2
if (missing(missid)) {
if (is.numeric(id)) missid <- 0
else missid <- ""
}
nofather <- (is.na(dadid) | dadid==missid)
nomother <- (is.na(momid) | momid==missid)
if (!missing(famid)) {
if (any(is.na(famid))) stop("The family id cannot contain missing values")
if (is.factor(famid) || is.character(famid)) {
if (length(grep('^ *$', famid)) > 0)
stop("The family id cannot be a blank or empty string")
}
#Make a temporary new id from the family and subject pair
oldid <-id
id <- paste(as.character(famid), as.character(id), sep='/')
dadid <- paste(as.character(famid), as.character(dadid), sep='/')
momid <- paste(as.character(famid), as.character(momid), sep='/')
}
if (any(duplicated(id))) {
duplist <- id[duplicated(id)]
msg.n <- min(length(duplist), 6)
stop(paste("Duplicate subject id:", duplist[1:msg.n]))
}
findex <- match(dadid, id, nomatch = 0)
if(any(sex[findex] != "male")) {
who <- unique((id[findex])[sex[findex] != "male"])
msg.n <- 1:min(5, length(who)) #Don't list a zillion
stop(paste("Id not male, but is a father:",
paste(who[msg.n], collapse= " ")))
}
if (any(findex==0 & !nofather)) {
who <- dadid[which(findex==0 & !nofather)]
msg.n <- 1:min(5, length(who)) #Don't list a zillion
stop(paste("Value of 'dadid' not found in the id list",
paste(who[msg.n], collapse= " ")))
}
mindex <- match(momid, id, nomatch = 0)
if(any(sex[mindex] != "female")) {
who <- unique((id[mindex])[sex[mindex] != "female"])
msg.n <- 1:min(5, length(who))
stop(paste("Id not female, but is a mother:",
paste(who[msg.n], collapse = " ")))
}
if (any(mindex==0 & !nomother)) {
who <- momid[which(mindex==0 & !nomother)]
msg.n <- 1:min(5, length(who)) #Don't list a zillion
stop(paste("Value of 'momid' not found in the id list",
paste(who[msg.n], collapse= " ")))
}
if (any(mindex==0 & findex!=0) || any(mindex!=0 & findex==0)) {
who <- id[which((mindex==0 & findex!=0) |(mindex!=0 & findex==0))]
msg.n <- 1:min(5, length(who)) #Don't list a zillion
stop(paste("Subjects must have both a father and mother, or have neither",
paste(who[msg.n], collapse= " ")))
}
if (!missing(famid)) {
if (any(famid[mindex] != famid[mindex>0])) {
who <- (id[mindex>0])[famid[mindex] != famid[mindex>0]]
msg.n <- 1:min(5, length(who))
stop(paste("Mother's family != subject's family",
paste(who[msg.n], collapse=" ")))
}
if (any(famid[findex] != famid[findex>0])) {
who <- (id[findex>0])[famid[findex] != famid[findex>0]]
msg.n <- 1:min(5, length(who))
stop(paste("Father's family != subject's family",
paste(who[msg.n], collapse=" ")))
}
}
## Doc: Creation of Pedigrees
if (missing(famid))
temp <- list(id = id, findex=findex, mindex=mindex, sex=sex)
else temp<- list(famid=famid, id=oldid, findex=findex, mindex=mindex,
sex=sex)
if (!missing(affected)) {
if (is.matrix(affected)){
if (nrow(affected) != n) stop("Wrong number of rows in affected")
if (is.logical(affected)) affected <- 1* affected
}
else {
if (length(affected) != n)
stop("Wrong length for affected")
if (is.logical(affected)) affected <- as.numeric(affected)
if (is.factor(affected)) affected <- as.numeric(affected) -1
}
if(max(affected, na.rm=TRUE) > min(affected, na.rm=TRUE))
affected <- affected - min(affected, na.rm=TRUE)
if (!all(affected==0 | affected==1 | is.na(affected)))
stop("Invalid code for affected status")
temp$affected <- affected
}
if(!missing(status)) {
if(length(status) != n)
stop("Wrong length for affected")
if (is.logical(status)) status <- as.integer(status)
if(any(status != 0 & status != 1))
stop("Invalid status code")
temp$status <- status
}
if (!missing(relation)) {
if (!missing(famid)) {
if (is.matrix(relation)) {
if (ncol(relation) != 4)
stop("Relation matrix must have 3 columns + famid")
id1 <- relation[,1]
id2 <- relation[,2]
code <- relation[,3]
famid <- relation[,4]
}
else if (is.data.frame(relation)) {
id1 <- relation$id1
id2 <- relation$id2
code <- relation$code
famid <- relation$famid
if (is.null(id1) || is.null(id2) || is.null(code) ||is.null(famid))
stop("Relation data must have id1, id2, code, and family id")
}
else stop("Relation argument must be a matrix or a dataframe")
}
else {
if (is.matrix(relation)) {
if (ncol(relation) != 3)
stop("Relation matrix must have 3 columns: id1, id2, code")
id1 <- relation[,1]
id2 <- relation[,2]
code <- relation[,3]
}
else if (is.data.frame(relation)) {
id1 <- relation$id1
id2 <- relation$id2
code <- relation$code
if (is.null(id1) || is.null(id2) || is.null(code))
stop("Relation data frame must have id1, id2, and code")
}
else stop("Relation argument must be a matrix or a list")
}
if (!is.numeric(code))
code <- match(code, c("MZ twin", "DZ twin", "UZ twin", "spouse"))
else code <- factor(code, levels=1:4,
labels=c("MZ twin", "DZ twin", "UZ twin", "spouse"))
if (any(is.na(code)))
stop("Invalid relationship code")
# Is everyone in this relationship in the pedigree?
if (!missing(famid)) {
temp1 <- match(paste(as.character(famid), as.character(id1), sep='/'),
id, nomatch=0)
temp2 <- match(paste(as.character(famid), as.character(id2), sep='/'),
id, nomatch=0)
}
else {
temp1 <- match(id1, id, nomatch=0)
temp2 <- match(id2, id, nomatch=0)
}
if (any(temp1==0 | temp2==0))
stop("Subjects in relationships that are not in the pedigree")
if (any(temp1==temp2)) {
who <- temp1[temp1==temp2]
stop(paste("Subject", id[who], "is their own spouse or twin"))
}
# Check, are the twins really twins?
ncode <- as.numeric(code)
if (any(ncode<3)) {
twins <- (ncode<3)
if (any(momid[temp1[twins]] != momid[temp2[twins]]))
stop("Twins found with different mothers")
if (any(dadid[temp1[twins]] != dadid[temp2[twins]]))
stop("Twins found with different fathers")
}
# Check, are the monozygote twins the same gender?
if (any(code=="MZ twin")) {
mztwins <- (code=="MZ twin")
if (any(sex[temp1[mztwins]] != sex[temp2[mztwins]]))
stop("MZ Twins with different genders")
}
##Use id index as indx1 and indx2
if (!missing(famid)) {
temp$relation <- data.frame(famid=famid, indx1=temp1, indx2=temp2, code=code)
}
else temp$relation <- data.frame(indx1=temp1, indx2=temp2, code=code)
}
## Doc: Finish
if (missing(famid)) class(temp) <- 'pedigree'
else class(temp) <- 'pedigreeList'
temp
}
## Doc: Subscripting a pedigree
#' @rdname pedigree
#' @export
"[.pedigreeList" <- function(x, ..., drop=FALSE) {
if (length(list(...)) != 1) stop ("Only 1 subscript allowed")
ufam <- unique(x$famid)
if (is.factor(..1) || is.character(..1)) indx <- ufam[match(..1, ufam)]
else indx <- ufam[..1]
if (any(is.na(indx)))
stop(paste("Familiy", (..1[is.na(indx)])[1], "not found"))
keep <- which(x$famid %in% indx) #which rows to keep
for (i in c('id', 'famid', 'sex'))
x[[i]] <- (x[[i]])[keep]
kept.rows <- (1:length(x$findex))[keep]
x$findex <- match(x$findex[keep], kept.rows, nomatch=0)
x$mindex <- match(x$mindex[keep], kept.rows, nomatch=0)
#optional components
if (!is.null(x$status)) x$status <- x$status[keep]
if (!is.null(x$affected)) {
if (is.matrix(x$affected)) x$affected <- x$affected[keep,,drop=FALSE]
else x$affected <- x$affected[keep]
}
if (!is.null(x$relation)) {
keep <- !is.na(match(x$relation$famid, indx))
if (any(keep)) {
x$relation <- x$relation[keep,,drop=FALSE]
##Update twin id indexes
x$relation$indx1 <- match(x$relation$indx1, kept.rows, nomatch=0)
x$relation$indx2 <- match(x$relation$indx2, kept.rows, nomatch=0)
##If only one family chosen, remove famid
if (length(indx)==1) {x$relation$famid <- NULL}
}
else x$relation <- NULL # No relations matrix elements for this family
}
if (length(indx)==1) class(x) <- 'pedigree' #only one family chosen
else class(x) <- 'pedigreeList'
x
}
#' @rdname pedigree
#' @export
"[.pedigree" <- function(x, ..., drop=FALSE) {
if (length(list(...)) != 1) stop ("Only 1 subscript allowed")
if (is.character(..1) || is.factor(..1)) i <- match(..1, x$id)
else i <- (1:length(x$id))[..1]
if (any(is.na(i))) paste("Subject", ..1[which(is.na(i))][1], "not found")
z <- list(id=x$id[i],findex=match(x$findex[i], i, nomatch=0),
mindex=match(x$mindex[i], i, nomatch=0),
sex=x$sex[i])
if (!is.null(x$affected)) {
if (is.matrix(x$affected)) z$affected <- x$affected[i,, drop=F]
else z$affected <- x$affected[i]
}
if (!is.null(x$famid)) z$famid <- x$famid[i]
if (!is.null(x$relation)) {
indx1 <- match(x$relation$indx1, i, nomatch=0)
indx2 <- match(x$relation$indx2, i, nomatch=0)
keep <- (indx1 >0 & indx2 >0) #keep only if both id's are kept
if (any(keep)) {
z$relation <- x$relation[keep,,drop=FALSE]
z$relation$indx1 <- indx1[keep]
z$relation$indx2 <- indx2[keep]
}
}
if (!is.null(x$hints)) {
temp <- list(order= x$hints$order[i])
if (!is.null(x$hints$spouse)) {
indx1 <- match(x$hints$spouse[,1], i, nomatch=0)
indx2 <- match(x$hints$spouse[,2], i, nomatch=0)
keep <- (indx1 >0 & indx2 >0) #keep only if both id's are kept
if (any(keep))
temp$spouse <- cbind(indx1[keep], indx2[keep],
x$hints$spouse[keep,3])
}
z$hints <- temp
}
if (any(z$findex==0 & z$mindex>0) | any(z$findex>0 & z$mindex==0))
stop("A subpedigree cannot choose only one parent of a subject")
class(z) <- 'pedigree'
z
}
#' @rdname pedigree
#' @method print pedigree
#' @export
print.pedigree <- function(x, ...) {
cat("Pedigree object with", length(x$id), "subjects")
if (!is.null(x$famid)) cat(", family id=", x$famid[1], "\n")
else cat("\n")
cat("Bit size=", bitSize(x)$bitSize, "\n")
}
#' @rdname pedigree
#' @method print pedigreeList
#' @export
print.pedigreeList <- function(x, ...) {
cat("Pedigree list with", length(x$id), "total subjects in",
length(unique(x$famid)), "families\n")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.