R/kinship2_pedigree.R

Defines functions pedigree.makemissingid print.pedigreeList print.pedigree pedigree.coerce_relation_code pedigree.process_affected pedigree.process_status pedigree.sexrepair pedigree.idrepair pedigree.idcheck pedigree.parse_relation pedigree.process_relation pedigree

Documented in pedigree print.pedigree print.pedigreeList

#' Create a pedigree or pedigreeList object
#'
#' @param id Identification variable for individual
#' @param dadid Identification variable for father. Founders' parents should be coded
#' to NA, or another value specified by missid.
#' @param momid Identification variable for mother. Founders' parents should be coded
#' to NA, or another value specified by missid.
#' @param sex Gender of individual noted in `id'. Either character ("male","female",
#' "unknown","terminated") or numeric (1="male", 2="female", 3="unknown", 4="terminated")
#' data is allowed.  For character data the string may be truncated, and of arbitrary case.
#' @param affected A variable indicating affection status.  A multi-column matrix
#' can be used to give the status with respect to multiple traits. Logical, factor, and
#' integer types are converted to 0/1 representing unaffected and affected, respectively.
#' NAs are considered missing.
#' @param status Censor/Vital status (0="censored", 1="dead")
#' @param relation A matrix with 3 required columns (id1, id2, code) specifying special
#' relationship between pairs of individuals. Codes: 1=Monozygotic twin,  2=Dizygotic twin,
#' 3=Twin of unknown zygosity, 4=Spouse. (The last is necessary in order to place a marriage
#' with no children into the plot.) If famid is given in the call to create pedigrees, then
#' famid needs to be in the last column of the relation matrix. Note for tuples of >= 3 with
#' a mixture of zygosities, the plotting is limited to showing pairwise zygosity of adjacent
#' subjects, so it is only necessary to specify the pairwise zygosity, in the order the subjects
#' are given or appear on the plot.
#' @param famid An optional vector of family identifiers.  If it is present the result will
#' contain individual pedigrees for each family in the set, which can be extacted using
#' subscripts. Individuals need to have a unique id \emph{within} family.
#' @param missid The founders are those with no father or mother in the pedigree. The dadid
#' and momid values for these subjects will either be NA or the value of this variable. The
#' default for missid is 0 if the id variable is numeric, and "" (empty string) otherwise.
#' @param x pedigree object in print and subset methods
#' @param max_message_n max number of individuals to list in error messages
#' @param ... optional arguments passed to internal functions
#' @param drop logical, used in subset function for dropping dimensionality
#' @return An object of class \code{pedigree} or \code{pedigreeList} Containing the following items:
#'  famid id findex mindex sex  affected status relation
#' @author Terry Therneau
#' @name pedigree
#' @export


pedigree <- function(id, dadid, momid,
                     sex, affected,
                     status, relation,
                     famid, missid, max_message_n = 5) {
  n <- length(id)
  ## Code transferred from noweb to markdown vignette.
  ## Sections from the noweb/vignettes are noted here with
  ## Doc: Error and Data Checks

  pedigree.idcheck(
    id = id,
    momid = momid,
    dadid = dadid,
    sex = sex
  )


  id <- pedigree.idrepair(id = id)

  # momid <- pedigree.idrepair(id = momid)
  # dadid <- pedigree.idrepair(id = dadid)

  sex <- pedigree.sexrepair(sex = sex)

  if (missing(missid)) {
    missid <- pedigree.makemissingid(id = id)
  }

  nofather <- (is.na(dadid) | dadid == missid)
  nomother <- (is.na(momid) | momid == missid)

  if (!missing(famid)) {
    if (any(is.na(famid))) stop("The family id cannot contain missing values")
    if (is.factor(famid) || is.character(famid)) {
      if (length(grep("^ *$", famid)) > 0) {
        stop("The family id cannot be a blank or empty string")
      }
    }
    # Make a temporary new id from the family and subject pair
    oldid <- id
    id <- paste(as.character(famid), as.character(id), sep = "/")
    dadid <- paste(as.character(famid), as.character(dadid), sep = "/")
    momid <- paste(as.character(famid), as.character(momid), sep = "/")
  }
  has_famid <- !missing(famid)

  if (any(duplicated(id))) {
    duplist <- id[duplicated(id)]
    msg.n <- min(length(duplist), max_message_n)
    stop(paste("Duplicate subject id:", duplist[1:msg.n]))
  }
  findex <- match(dadid, id, nomatch = 0)
  if (any(sex[findex] != "male")) {
    who <- unique((id[findex])[sex[findex] != "male"])
    msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
    stop(paste(
      "Id not male, but is a father:",
      paste(who[msg.n], collapse = " ")
    ))
  }

  if (any(findex == 0 & !nofather)) {
    who <- dadid[which(findex == 0 & !nofather)]
    msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
    stop(paste(
      "Value of 'dadid' not found in the id list",
      paste(who[msg.n], collapse = " ")
    ))
  }

  mindex <- match(momid, id, nomatch = 0)

  if (any(sex[mindex] != "female")) {
    who <- unique((id[mindex])[sex[mindex] != "female"])
    msg.n <- seq_len(min(max_message_n, length(who)))
    stop(paste(
      "Id not female, but is a mother:",
      paste(who[msg.n], collapse = " ")
    ))
  }

  if (any(mindex == 0 & !nomother)) {
    who <- momid[which(mindex == 0 & !nomother)]
    msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
    stop(paste(
      "Value of 'momid' not found in the id list",
      paste(who[msg.n], collapse = " ")
    ))
  }

  if (any(mindex == 0 & findex != 0) || any(mindex != 0 & findex == 0)) {
    who <- id[which((mindex == 0 & findex != 0) | (mindex != 0 & findex == 0))]
    msg.n <- seq_len(min(max_message_n, length(who))) # Don't list a zillion
    stop(paste(
      "Subjects must have both a father and mother, or have neither",
      paste(who[msg.n], collapse = " ")
    ))
  }

  if (!missing(famid)) {
    if (any(famid[mindex] != famid[mindex > 0])) {
      who <- (id[mindex > 0])[famid[mindex] != famid[mindex > 0]]
      msg.n <- seq_len(min(max_message_n, length(who)))
      stop(paste(
        "Mother's family != subject's family",
        paste(who[msg.n], collapse = " ")
      ))
    }
    if (any(famid[findex] != famid[findex > 0])) {
      who <- (id[findex > 0])[famid[findex] != famid[findex > 0]]
      msg.n <- seq_len(min(max_message_n, length(who)))
      stop(paste(
        "Father's family != subject's family",
        paste(who[msg.n], collapse = " ")
      ))
    }
  }
  ## Doc: Creation of Pedigrees
  if (missing(famid)) {
    temp <- list(id = id, findex = findex, mindex = mindex, sex = sex)
  } else {
    temp <- list(
      famid = famid, id = oldid, findex = findex, mindex = mindex,
      sex = sex
    )
  }

  if (!missing(affected)) {
    temp$affected <- pedigree.process_affected(affected, n)
  }


  if (!missing(status)) {
    temp$status <- pedigree.process_status(status, n)
  }

  if (!missing(relation)) {
    temp$relation <- pedigree.process_relation(
      relation = relation,
      has_famid = has_famid,
      famid = if (has_famid) famid else NULL,
      id = id,
      momid = momid,
      dadid = dadid,
      sex = sex
    )
  }
  ## Doc: Finish
  if (missing(famid)) {
    class(temp) <- "pedigree"
  } else {
    class(temp) <- "pedigreeList"
  }
  temp
}

#' @keywords internal
#' @noRd
pedigree.process_relation <- function(relation,
                                      has_famid,
                                      famid,
                                      id,
                                      momid,
                                      dadid,
                                      sex) {
  rel_parsed <- pedigree.parse_relation(relation,
    has_famid = has_famid
  )
  id1 <- rel_parsed$id1
  id2 <- rel_parsed$id2
  rel_code <- pedigree.coerce_relation_code(rel_parsed$code)
  rel_famid <- rel_parsed$famid

  # Ensure everyone in the relationship is in the pedigree
  if (has_famid) {
    key1 <- paste(as.character(rel_famid), as.character(id1), sep = "/")
    key2 <- paste(as.character(rel_famid), as.character(id2), sep = "/")
  } else {
    key1 <- id1
    key2 <- id2
  }

  indx1 <- match(key1, id, nomatch = 0L)
  indx2 <- match(key2, id, nomatch = 0L)

  if (any(indx1 == 0L | indx2 == 0L)) {
    stop("Subjects in relationships that are not in the pedigree")
  }
  if (any(indx1 == indx2)) {
    who <- indx1[indx1 == indx2]
    stop(paste("Subject", id[who], "is their own spouse or twin"))
  }

  # Twin consistency checks
  numeric_code <- as.numeric(rel_code)
  if (any(numeric_code < 3L)) {
    twins <- numeric_code < 3L
    if (any(momid[indx1[twins]] != momid[indx2[twins]])) {
      stop("Twins found with different mothers")
    }
    if (any(dadid[indx1[twins]] != dadid[indx2[twins]])) {
      stop("Twins found with different fathers")
    }
  }

  # MZ twin sex check
  if (any(rel_code == "MZ twin")) {
    mztwins <- rel_code == "MZ twin"
    if (any(sex[indx1[mztwins]] != sex[indx2[mztwins]])) {
      stop("MZ twins with different sexes")
    }
  }

  if (has_famid == TRUE) {
    data.frame(
      famid = rel_famid,
      indx1 = indx1,
      indx2 = indx2,
      code  = rel_code
    )
  } else {
    data.frame(
      indx1 = indx1,
      indx2 = indx2,
      code  = rel_code
    )
  }
}

pedigree.parse_relation <- function(relation, has_famid) {
  if (is.matrix(relation)) {
    expected_cols <- if (has_famid) 4L else 3L
    if (ncol(relation) != expected_cols) {
      if (has_famid) {
        stop("Relation matrix must have 3 columns + famid")
      } else {
        stop("Relation matrix must have 3 columns: id1, id2, code")
      }
    }
    id1 <- relation[, 1]
    id2 <- relation[, 2]
    code <- relation[, 3]
    fam <- if (has_famid) relation[, 4] else NULL
  } else if (is.data.frame(relation)) {
    id1 <- relation$id1
    id2 <- relation$id2
    code <- relation$code
    fam <- if (has_famid) relation$famid else NULL

    if (is.null(id1) || is.null(id2) || is.null(code) ||
      (has_famid && is.null(fam))) {
      if (has_famid) {
        stop("Relation data must have id1, id2, code, and family id")
      } else {
        stop("Relation data frame must have id1, id2, and code")
      }
    }
  } else {
    if (has_famid) {
      stop("Relation argument must be a matrix or a dataframe")
    } else {
      stop("Relation argument must be a matrix or a list")
    }
  }

  list(
    id1 = id1,
    id2 = id2,
    code = code,
    famid = fam
  )
}

#'
#' @keywords internal
#' @noRd
#' @inheritParams pedigree

pedigree.idcheck <- function(id, momid, dadid, sex) {
  n <- length(id)
  if (length(momid) != n) stop("Mismatched lengths, id and momid")
  if (length(dadid) != n) stop("Mismatched lengths, id and dadid")
  if (length(sex) != n) stop("Mismatched lengths, id and sex")
  if (any(is.na(id))) stop("Missing value for the id variable")
}

#' @keywords internal
#' @noRd
#' @inheritParams pedigree

pedigree.idrepair <- function(id) {
  if (!is.numeric(id)) {
    id <- as.character(id)
    if (length(grep("^ *$", id)) > 0) {
      stop("A blank or empty string is not allowed as the id variable")
    }
  }
  id
}

#' @keywords internal
#' @noRd
#' @inheritParams pedigree

pedigree.sexrepair <- function(sex) {
  # Allow for character/numeric
  # Allow for character/numeric/factor in the sex variable
  if (is.factor(sex)) {
    sex <- as.character(sex)
  }
  codes <- c("male", "female", "unknown", "terminated")
  if (is.character(sex)) {
    # if all numeric strings, convert to numeric
    if (length(unique(suppressWarnings(as.numeric(sex)))) > 1 &&
      all(is.numeric(unique(suppressWarnings(as.numeric(sex)))), na.rm = TRUE)) {
      sex <- as.numeric(sex)
    } else {
      sex <- charmatch(casefold(sex, upper = FALSE), codes,
        nomatch = 3
      )
    }
  }
  # assume either 0/1/2/4 =  female/male/unknown/term, or 1/2/3/4
  #  if only 1/2 assume no unknowns
  if (min(sex) == 0) {
    sex <- sex + 1
  }
  sex <- ifelse(sex < 1 | sex > 4, 3, sex)
  if (all(sex > 2)) {
    stop(
      "All sex values are labeled as unknown. Please try using config options ",
      "to specify male, female, and unknown labels ",
      "(code_male, code_female, code_unknown)"
    )
  } else if (mean(sex == 3) > 0.25) {
    warning(
      "More than 25% of the sex values are 'unknown'. ",
      "Please try using config options to specify male, female, and unknown labels."
    )
  }
  sex <- factor(sex, 1:4, labels = codes)

  sex
}


#' @keywords internal
#' @noRd
pedigree.process_status <- function(status, n) {
  if (length(status) != n) {
    stop("Wrong length for affected")
  }
  if (is.logical(status)) status <- as.integer(status)
  if (any(status != 0L & status != 1L)) {
    stop("Invalid status code")
  }
  status
}

#' @keywords internal
#' @noRd
pedigree.process_affected <- function(affected, n) {
  if (is.matrix(affected)) {
    if (nrow(affected) != n) stop("Wrong number of rows in affected")
    if (is.logical(affected)) affected <- 1L * affected
  } else {
    if (length(affected) != n) {
      stop("Wrong length for affected")
    }

    if (is.logical(affected)) affected <- as.numeric(affected)
    if (is.factor(affected)) affected <- as.numeric(affected) - 1
  }
  if (max(affected, na.rm = TRUE) > min(affected, na.rm = TRUE)) {
    affected <- affected - min(affected, na.rm = TRUE)
  }
  if (!all(affected == 0 | affected == 1 | is.na(affected))) {
    stop("Invalid code for affected status")
  }
  affected
}


## Doc: Subscripting a pedigree
#' @rdname pedigree
#' @export
"[.pedigreeList" <- function(x, ..., drop = FALSE) {
  if (length(list(...)) != 1) stop("Only 1 subscript allowed")
  ufam <- unique(x$famid)
  if (is.factor(..1) || is.character(..1)) {
    indx <- ufam[match(..1, ufam)]
  } else {
    indx <- ufam[..1]
  }

  if (any(is.na(indx))) {
    stop(paste("Family", (..1[is.na(indx)])[1], "not found"))
  }

  keep <- which(x$famid %in% indx) # which rows to keep
  for (i in c("id", "famid", "sex")) {
    x[[i]] <- (x[[i]])[keep]
  }

  kept.rows <- seq_along(x$findex)[keep]
  x$findex <- match(x$findex[keep], kept.rows, nomatch = 0)
  x$mindex <- match(x$mindex[keep], kept.rows, nomatch = 0)

  # optional components
  if (!is.null(x$status)) x$status <- x$status[keep]
  if (!is.null(x$affected)) {
    if (is.matrix(x$affected)) {
      x$affected <- x$affected[keep, , drop = FALSE]
    } else {
      x$affected <- x$affected[keep]
    }
  }
  if (!is.null(x$relation)) {
    keep <- !is.na(match(x$relation$famid, indx))
    if (any(keep)) {
      x$relation <- x$relation[keep, , drop = FALSE]
      ## Update twin id indexes
      x$relation$indx1 <- match(x$relation$indx1, kept.rows, nomatch = 0)
      x$relation$indx2 <- match(x$relation$indx2, kept.rows, nomatch = 0)
      ## If only one family chosen, remove famid
      if (length(indx) == 1) {
        x$relation$famid <- NULL
      }
    } else {
      x$relation <- NULL
    } # No relations matrix elements for this family
  }

  if (length(indx) == 1) {
    class(x) <- "pedigree"
    # only one family chosen
  } else {
    class(x) <- "pedigreeList"
  }
  x
}

#' @rdname pedigree
#' @export
"[.pedigree" <- function(x, ..., drop = FALSE) {
  if (length(list(...)) != 1) stop("Only 1 subscript allowed")
  if (is.character(..1) || is.factor(..1)) {
    i <- match(..1, x$id)
  } else {
    i <- seq_along(x$id)[..1]
  }

  if (any(is.na(i))) paste("Subject", ..1[which(is.na(i))][1], "not found")

  z <- list(
    id = x$id[i], findex = match(x$findex[i], i, nomatch = 0),
    mindex = match(x$mindex[i], i, nomatch = 0),
    sex = x$sex[i]
  )
  if (!is.null(x$affected)) {
    if (is.matrix(x$affected)) {
      z$affected <- x$affected[i, , drop = FALSE]
    } else {
      z$affected <- x$affected[i]
    }
  }
  if (!is.null(x$famid)) z$famid <- x$famid[i]


  if (!is.null(x$relation)) {
    indx1 <- match(x$relation$indx1, i, nomatch = 0)
    indx2 <- match(x$relation$indx2, i, nomatch = 0)
    keep <- (indx1 > 0 & indx2 > 0) # keep only if both id's are kept
    if (any(keep)) {
      z$relation <- x$relation[keep, , drop = FALSE]
      z$relation$indx1 <- indx1[keep]
      z$relation$indx2 <- indx2[keep]
    }
  }

  if (!is.null(x$hints)) {
    temp <- list(order = x$hints$order[i])
    if (!is.null(x$hints$spouse)) {
      indx1 <- match(x$hints$spouse[, 1], i, nomatch = 0)
      indx2 <- match(x$hints$spouse[, 2], i, nomatch = 0)
      keep <- (indx1 > 0 & indx2 > 0) # keep only if both id's are kept
      if (any(keep)) {
        temp$spouse <- cbind(
          indx1[keep], indx2[keep],
          x$hints$spouse[keep, 3]
        )
      }
    }
    z$hints <- temp
  }

  if (any(z$findex == 0 & z$mindex > 0) || any(z$findex > 0 & z$mindex == 0)) {
    stop("A subpedigree cannot choose only one parent of a subject")
  }
  class(z) <- "pedigree"
  z
}
#' @keywords internal
#' @noRd
pedigree.coerce_relation_code <- function(code) {
  levels <- c("MZ twin", "DZ twin", "UZ twin", "spouse")

  if (is.factor(code)) {
    code <- as.character(code)
  }

  if (is.numeric(code)) {
    if (any(code < 1L | code > 4L | is.na(code))) {
      stop("Invalid relationship code")
    }
    factor(code, levels = 1:4, labels = levels)
  } else {
    idx <- match(code, levels)
    if (any(is.na(idx))) {
      stop("Invalid relationship code")
    }
    factor(idx, levels = 1:4, labels = levels)
  }
}
#' @rdname pedigree
#' @method print pedigree
print.pedigree <- function(x, ...) {
  cat("Pedigree object with", length(x$id), "subjects")
  if (!is.null(x$famid)) {
    cat(", family id=", x$famid[1], "\n")
  } else {
    cat("\n")
  }
  cat("Bit size=", kinship2_bitSize(x)$bitSize, "\n")
}

#' @rdname pedigree
#' @method print pedigreeList
print.pedigreeList <- function(x, ...) {
  cat(
    "Pedigree list with", length(x$id), "total subjects in",
    length(unique(x$famid)), "families\n"
  )
}


pedigree.makemissingid <- function(id) {
  ## Doc:  Errors2

  if (is.numeric(id)) {
    0
  } else {
    ""
  }
}

Try the ggpedigree package in your browser

Any scripts or data that you put into this service are public.

ggpedigree documentation built on March 16, 2026, 9:07 a.m.