R/pedigree.shrink.R

Defines functions print.pedigree.shrink pedigree.shrink

Documented in pedigree.shrink print.pedigree.shrink

#' Shrink pedigree object
#'
#' @description
#' Shrink pedigree object to specified bit size with priority placed on
#' trimming uninformative subjects. The algorithm is useful for getting a
#' pedigree condensed to a minimally informative size for algorithms or testing
#' that are limited by size of the pedigree.
#'
#' @details
#' Iteratively remove subjects from the pedigree. The random removal of members
#' was previously controlled by a seed argument, but we remove this, forcing
#' users to control randomness outside the function. First remove uninformative
#' subjects, i.e., unavailable (not genotyped) with no available descendants.
#' Next, available terminal subjects with unknown phenotype if both parents
#' available. Last, iteratively shrinks pedigrees by preferentially removing
#' individuals (chosen at random if there are multiple of the same status):
#' 1. Subjects with unknown affected status
#' 2. Subjects with unaffected affected status
#' 3. Affected subjects.
#'
#' @aliases pedigree.shrink print.pedigree.shrink
#' @param ped Pedigree object created by the pedigree function,
#' @param avail Vector of binary availability status (0/1), i.e. having data,
#' or sample available
#' @param affected Vector of binary affected status (0/1/NA). If NULL, uses
#' first column of the pedigree object affected matrix.
#' @param maxBits Optional, the bit size for which to shrink the pedigree
#' @param x Pedigree.shrink object used in method functions
#' @param ... Optional arguments passed to internal functions
#'
#' @return Pedigree object shrinked to the desired size
#'
#' @examples
#' data(sample.ped)
#' pedAll <- with(sample.ped, pedigree(id, father, mother, sex,
#'   affected = cbind(affected, avail), famid = ped
#' ))
#' ped1 <- pedAll["1"]
#' ped1trim <- pedigree.shrink(ped1, maxBits = 12)
#'
#' @author Original by Dan Schaid, updated to kinship2 by Jason Sinnwell
#' @seealso \code{\link{pedigree}}, \code{\link{plot.pedigree.shrink}}
#' @export pedigree.shrink
pedigree.shrink <- function(ped, avail, affected = NULL, maxBits = 16) {
  if (!inherits(ped, "pedigree")) {
    stop("Must be a pegigree object.\n")
  }

  if (any(is.na(avail))) {
    stop("NA values not allowed in avail vector.")
  }

  if (is.null(affected)) {
    affected <- if (is.matrix(ped$affected)) ped$affected[, 1] else ped$affected
  }

  ped$affected <- affected

  idTrimmed <- numeric()
  idList <- list()
  nOriginal <- length(ped$id)

  bitSizeOriginal <- bitSize(ped)$bitSize

  ## first find unavailable subjects to remove anyone who is not
  ## available and does not have an available descendant

  idTrimUnavail <- findUnavailable(ped, avail)

  if (length(idTrimUnavail)) {
    pedTrimmed <- pedigree.trim(idTrimUnavail, ped)
    avail <- avail[match(pedTrimmed$id, ped$id)]
    idTrimmed <- c(idTrimmed, idTrimUnavail)
    idList$unavail <- idTrimUnavail

  } else {
    ## no trimming, reset to original ped
    pedTrimmed <- ped
  }

  ## Next trim any available terminal subjects with unknown phenotype
  ## but only if both parents are available

  ## added nNew>0 check because no need to trim anymore if empty ped

  nChange <- 1
  idList$noninform <- NULL
  nNew <- length(pedTrimmed$id)

  while (nChange > 0 & nNew > 0) {
    nOld <- length(pedTrimmed$id)

    ## findAvailNonInform finds non-informative, but after suggesting
    ## their removal, checks for more unavailable subjects before returning
    idTrimNonInform <- findAvailNonInform(pedTrimmed, avail)

    if (length(idTrimNonInform)) {
      pedNew <- pedigree.trim(idTrimNonInform, pedTrimmed)
      avail <- avail[match(pedNew$id, pedTrimmed$id)]
      idTrimmed <- c(idTrimmed, idTrimNonInform)
      idList$noninform = c(idList$noninform, idTrimNonInform)
      pedTrimmed <- pedNew
    }
    nNew <- length(pedTrimmed$id)
    nChange <- nOld - nNew
  }

  ##  Determine number of subjects & bitSize after initial trimming
  nIntermed <- length(pedTrimmed$id)

  bitSize <- bitSize(pedTrimmed)$bitSize

  ## Now sequentially shrink to fit bitSize <= maxBits

  bitVec <- c(bitSizeOriginal, bitSize)

  isTrimmed <- TRUE
  idList$affect <- NULL

  while (isTrimmed & (bitSize > maxBits)) {
    ## First, try trimming by unknown status
    save <- findAvailAffected(pedTrimmed, avail, affstatus = NA)
    isTrimmed <- save$isTrimmed

    ## Second, try trimming by unaffected status if no unknowns to trim
    if (!isTrimmed) {
      save <- findAvailAffected(pedTrimmed, avail, affstatus = 0)
      isTrimmed <- save$isTrimmed
    }


    ## Third, try trimming by affected status if no unknowns & no unaffecteds
    ## to trim
    if (!isTrimmed) {
      save <- findAvailAffected(pedTrimmed, avail, affstatus = 1)
      isTrimmed <- save$isTrimmed
    }

    if (isTrimmed) {
      pedTrimmed <- save$ped
      avail <- save$newAvail
      bitSize <- save$bitSize
      bitVec <- c(bitVec, bitSize)
      idTrimmed <- c(idTrimmed, save$idTrimmed)
      idList$affect = c(idList$affect, save$idTrimmed)
    }
  }
  ## end while (isTrimmed) & (bitSize > maxBits)

  nFinal <- length(pedTrimmed$id)

  obj <- list(
    pedObj = pedTrimmed,
    idTrimmed = idTrimmed,
    idList = idList,
    bitSize = bitVec,
    avail = avail,
    pedSizeOriginal = nOriginal,
    pedSizeIntermed = nIntermed,
    pedSizeFinal = nFinal
  )

  class(obj) <- "pedigree.shrink"

  return(obj)
}

#' @rdname pedigree.shrink
#' @method print pedigree.shrink
#' @export
print.pedigree.shrink <- function(x, ...) {
  cat("Pedigree Size:\n")

  if (length(x$idTrimmed) > 2) {
    n <- c(x$pedSizeOriginal, x$pedSizeIntermed, x$pedSizeFinal)
    b <- c(x$bitSize[1], x$bitSize[2], x$bitSize[length(x$bitSize)])
    row.nms <- c("Original", "Only Informative", "Trimmed")
  } else {
    n <- c(x$pedSizeOriginal, x$pedSizeIntermed)
    b <- c(x$bitSize[1], x$bitSize[2])
    row.nms <- c("Original", "Trimmed")
  }

  df <- data.frame(N.subj = n, Bits = b)
  rownames(df) <- row.nms
  print(df, quote = FALSE)

  if (!is.null(x$idList$unavail)) {
    cat("\n Unavailable subjects trimmed:\n", x$idList$unavail, "\n")
  }

  if (!is.null(x$idList$noninform)) {
    cat("\n Non-informative subjects trimmed:\n", x$idList$noninform, "\n")
  }

  if (!is.null(x$idList$affect)) {
    cat("\n Informative subjects trimmed:\n", x$idList$affect, "\n")
  }

  invisible()
}
sinnweja/kinship2 documentation built on July 8, 2023, 11:26 p.m.