R/Ped_Methods.R

Defines functions new.ped ped is.ped summary.ped plot.ped

Documented in is.ped new.ped ped plot.ped summary.ped

#' Create an object of class ped.
#'
#' Create an object of class \code{ped}, from a \code{data.frame}, required input for \code{\link{reassign_gen}}, \code{\link{censor_ped}}, and \code{\link{trim_ped}} functions.
#'
#' The data frame supplied to \code{new.ped}, \code{ped_file}, \emph{must} contain the following variables:
#' \tabular{lll}{
#' \strong{name} \tab \strong{type} \tab \strong{description} \cr
#' \code{FamID} \tab numeric \tab family identification number \cr
#' \code{ID} \tab numeric \tab individual identification number \cr
#' \code{dadID} \tab numeric \tab identification number of father \cr
#' \code{momID} \tab numeric \tab identification number of mother \cr
#' \code{sex} \tab numeric \tab gender identification; if male \code{sex = 0}, if female \code{sex = 1} \cr
#' \code{affected} \tab logical \tab disease-affection status: \cr
#' \tab \tab \code{affected = TRUE} if affected by disease, and \code{FALSE} otherwise, \cr
#' }
#'
#' Optionally, \code{ped_file} \emph{may} contain any of the following variables:
#' \tabular{lll}{
#' \strong{name} \tab \strong{type}\tab \strong{description} \cr
#' \code{available} \tab logical \tab availibility status; \cr
#' \tab\tab \code{available = TRUE} if available, and \code{FALSE} otherwise. \cr
#' \code{DA1} \tab numeric \tab paternally inherited allele at the assumed disease locus: \cr
#' \tab \tab \code{DA1} = 1 if rare variant is present, and 0 otherwise\cr
#' \code{DA2} \tab numeric \tab maternally inherited allele at the assumed disease locus: \cr
#' \tab \tab \code{DA2} = 1 if rare variant is present, and 0 otherwise\cr
#' \code{birthYr} \tab numeric \tab the individual's birth year \cr
#' \code{onsetYr} \tab numeric \tab the individual's year of disease onset, when applicable, otherwise \code{NA} \cr
#' \code{deathYr} \tab numeric \tab the individual's year of death, when applicable, otherwise \code{NA} \cr
#' \code{RR} \tab numeric \tab the individual's relative-risk of disease \cr
#' \code{Gen} \tab numeric \tab the individual's generation number relative to the eldest founder. \cr
#' \tab \tab For the eldest founder \code{Gen} = 1, for his or her offspring \code{Gen} = 2, etc. \cr
#' \code{proband} \tab logical \tab proband identifier: \cr
#' \tab \tab \code{proband = TRUE} if individual is the proband, and \code{FALSE} otherwise.\cr
#' \code{subtype} \tab character \tab the individual's disease subtype, when applicable, otherwise \code{NA} \cr
#' }
#'
#' \emph{We note that some of the optional fields above may be required for various ped functions}
#'
#' @param ped_file Data.frame. A pedigree, see details.
#'
#' @return An object of class ped.
#' @export
#'
#' @examples
#' data(EgPeds)
#' head(EgPeds)
#'
#' ped1 = new.ped(EgPeds[EgPeds$FamID == 1, ])
#' head(ped1, n = 3)
#' class(ped1)
#' summary(ped1)
#'
#' AllPeds = new.ped(EgPeds)
#' head(AllPeds)
#' class(AllPeds)
#' summary(AllPeds)
#'
new.ped <- function(ped_file) {
  n <- length(unique(ped_file$FamID))

  if (n > 1){
    lapply(seq(1, n, by = 1), function(x){
      check_ped(ped_file[ped_file$FamID == unique(ped_file$FamID)[x], ])})
  } else {
    check_ped(ped_file)
  }

  if (!"available" %in% colnames(ped_file)) ped_file$available <- T

  return(ped(ped_file))

}

#' Constructor function for an object of class ped
#'
#' @param ped_file A pedigree generated by \code{sim_ped} pr \code{sim_RVped}.
#'
#' @return an object of class \code{ped}.
#' @keywords internal
ped <- function(ped_file) {
  class(ped_file) <- c("ped", class(ped_file))
  return(ped_file)
}

#' Check to see if object is of class ped
#'
#' @param x An R object.
#'
#' @return Logical. Indicates if \code{x} is of class \code{ped}.
#' @keywords internal
#'
is.ped <- function(x) {
  return(inherits(x, "ped"))
}

#' Summarize a sample of pedigrees
#'
#' Summarize a sample of pedigrees
#'
#' The \code{summary.ped} function returns two data frames. The first is called \code{family_info}, and contains the following fields for each family supplied.
#' \tabular{ll}{
#' \strong{variable} \tab \strong{description} \cr
#' \code{FamID} \tab family identification number\cr
#' \code{totalRelatives} \tab total number of relatives \cr
#' \code{numAffected}\tab total number of disease-affected individuals\cr
#' \code{aveOnsetAge}\tab average onset age among the disease-affected relatives\cr
#' \code{aveIBD}\tab average of the pairwise IBD probabilities among the disease-affected relatives\cr
#' \code{ascertainYear}\tab the year the pedigree was ascertained\cr
#' \code{segRV} \tab logical Indicates whether or not pedigree segregates a causal variant. \cr \tab If the pedigree segregates the variant \code{segRV = TRUE}.\cr
#' \code{p_subtypeLabel} \tab NOTE: this is only listed when pedigrees contain relatives affected by multiple subtypes. \cr
#'                       \tab the proportion of disease-affected relatives in the family with the specified subtype.\cr
#'                       \tab Here "subtypeLabel" is determined by subtype ID specifed by the user when creating the \cr
#'                       \tab \code{\link{hazard}} object.\cr
#' }
#'
#' The second item returned by \code{summary.ped} is called \code{affected_info}, and contains the following fields for each disease-affected relative supplied.
#' \tabular{ll}{
#' \strong{variable} \tab \strong{description} \cr
#' \code{FamID} \tab family identification number \cr
#' \code{ID} \tab individual identification number \cr
#' \code{birthYr}\tab the individual's birth year, when applicable, otherwise \code{NA} \cr
#' \code{onsetYr}\tab the individual's year of disease onset, when applicable, otherwise \code{NA} \cr
#' \code{deathYr}\tab the individual's year of death, when applicable, otherwise \code{NA} \cr
#' \code{proband}\tab a proband identifier: \code{proband = TRUE} if the individual is the proband, and \code{FALSE} otherwise.\cr
#' \code{RVstatus}\tab the individual's causal RV status; set to 1 if individual is a carrier, and 0 otherwise.\cr
#' \code{subtype}\tab NOTE: this is only listed when pedigree was simulated for diseases with multiple subtypes.\cr
#'               \tab The individual's disease subtype.\cr
#' }
#'
#' @param object An object of class ped.
#' @param ... additional arguments passed to other methods.
#'
#' @return \item{\code{family_info} }{A data frame containing family specific variables for each pedigree supplied.  See details.}
#' @return \item{\code{affected_info} }{A data frame containing information for the affected individuals in each pedigree supplied.  See details.}
#' @export
#'
#' @importFrom stats na.omit
#'
#' @examples
#' #Read in age-specific harard data and create hazard object.
#' data(AgeSpecific_Hazards)
#' haz_obj <- hazard(hazardDF = AgeSpecific_Hazards)
#'
#' #Simulate a pedigree ascertained for multiple affecteds
#' set.seed(6)
#' RVped2015 <- sim_RVped(hazard_rates = haz_obj,
#'                        num_affected = 2,
#'                        ascertain_span = c(1900, 2015),
#'                        GRR = 30, carrier_prob = 0.002,
#'                        RVfounder = TRUE,
#'                        stop_year = 2015,
#'                        recall_probs = c(1),
#'                        founder_byears = c(1900, 1925),
#'                        FamID = 1)[[2]]
#'
#' # Plot the pedigree with age labels at the year 2015
#' plot(RVped2015, ref_year = 2015)
#'
#' # View summary information for the pedigree
#' summary(RVped2015)
#'
#'
#'
#' # Import the EgPeds dataset and create ped object
#' data(EgPeds)
#' study_peds <- new.ped(EgPeds)
#'
#' # View summary information for study_peds
#' summary(study_peds)
#'
summary.ped <- function(object, ...) {
  n <- length(unique(object$FamID))

  #gather subtype information, when applicable
  if ("subtype" %in% colnames(object)) {
    s_ID <- unique(na.omit(object$subtype))
  } else {
    s_ID <- NULL
  }


  famdat <- lapply(unique(object$FamID), function(y){
    get_famInfo(object[object$FamID == y, ], s_ID)
  })

  afdat <- lapply(unique(object$FamID), function(y){
    get_affectedInfo(object[object$FamID == y, ])
    })

  return(list(family_info = do.call(rbind, famdat),
              affected_info = do.call(rbind, afdat)))
}


#' Plot pedigree
#'
#' @param x An object of class ped.
#' @param ref_year When provided, the reference year for age labels.  Users may supply a (numeric) year which will create age labels at the specified year. Alternatively, users may set \code{ref_year}\code{ = "ascYR"}, which will create age lables for the year the pedigree was ascertained, when ascertained.  By default, \code{ref_year = NULL} and no age labels are created.
#' @param gen_lab Logical. Should generation labels be printed in the margin.  By default, \code{FALSE}.
#' @param plot_legend Logical. Should legend for symbol shading be plotted.  By default, \code{TRUE}.
#' @param location The location for the pedigree legend, as in \code{\link{pedigree.legend}}. Options include: \code{"topleft"}, \code{"topright"}, \code{"bottomright"}, or \code{"bottomleft"}.  By default, \code{location = "topleft"}.
#' @param radius The radius size for the pedigree legend, as in \code{\link{pedigree.legend}}.  By default, \code{radius = 0.2}.
#' @param density The density of shading in plotted symbols, as in \code{\link{plot.pedigree}}.  By default, \code{density = c(-1, 35, 55)}.
#' @param angle The angle of shading in plotted symbols, as in \code{\link{plot.pedigree}}.  By default, \code{angle = c(90, 65, 40)}.
#' @param gen_stretch Numeric. Used to stretch the spacing between generation lables.  By default, \code{gen_stretch = 2}.  Increase for more space between labels, decrease for less space.
#' @param cex The text size. By default, \code{cex = 1}.
#' @param adj When \code{ref_year} is supplied, used to adjust position of reference year, as in \code{\link{mtext}}.  By default, \code{adj = 1}.
#' @param line When \code{ref_year} is supplied, used to adjust position of reference year, as in \code{\link{mtext}}.  By default, \code{line = 2}.
#' @param mar The sizes for plot margins, as in \code{\link{par}}.
#' @param ... Extra options that feed to \code{\link{plot.pedigree}}, or \code{\link{plot}}.
#'
#' @seealso \code{\link{plot.pedigree}}, \code{\link{pedigree.legend}}, \code{\link{plot}}, \code{\link{par}}
#' @importFrom graphics plot
#' @importFrom graphics mtext
#' @importFrom graphics par
#' @importFrom kinship2 pedigree.legend
#' @importFrom utils as.roman
#' @references Terry M Therneau and Jason Sinnwell (2015). \strong{kinship2: Pedigree Functions.} \emph{R package version 1.6.4.} \url{https://CRAN.R-project.org/package=kinship2}
#' @export
#'
#' @examples
#' #Read in age-specific harard data and create hazard object.
#' data(AgeSpecific_Hazards)
#' haz_obj <- hazard(hazardDF = AgeSpecific_Hazards)
#'
#' #Simulate a pedigree ascertained for multiple affecteds
#' set.seed(2)
#' RVped2015 <- sim_RVped(hazard_rates = haz_obj,
#'                        num_affected = 2,
#'                        ascertain_span = c(1900, 2015),
#'                        GRR = 30, carrier_prob = 0.002,
#'                        RVfounder = TRUE,
#'                        stop_year = 2015,
#'                        recall_probs = c(1),
#'                        founder_byears = c(1900, 1905),
#'                        FamID = 1)[[2]]
#'
#' summary(RVped2015)
#'
#' #plot pedigree without age labels
#' plot(RVped2015)
#'
#' #plot pedigree with age labels, set the
#' #reference year to be the ascertainment year
#' plot(RVped2015, ref_year = "ascYr")
#'
#' #plot pedigree with age lablels at specified reference years.
#' plot(RVped2015, ref_year = 2015, cex = 0.75, symbolsize = 0.95)
#' plot(RVped2015, ref_year = 2005, cex= 0.75, symbolsize = 1.25)
#' plot(RVped2015, ref_year = 1995, cex= 0.75, symbolsize = 1.25)
#' plot(RVped2015, ref_year = 1985, cex= 0.75, symbolsize = 1.25)
#'
#' # plot pedigree generation labels
#' plot(RVped2015, ref_year = 2015,
#'      gen_lab = TRUE,
#'      cex = 0.75, symbolsize = 0.95)
#'
#' # use gen_stretch to place extra space between generation labels
#' # NOTE: by default, gen_stretch = 2; increase for extra space.
#' plot(RVped2015, ref_year = 2015,
#'      gen_lab = TRUE, gen_stretch = 3,
#'      cex = 0.75, symbolsize = 0.95)
#'
plot.ped <- function(x, ref_year = NULL, gen_lab = FALSE,
                     plot_legend = TRUE,
                     location = "topleft", radius = 0.2,
                     density = c(-1, 35, 55),
                     angle = c(90, 65, 40),
                     gen_stretch = 2, cex = 1, adj = 1, line = 2,
                     mar = c(5.1, 4.1, 4.1, 2.1), ...) {



  if (is.null(ref_year)) {
    # If ref_year is not provided
    # plot the pedigree with ID and subtype lables, when present.
    k2ped <- ped2pedigree(x)

    if (!is.na(match("subtype", colnames(x)))) {
      # Create a death age label for individuals who have died.
      Sub_lab <- ifelse(is.na(x$subtype),
                        "", paste0("\n subtype: ",
                                   x$subtype))

      pedLabs = paste0("ID: ", sep = "", x$ID,
                       Sub_lab)
    } else {
      pedLabs <- x$ID
    }

  } else if (ref_year == "ascYr") {
    if(!("proband" %in% colnames(x))){
      stop("\n \n Proband not detected, cannot determine ascertainment year. \n Please supply ref_year. \n")
    } else if (sum(x$proband) > 1) {
      stop("\n \n Multiple probands selected, cannot determine ascertainment year. \n Please supply ref_year. \n")
    } else if (!("onsetYr" %in% colnames(x))) {
      stop("\n \n OnsetYr not detected, cannot determine ascertainment year. \n Please supply ref_year. \n")
    } else if (all(is.na(x$onsetYr))) {
      stop("\n \n OnsetYr missing, cannot determine ascertainment year. \n Please supply ref_year. \n")
    }
    # If ascertainment year specified censor all pedigree info that occurs after
    # the ascertainment year and create associated age lables.
    cped <- censor_ped(x, censor_year = x$onsetYr[x$proband])
    pedLabs <- pedigreeLabels(x = cped, ref_year = x$onsetYr[x$proband])
    k2ped <- ped2pedigree(cped)
    pYr <- x$onsetYr[x$proband]

  } else if (is.numeric(ref_year)) {
    # Censor all pedigree info that occurs after
    # ref_year and create associated age lables.
    cped <- censor_ped(x, censor_year = ref_year)
    pedLabs <- pedigreeLabels(x = cped, ref_year)
    k2ped <- ped2pedigree(cped)
    pYr <- ref_year
  }


  if (gen_lab) {
    #add extra space to margin for generation labels
    mar[4] <- mar[4] + 1
    #determine how many levels the pedigree has
    nlevel <- nrow(align.pedigree(ped2pedigree(x))$nid)
    # get generation lables
    gen_labels <- get_gen_labs(x, nlevel)
  }

  par(mar = mar)
  plot(x = k2ped, id = pedLabs,
       status = k2ped$status, affected = k2ped$affected,
       density = density, angle = angle, cex = cex,
       mar = mar, ...)
  if (plot_legend) {
    pedigree.legend(ped = k2ped, labels = dimnames(k2ped$affected)[[2]],
                    radius = radius, location = location,
                    density = density,  angle = angle, cex = cex)
  }

  if (!is.null(ref_year)) {
    mtext(paste0("Reference Year: ",  pYr), adj = adj, line = line, cex = cex)
  }

  if (gen_lab) {
    mtext(gen_labels,
          side = 4, las = 2, cex = 1.5,
          line = mar[4]/2,
          at = seq(par("usr")[4],
                   par("usr")[3] - diff(par("usr")[4:3])/(gen_stretch*nlevel),
                   length.out = nlevel))
  }

  #reset plot margins the default settings
  par(mar = c(5, 4, 4, 2) + 0.1)
}

Try the SimRVPedigree package in your browser

Any scripts or data that you put into this service are public.

SimRVPedigree documentation built on Feb. 10, 2020, 1:07 a.m.