R/PedSim_Functions.R

Defines functions create_pedFile sim_founderRVstatus create_founder create_mate create_offspring sim_nFam sim_ped

Documented in create_founder create_mate create_offspring create_pedFile sim_founderRVstatus sim_nFam sim_ped

#' Create an empty pedigree
#'
#' \code{create_pedFile} initializes an empty data frame with all of the fields required by \code{sim_ped} to simulate a pedigree.
#'
#' @return An empty data frame with all fields required by \code{sim_ped} to generate a pedigree.
#' @keywords internal
#'
create_pedFile = function(){
  data.frame(FamID = numeric(),
             ID = numeric(),
             sex = numeric(),
             dadID = numeric(),
             momID = numeric(),
             affected = logical(),
             DA1 = numeric(),
             DA2 = numeric(),
             birthYr = numeric(),
             onsetYr = numeric(),
             deathYr = numeric(),
             #RR = numeric(),
             available = logical(),
             Gen = numeric(),
             subtype = character(),
             do_sim = logical(),
             stringsAsFactors = FALSE)
}

#' Determine founder genotype at the disease locus and determime their relative-risk of disease
#'
#' @inheritParams sim_RVped
#' @param intro_RV Logical. If \code{intro_RV = TRUE} a founder has introduced a rare, causal variant to the pedigree, otherwise no RV has been introduced to the pedigree.
#'
#' @return A list containing: 1. the founder's genotype at the disease locus, their relative-risk of disease, and an updated value for intro_RV.
#'
#' @keywords internal
#'
sim_founderRVstatus <- function(GRR, carrier_prob, RVfounder){
  if (all(GRR == 1)) {
    # If GRR (genetic relative risk) = 1, the variant is not associated with
    # the disease; hence we do not allow an RV to segregate in the pedigree
    d_locus <- c(0, 0)
    fRR <- rep(1, length(GRR))
  } else if (RVfounder == FALSE){
    # If FALSE has been selected we allow the founder
    # the opportunity to introduce 1 copy of the RV with
    # proportional to its carrier frequency in the population, and set RR and
    # update intro_RV appropriately
    d_locus <- sample(x = c(0, ifelse(runif(1) <= carrier_prob, 1, 0)),
                      size = 2, replace = F)
    fRR <- ifelse(rep(any(d_locus == 1), length(GRR)), GRR, rep(1, length(GRR)))
  } else if (RVfounder == TRUE){
    d_locus <- sample(x = c(0, 1), size = 2, replace = F)
    fRR <- GRR
  }

founder_dat <- list(d_locus, fRR)
  #founder_dat <- d_locus
  return(founder_dat)
}

#' Create a new seed founder
#'
#' Create new seed founder for pedigree.
#'
#' @inheritParams sim_RVped
#'
#' @return \code{new_founder_info} the pedigree information for the new mate.
#' @keywords internal
create_founder = function(FamID, GRR, carrier_prob,
                          RVfounder, founder_byears){

  founder_dat <- sim_founderRVstatus(GRR, carrier_prob, RVfounder)

  new_founder_info <- data.frame(FamID = FamID,
                                 ID = 1,
                                 sex = round(runif(1)),
                                 dadID = NA,
                                 momID = NA,
                                 affected = F,
                                 DA1 = founder_dat[[1]][1],
                                 DA2 = founder_dat[[1]][2],
                                 birthYr = round(runif(1,
                                                       min = founder_byears[1],
                                                       max = founder_byears[2])),
                                 onsetYr = NA,
                                 deathYr = NA,
                                 #RR = founder_dat[[2]],
                                 available = T,
                                 Gen = 1,
                                 subtype = NA,
                                 do_sim = T,
                                 stringsAsFactors = FALSE)

  return(new_founder_info)
}

#' Create a new mate
#'
#' Create new mating partner in pedigree.
#'
#' @inheritParams sim_RVped
#' @param partner_info  Data.frame with 1 row. All pedigree information for the individual requiring a new mate, must contain all fields generated by \code{\link{create_pedFile}}.
#' @param last_id Numeric.  The last id used in the pedigree.
#'
#' @return \code{new_mate_info} the pedigree information for the new mate.
#' @return \code{last_id} the updated value of last_id after adding the new mate.
#'
#' @keywords internal
#'
#'
create_mate = function(partner_info, last_id,
                       carrier_prob,
                       RVfounder){

  new_mate_info <- data.frame(FamID = partner_info$FamID,
                              ID = last_id + 1,
                              sex = abs(partner_info$sex - 1),
                              dadID = NA,
                              momID = NA,
                              affected = F,
                              DA1 = 0,
                              DA2 = 0,
                              birthYr = NA,
                              onsetYr = NA,
                              deathYr = NA,
                              #RR = 1,
                              available = F,
                              Gen = partner_info$Gen,
                              subtype = NA,
                              do_sim = F,
                              stringsAsFactors = FALSE)

  mate_return <- list(new_mate_info, last_id + 1)
  return(mate_return)
}

#' Create a new offspring
#'
#' Create new offspring in pedigree.
#'
#' @inheritParams create_mate
#' @inheritParams sim_RVped
#' @param dad_info Data.frame with 1 row. All pedigree information of offspring's father, must contain all fields generated by \code{\link{create_pedFile}}.
#' @param mom_info Data.frame with 1 row. All pedigree information of offspring's mother, must contain all fields generated by \code{\link{create_pedFile}}.
#' @param byear The birth year of offspring.
#'
#' @return new_child_info the pedigree information for the new child
#' @return last_id the updated value of last_id after adding new offspring
#'
#' @keywords internal
#'
#' @importFrom stats runif
#'
create_offspring = function(dad_info, mom_info, byear, last_id, GRR){
  new_child_info <- data.frame(FamID = dad_info$FamID,
                               ID = last_id + 1,
                               sex = round(runif(1)),
                               dadID = dad_info$ID,
                               momID = mom_info$ID,
                               affected = F,
                               DA1 = sample(x = c(dad_info$DA1, dad_info$DA2), size = 1),
                               DA2 = sample(x = c(mom_info$DA1, mom_info$DA2), size = 1),
                               birthYr = byear,
                               onsetYr = NA,
                               deathYr = NA,
                               #RR = NA,
                               available = T,
                               Gen = dad_info$Gen + 1,
                               subtype = NA,
                               do_sim = T,
                               stringsAsFactors = FALSE)
  #new_child_info$RR <- ifelse(sum(new_child_info$DA1, new_child_info$DA2) == 0, 1, GRR)
  child_return <- list(new_child_info, last_id + 1)
  return(child_return)
}


#' Simulate a nuclear family from a single founder
#'
#' Simulate all life events for an individual and create a nuclear family, when appropriate.
#'
#' @param found_info Data.frame with 1 row. All pedigree information for the founder, must contain all fields generated by \code{\link{create_pedFile}}.
#' @inheritParams create_mate
#' @inheritParams sim_RVped
#'
#' @keywords internal
#' @return A pedigree with updated life events for the founder, and
#' additional information for mate and offspring, when offspring are generated.
#'
sim_nFam = function(found_info, stop_year, last_id,
                    hazard_rates, NB_params, GRR,
                    carrier_prob, RVfounder, fert){

  nfam_ped <- found_info

  #Simulate life steps for founder
  sim_years <- sim_life(hazard_rates, GRR, carrier_prob,
                        RV_status = any(found_info[, 7:8] == 1),
                        YOB = found_info$birthYr, stop_year,
                        NB_params, fert)



  # update disease status and onset year if onset occured prior to stop_year
  if (!is.na(sim_years$onset_event)) {
    nfam_ped$affected <- T
    nfam_ped$onsetYr <- sim_years$onset_event
    nfam_ped$subtype <- sim_years$subtype
  } else {
    nfam_ped$affected <- F
  }

  # update death status and death year if onset occured prior to stop_year
  if (!is.na(sim_years$death_event)) {
    nfam_ped$deathYr <- sim_years$death_event
  }

  #store the birth years of each child
  birth_events <- sim_years$repro_events

  if (!is.null(birth_events)) {
    #add a mate
    new_mate <- create_mate(partner_info = nfam_ped[1,], last_id,
                            carrier_prob, RVfounder)

    nfam_ped <- rbind(nfam_ped, new_mate[[1]])
    last_id <- new_mate[[2]]

    #store info for mom and dad
    dad <- nfam_ped[which(nfam_ped$sex == 0), ]
    mom <- nfam_ped[which(nfam_ped$sex == 1), ]


    #add a child for each birth event
    for (k in 1:length(birth_events)) {
      #add child
      new_child <- create_offspring(dad_info = dad, mom_info = mom,
                                    byear = birth_events[k], last_id, GRR)
      nfam_ped <- rbind(nfam_ped, new_child[[1]])
      last_id <- new_child[[2]]
    }

  }

  #set do_sim to FALSE for individual whose life events we just simulated
  nfam_ped$do_sim[1] = F

  sim_fam_return = list(nfam_ped, last_id)
  return(sim_fam_return)
}

#' Simulate a pedigree
#'
#' Please note the distinction between \code{sim_ped} and \code{sim_RVped}.  Pedigrees simulated using \code{sim_ped} do not account for study design.  To simulate a pedigree ascertained to contain multiple family members affected by a disease please use \code{\link{sim_RVped}}.
#'
#'
#' To introduce the rare variant to the pedigree, We allow users to choose from one of the following two assumptions:
#' \enumerate{
#' \item Assume that the variant is rare enough that a single copy has been introduced by one founder, and begin the simulation of the pedigree with this founder, as in Bureau (2014).
#' \item Simulate the starting founder's rare-variant status with probability equal to the carrier probability of the rare variant in the population.  We note that under this setting pedigrees may not segregate the rare variant.
#' }
#' The \code{sim_ped} function starts simulating the pedigree by generating the birth year for the starting founder, uniformly between the years specified by \code{founder_byears}.  Next, all life events are simulated for the founder via \code{\link{sim_life}}.  Possible life events include: reproduction, disease onset, and death.  We only allow disease onset to occur once, i.e. no remission.  Computationally, this implies that after disease onset, the waiting time to death is always simulated using the age-specific mortality rates for the \emph{affected} population.  Life events for individuals who have inherited the rare variant are simulated such that their relative-risk of disease is \code{GRR}, according to a proportional hazards model.  The relative-risk of disease onset for individuals who have not inherited the causal variant is assumed to be 1.  Any life events that occur after \code{stop_year} are censored.
#'
#' When segregating in the pedigree, the rare variant is transmitted from parent to offspring according to Mendel's laws.  The process of simulating life events is repeated for any offspring that are produced before \code{stop_year}.
#'
#'
#' @inheritParams sim_RVped
#'
#' @return The simulated pedigree.
#' @export
#' @importFrom stats runif
#'
#' @references Nieuwoudt, Christina and Jones, Samantha J and Brooks-Wilson, Angela and Graham, Jinko (2018). \emph{Simulating Pedigrees Ascertained for Multiple Disease-Affected Relatives}. Source Code for Biology and Medicine, 13:2.
#' @references Ken-Ichi Kojima, Therese M. Kelleher. (1962), \emph{Survival of Mutant Genes}. The American Naturalist 96, 329-346.
#' @references Alexandre Bureau, Samuel G. Younkin, Margaret M. Parker, Joan E. Bailey-Wilson, Mary L. Marazita, Jeffrey C. Murray, Elisabeth Mangold, Hasan Albacha-Hejazi, Terri H. Beaty, and Ingo Ruczinski (2014). \emph{Inferring rare disease risk variants based on exact probabilities of sharing by multiple affected relatives.} Bioinformatics; Vol. 30, No. 15, pp. 2189-2196.
#'
#' @section See Also:
#' \code{\link{sim_RVped}}, \code{\link{sim_life}}
#'
#' @examples
#' data(AgeSpecific_Hazards)
#'
#' # Simulate a random pedigree
#' set.seed(5)
#' ex_ped <- sim_ped(hazard_rates = hazard(hazardDF = AgeSpecific_Hazards),
#'                   GRR = 10,
#'                   FamID = 1,
#'                   founder_byears = c(1900, 1910),
#'                   stop_year = 2015)
#'
#' # View the simulated pedigree
#' ex_ped
#'
#' # Plot the pedigree
#' plot(ex_ped, location = "topleft")
#'
#' # Plot the pedigree, this time with age labels for
#' # all descendents of the starting founder (ID 1)
#' plot(ex_ped, ref_year = 2015,
#'      cex= 0.75, symbolsize = 1.25,
#'      location = "topleft")
#'
#'
#' # Simulate a random pedigree. This time set RVfounder to TRUE so that
#' # the eldest introduces a causal rare variant with probability 1.
#' set.seed(5)
#' ex_ped <- sim_ped(hazard_rates = hazard(hazardDF = AgeSpecific_Hazards),
#'                   RVfounder = TRUE,
#'                   GRR = 10,
#'                   FamID = 1,
#'                   founder_byears = c(1900, 1910),
#'                   stop_year = 2015)
#'
#'
#' # Plot the pedigree with age labels
#' plot(ex_ped, ref_year = 2015,
#'      cex= 0.75, symbolsize = 1.25,
#'      location = "topleft")
#'
sim_ped = function(hazard_rates, GRR,
                   FamID, founder_byears, stop_year = NULL,
                   carrier_prob = 0.002,
                   RVfounder = FALSE,
                   NB_params = c(2, 4/7),
                   fert = 1){

  if(!(RVfounder %in% c(T, F))){
    stop ('Please set RVfounder to TRUE or FALSE.')
  }

  if (carrier_prob <= 0 | carrier_prob >= 1){
    stop ('carrier_prob must be a value between 0 and 1')
  } else if (carrier_prob > 0.01) {
    warning('carrier_prob > 0.01: sim_RVped is intended for simulating the transmission of rare variants.')
  }

  if (is.null(stop_year)){
    stop_year <- as.numeric(format(Sys.Date(),'%Y'))
  }

  fam_ped <- create_pedFile()
  fam_ped[1, ] <- create_founder(FamID, GRR, carrier_prob,
                                 RVfounder, founder_byears)[1, ]

  last_id <- 1
  last_gen <- 1

  #store the ID of individuals for whom we need to simulate life events
  re_sim <- fam_ped$ID[which(fam_ped$do_sim & fam_ped$Gen == last_gen)]
  while (length(re_sim) > 0) {
    for (k in 1:length(re_sim)) {
      newKin <- sim_nFam(found_info = fam_ped[which(fam_ped$ID == re_sim[k]),],
                         stop_year, last_id,
                         hazard_rates, NB_params,
                         GRR, carrier_prob,
                         RVfounder, fert)

      #replace individual by their simulated self and add family members when necessary
      fam_ped <- rbind(fam_ped[-which(fam_ped$ID == re_sim[k]), ],
                       newKin[[1]])

      last_id <- newKin[[2]]
    }
    last_gen <- max(fam_ped$Gen)
    re_sim <- fam_ped$ID[which(fam_ped$do_sim & fam_ped$Gen == last_gen)]
  }

  rownames(fam_ped) <- NULL

  #return all pedigree fields, except for do_sim and subtype,
  #unless multiple subtypes are simulated
  return(ped(fam_ped[, c(1:(13 + (length(hazard_rates$subtype_ID) > 1)))]))
}
simrvprojects/SimRVPedigree documentation built on Feb. 12, 2020, 6:12 p.m.