R/CalcBYprobs.R

Defines functions CalcBYprobs

Documented in CalcBYprobs

#=======================================================================
#' @title Birth year probabilities
#'
#' @description Estimate the probability that an individual with unknown birth
#'   year is born in year \code{y}, based on \code{BirthYears} or \code{BY.min}
#'   and/or \code{BY.max} of its parents, offspring, and siblings, combined with
#'   \code{AgePrior} (the age distribution of other parent-offspring pairs),
#'   and/or \code{Year.last} of its parents.
#'
#'
#' @details This function assists in estimating birth years of individuals for
#'   which these are unknown, provided they have at least one parent or one
#'   offspring in the pedigree. It is not a substitute for field-based estimates
#'   of age, only a method to summarise the pedigree + birth year based
#'   information.
#'
#'
#' @param Pedigree  dataframe with columns id-dam-sire.
#' @param AgePrior a matrix with probability ratios for individuals with age
#'  difference A to have relationship R, as generated by
#'  \code{\link{MakeAgePrior}}. If \code{NULL}, \code{\link{MakeAgePrior}} is
#'  called using its default values.
#' @inheritParams sequoia
#'
#' @return A matrix with for each individual (rows) in the pedigree that has a missing
#'   birth year in \code{LifeHistData}, or that is not included in
#'   \code{LifeHistData}, the probability that it is born in \code{y} (columns).
#'   Probabilities are rounded to 3 decimal points and may therefore not sum
#'   exactly to 1.
#'
#' @section WARNING:
#'  Any errors in the pedigree or lifehistory data will cause errors in the
#'  birth year probabilities of their parents and offspring, and putatively also
#'  of more distant ancestors and descendants. If the ageprior is based on the
#'  same erroneous pedigree and lifehistory data, all birth year probabilities
#'  will be affected.
#'
#'
#' @seealso \code{\link{MakeAgePrior}} to estimate effect of age on
#'   relationships.
#'
#' @examples
#' BYprobs <- CalcBYprobs(Pedigree = SeqOUT_griffin$Pedigree,
#'                        LifeHistData = SeqOUT_griffin$LifeHist)
#' \dontrun{
#' # heatmap
#' lattice::levelplot(t(BYprobs), aspect="fill", col.regions=hcl.colors)
#' }
#'
#' @useDynLib sequoia, .registration = TRUE
#
#' @export

CalcBYprobs <- function(Pedigree = NULL,
                        LifeHistData = NULL,
                        AgePrior = NULL) {

  on.exit(.Fortran(deallocall), add=TRUE)

  # pedigree: pretend all individuals are genotyped ----
  Ped <- PedPolish(Pedigree, gID=NULL, NullOK = FALSE)
  PedN <- PedToNum(Ped, gID=Ped$id, DoDummies = "no")
  Ng <- nrow(Ped)

  # lifehistory data ----
  LHF <- CheckLH(LifeHistData, gID=Ped$id, sorted=TRUE)
  NumNoBY <- sum(LHF$BirthYear < 0)


  # check/make ageprior ----
  if (is.null(AgePrior)) {
    AP <- MakeAgePrior(Pedigree=Ped, LifeHistData, Plot = FALSE, quiet = TRUE)
  } else if (is.matrix(AgePrior) | is.data.frame(AgePrior)) {
    AP <- CheckAP(AgePrior)
  } else {
    stop("'AgePrior' must be a matrix or NULL")
  }

  # first & last possible birth year ----
  # NOTE: this is an R-translated copy of code in subroutine PrepAgeData
  BYtmp <- with(LHF, c(BirthYear, BY.min, BY.max))
  BYzero <- min(BYtmp[BYtmp >= 0]) -1
  BYlast <- max(BYtmp[BYtmp >= 0])
  if (BYzero == Inf)  stop("Need at least 1 birthyear in LifeHistData")

  MaxAgePO <- max(which(AP[,1] > 0), which(AP[,2] > 0)) -1   # row1 = agediff of 0
  BYzero <- BYzero - MaxAgePO +1
  if ((BYlast - BYzero) < 2*MaxAgePO | BYlast==0) {
    BYzero <- BYzero - MaxAgePO    # dummy parents + real grandparents w unknown BY
  }

  nYears <- BYlast - BYzero


  #=========================
  # call fortran ----

  TMP <- .Fortran(getbyprobs,
                  ng = as.integer(Ng),
                  nx = as.integer(NumNoBY),
                  nap = as.integer(nrow(AP)),
                  nYearsIn = as.integer(nYears),
                  byrf = as.integer(c(LHF[['BirthYear']], LHF[['BY.min']], LHF[['BY.max']])),
                  lyrf = as.integer(LHF[['Year.last']]),
                  aprf = as.double(AP),
                  parentsrf = as.integer(PedN[['PedPar']]),
                  # OUT
                  byprobv = double(NumNoBY*nYears) )


  #=========================
  # output vector to matrix with dimnames ----

  BYprobM <- round(VtoM(TMP$byprobv, nc=nYears), 3)
  rownames(BYprobM) <- Ped$id[LHF$BirthYear < 0]
  colnames(BYprobM) <- seq(from=BYzero +1, to=BYlast)

  return(BYprobM)
}

Try the sequoia package in your browser

Any scripts or data that you put into this service are public.

sequoia documentation built on Sept. 8, 2023, 5:29 p.m.