R/vital-rates.R

#' Calculate median, minimum, and maximum age at first reproduction
#'
#' @param b Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @return Output is a tbl_df that gives the median, minimum, and maximum age at first reproduction for each study species in days and years. Sample size is also included.
#' @export
#' @examples
#' afr <- age_first_rep(lh)
age_first_rep <- function(b){

  # Find all animals that are first born offspring
  # Since there are currently errors in the first-born column,
  # choose the first offspring for each mother that is called her first-born
  fb <- b %>%
    dplyr::filter(First.Born == "Y") %>%
    dplyr::group_by(Study.Id, Mom.Id) %>%
    dplyr::arrange(Birth.Date) %>%
    dplyr::summarise(Animal.Id = dplyr::first(Animal.Id),
              Birth.Date = dplyr::first(Birth.Date),
              Sex = dplyr::first(Sex),
              First.Born = dplyr::first(First.Born)) %>%
    dplyr::select(Study.Id, Animal.Id, Birth.Date, Mom.Id, First.Born, Sex)

  # Create a temporary data frame of mothers of these animals with their birth dates
  # Note that cases where Mom.Id doesn't match exactly with Animal.Id are dropped silently
  # Use find_mom_errors() function to find these cases!
  mothers <- suppressWarnings(dplyr::semi_join(b, fb,
                                               by = c("Study.Id" = "Study.Id",
                                                      "Animal.Id" = "Mom.Id")))
  mothers <- mothers %>%
    dplyr::select(Study.Id, Animal.Id, Birth.Date) %>%
    dplyr::rename(Mother.Birth.Date = Birth.Date)

  # For first-born animals in fb, get the mother's birth date using inner join
  temp <- suppressWarnings(dplyr::inner_join(fb, mothers, by = c("Study.Id" = "Study.Id",
                                                                 "Mom.Id" = "Animal.Id")))

  # Calculate mother's age when the first offspring was born
  ages <- temp %>%
    dplyr::mutate(age_days = as.numeric(Birth.Date - Mother.Birth.Date),
                  age_years = as.numeric(difftime(Birth.Date, Mother.Birth.Date) / 365.25)) %>%
    dplyr::group_by(Study.Id) %>%
    dplyr::summarise(median_age_days = median(age_days),
                     median_age_years = median(age_years),
                     minimum_age_days = min(age_days),
                     minimum_age_years = min(age_years),
                     maximum_age_days = max(age_days),
                     maximum_age_years = max(age_years),
                     n_first_births = n())

  return(ages)
}


#' Calculate stage-specific survival
#' Based on Bill Morris' methods for calculating vital rates, available on the Wiki.
#'
#' @param b Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @param adult_definition How adult females are defined in terms of age at first birth. Options are "median" or "minimum".
#' @param census_start_month A vector of start months for each study. Should be same length as the number of studies. Defaults to January.
#' @return Output is a tbl_df that includes the probability of survival for each life-history stage for each study species. Life history stages include newborns, juveniles, and adults; these are defined as in Morris et al. 2011. Also included are the number of animals and the "animal-years" of observation on which the calculation is based.
#' @export
#' @examples
#' sss <- stage_specific_survival(lh)
stage_specific_survival <- function(b, adult_definition = "median", weaning_ages = NULL, census_start_month = NULL) {

  if (!adult_definition %in% c("median", "minimum")) {
    stop("Unrecognized definition for adult females (must be 'median' or 'minimum').")
  }

  b$Animal.Id <- as.character(b$Animal.Id)

  # For now, because of database issues, there are some individuals
  # for which Depart.Date < Entry.Date
  # Need to filter these out
  b <- dplyr::filter(b, Depart.Date >= Entry.Date)

  # Get life history stage (newborn, juvenile, or adult) of each animal at each
  # pseudocensus date during the study
  ps_ages <- pseudocensus_ages(b, adult_definition, weaning_ages, census_start_month)

  # Remove years for which census_end < Entry.Date or census_date > Depart.Date
  ps_ages <- ps_ages %>%
    dplyr::mutate(census_date = lubridate::ymd(census_date, tz = "UTC"),
                   census_end = census_date + lubridate::years(1)) %>%
    dplyr::filter(census_end > Entry.Date & census_date <= Depart.Date)

  # Get distinct animals
  animals <- ps_ages %>%
    dplyr::distinct(Study.Id, Animal.Id) %>%
    dplyr::select(Study.Id, Animal.Id)

  m <- list()

  # Loop through all animals
  for (i in 1:nrow(animals)) {

    temp_study <- animals[i, ]$Study.Id
    temp_animal <- animals[i, ]$Animal.Id

    # Obtain subset of ps_ages corresponding to the current animals
    temp_set <- ps_ages %>%
      filter(Study.Id == temp_study & Animal.Id == temp_animal) %>%
      mutate(Alive = 0, Deaths = 0) %>%
      arrange(census_date)

    # For each record in temp_set, calculate the appropriate weight for survival rates
    for (j in 1:nrow(temp_set)) {

      # Weight starts at zero
      w <- 0

      # MinCensus is a special case for which the weight has to be adjusted
      if (j == 1) {

        # Non-newborns
        # If the animal is not newborn, the observation is probably left-censored
        # Includes immigrants and older individuals added to the study
        # The weight corresponds to the fraction of the interval during
        # which the animal was observed (i.e., excluding the censored part)
        if (temp_set[j, ]$discrete_age_class != 0) {

          # If individual survives to next census
          if (temp_set[j, ]$Depart.Date > temp_set[j, ]$census_end) {

            # Weight is 1 if entry date equals census date (no censoring)
            if (temp_set[j, ]$Entry.Date == temp_set[j, ]$census_date) {
              w <- 1
            }
            # Otherwise, weight is fraction of year excluding left-censored part
            else{
              w <- difftime(temp_set[j, ]$census_end,
                            temp_set[j, ]$Entry.Date,
                            units = "days") / 365.25
            }
          }

          # If individual not present at next census
          # This means there is only one entry for the animal, i.e. nrow(temp_set) == 1
          # Weight is fraction of year that animal was observed (between entry and depart)
          else{
            w <- difftime(temp_set[j, ]$Depart.Date,
                          temp_set[j, ]$Entry.Date,
                          units = "days") / 365.25

            # If confirmed death, weight is added to deaths
            if (temp_set[j, ]$Depart.Type == "D") {
              temp_set[j, ]$Deaths <- w
            }
          }
        }

        # Newborns
        # If the animal is newborn, the observation is probably left-censored
        # We don't discount the fraction of the year before birth
        else if (temp_set[j, ]$discrete_age_class == 0) {

          # If infant survives to next census
          # Weight is one
          if (temp_set[j, ]$Depart.Date >= temp_set[j, ]$census_date + lubridate::years(1)) {
            w <- 1
          }

          # Otherwose, infant not present at next census
          # This means there is only one entry for the animal, i.e. nrow(temp_set) == 1
          else{
            # If observation ended before next census without confirmed death
            # Weight is fraction of year that excludes right-censored part
            if (temp_set[j, ]$Depart.Type %in% c("E", "O", "P")) {

              w <- difftime(temp_set[j, ]$Depart.Date,
                            temp_set[j, ]$census_date,
                            units = "days") / 365.25
            }

            # If infant died before next census
            # Weight is 1 and this gets added to deaths
            else if (temp_set[j, ]$Depart.Type == "D") {
              w <- 1
              temp_set[j, ]$Deaths <- w
            }
          }
        }
      }

      # MaxCensus (when nrow(temp_set) > 1)
      else if (j > 1 & j == nrow(temp_set)) {
        # If confirmed death, weight is 1
        if (temp_set[j, ]$Depart.Type == "D") {
          w <- 1
          temp_set[j, ]$Deaths <- w
        }
        # Otherwise, weight is fraction of year excluding right-censored part
        else{
          w <- difftime(temp_set[j, ]$Depart.Date,
                        temp_set[j, ]$census_date,
                        units = "days") / 365.25
        }

      }

      # All other census dates correspond to complete observation years without death
      else {
        w <- 1
      }

      temp_set[j, ]$Alive <- w
    }

    m[[i]] <- temp_set
  }

  m <- bind_rows(m)

  sss_summary <- m %>%
    dplyr::mutate(year_of = lubridate::year(census_date)) %>%
    dplyr::group_by(Study.Id, year_of, age_class) %>%
    dplyr::summarise(n_animals = n(),
                     individual_years = sum(Alive),
                     s = 1 - (sum(Deaths) / sum(Alive)),
                     deaths = sum(Deaths),
                     trials = round(individual_years, 0),
                     successes = round(individual_years, 0) - round(deaths, 0))

  return(sss_summary)
}


#' Calculate stage-specific fertility
#' Based on Bill Morris' methods for calculating vital rates, available on the Wiki.
#'
#' @param b Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @param f Name of the tbl_df containing fertility data generated by the function read_fert_table.
#' @param adult_definition Character. Either "median" or "minimum". Determines how females should be defined: minimum age at first birth or median age at first birth.
#' @param annual Logical. Should the fertility be calculated for each year separately?
#' @param census_start_month Numeric vector of length equal to number of studies.
#' @param filter_fertile Logical. If set to true, only females that are likely to be fertile are retained.
#' @return Output is a tbl_df that includes the per capita fertility for each life-history stage for each study species. Life history stages include newborns, juveniles, and adults; these are defined as in Morris et al. 2011. Also included are the number of animals and the "female-years" of observation on which the calculation is based.
#' @export
#' @examples
#' ssf <- stage_specific_fertility(lh, fert)
stage_specific_fertility <- function(b, f, adult_definition = "median", weaning_ages = NULL, census_start_month = NULL) {

  if (!adult_definition %in% c("median", "minimum")) {
    stop("Unrecognized definition for adult females (must be 'median' or 'minimum').")
  }

  f$Animal.Id <- as.character(f$Animal.Id)
  b$Animal.Id <- as.character(b$Animal.Id)

  # Get subset of animals for which fertility was monitored

  # Get life history stage (newborn, juvenile, or adult) of each animal at each
  # pseudocensus date during the study
  ps_ages <- pseudocensus_ages(b, adult_definition, weaning_ages, census_start_month)

  # Get subset of animals for which fertility was monitored
  # Semi-join retains all rows in b that have a match in f
  # Join to fertility data to get start and stop dates
  ps_ages <- dplyr::inner_join(ps_ages, f, by = c("Study.Id", "Animal.Id"))

  ps_ages$Weight <- NA
  ps_ages$Num.Offspring <- NA

  # Remove years for which census_end < Start.Date or census_date > End.Date
  ps_ages <- ps_ages %>%
    dplyr::mutate(census_date = as.POSIXct(census_date),
                   census_end = census_date + lubridate::years(1)) %>%
    dplyr::filter(census_end > Start.Date & census_date <= Stop.Date)

  # Calculate weight as the proportion of the psuedocensus year for which
  # the female's fertility was observed
  # Takes about 2 minutes to run
  for (i in 1:nrow(ps_ages)) {

    # Store these values temporarily for future use in matching
    s <- as.character(ps_ages[i, ]$Study.Id)
    m <- as.character(ps_ages[i, ]$Animal.Id)

    # If start date and end date fully encompass census year, weight is 1
    if (ps_ages[i, ]$census_date >= ps_ages[i, ]$Start.Date &
        ps_ages[i, ]$census_end <= ps_ages[i, ]$Stop.Date) {
      ps_ages[i, ]$Weight <- 1
    }

    # Else if census date before start date, fertility observation is censored
    # Get length between start date and min of {census end, stop date}
    else if (ps_ages[i, ]$census_date < ps_ages[i, ]$Start.Date) {
      ps_ages[i, ]$Weight <- (min(c(ps_ages[i, ]$census_end,
                                    ps_ages[i, ]$Stop.Date)) -
                                ps_ages[i, ]$Start.Date) / 365.25
    }

    # Else if stop date is before next census, fertility observation is censored
    # Get length between max of {census start, start date} and stop date
    else if (ps_ages[i, ]$Stop.Date < ps_ages[i, ]$census_end) {
      ps_ages[i, ]$Weight <- (ps_ages[i, ]$Stop.Date -
        (max(c(ps_ages[i, ]$census_date, ps_ages[i, ]$Start.Date)))) / 365.25
    }

    t1 <- ps_ages[i, ]$census_date
    t2 <- ps_ages[i, ]$census_end

    # Add up the number of individuals born to
    # that female in that census year
    ps_ages[i, ]$Num.Offspring <- b %>%
      dplyr::filter(Study.Id == s & Mom.Id == m & Birth.Date >= t1 & Birth.Date < t2) %>%
      nrow()
  }

  # if (filter_fertile) {
  #
  #   # Retain females who:
  #   # 1. Were monitored in previous year AND did NOT give birth
  #   # 2. Were monitored in previous year AND gave birth AND infant died before age 1
  #
  #   temp <- ps_ages %>%
  #     dplyr::arrange(Study.Id, census_date, Animal.Id) %>%
  #     distinct(Study.Id, Animal.Id, census_date) %>%
  #     dplyr::group_by(Study.Id, Animal.Id)
  #
  #   # For monitored females, determine if gave birth in previous census year
  #   # Also ensure that no gaps in census years
  #   temp <- temp %>%
  #     dplyr::mutate(rep_prev_yr = as.numeric(ifelse(dplyr::lag(Num.Offspring) > 0, 1, 0)),
  #                   census_diff = lubridate::year(census_date) - lubridate::year(dplyr::lag(census_date))) %>%
  #     ungroup()
  #
  #   # Retain only females monitored in previous year
  #   temp <- temp %>%
  #     dplyr::filter(census_diff == 1)
  #
  #   # For monitored females that gave birth, determine if offspring survived
  #   # First, get the mothers
  #   moms_birth_prev_yr <- temp %>%
  #     dplyr::filter(rep_prev_yr == 1) %>%
  #     dplyr::mutate(dob_start = census_date - lubridate::years(1),
  #                   dob_end = census_date - lubridate::days(1)) %>%
  #     dplyr::select(census_date, Study.Id, Mom.Id = Animal.Id, dob_start, dob_end) %>%
  #     dplyr::ungroup()
  #
  #   # Second, get set of offspring to match
  #   offspring <- b %>%
  #     dplyr::filter(!is.na(Mom.Id)) %>%
  #     dplyr::select(Study.Id, offspring_id = Animal.Id, Mom.Id, Birth.Date, Depart.Date,
  #            Depart.Type) %>%
  #     dplyr::arrange(Study.Id, Mom.Id, Birth.Date) %>%
  #     dplyr::ungroup()
  #
  #   get_offspring <- function(of, df) {
  #     df$Mom.Id <- as.character(df$Mom.Id)
  #     res <- of %>%
  #       dplyr::filter(Mom.Id == df$Mom.Id[[1]] & Study.Id == df$Study.Id[[1]] & Birth.Date %within% lubridate::interval(df$dob_start[[1]], df$dob_end[[1]]))
  #     res$census_date <- df$census_date[[1]]
  #     return(res)
  #   }
  #
  #   # Third, match each mother to the infant born in the previous year
  #   inf <- list()
  #   for (i in 1:nrow(moms_birth_prev_yr)) {
  #     inf[[i]] <- get_offspring(offspring, moms_birth_prev_yr[i, ])
  #   }
  #
  #   inf <- bind_rows(inf)
  #
  #   # Fourth, find all infants that died before age 1
  #   inf_died_young <- inf %>%
  #     dplyr::group_by(Study.Id, Mom.Id, census_date) %>%
  #     dplyr::mutate(age_at_depart = (Depart.Date - Birth.Date) / lubridate::dyears(1)) %>%
  #     dplyr::filter(age_at_depart < 1)
  #
  #   # These females were monitored in the previous year AND did not give birth during that year
  #   fems1 <- dplyr::filter(temp, rep_prev_yr == 0)
  #
  #   # These females were monitored in previous year AND gave birth AND infant died before age 1
  #   fems2 <- suppressWarnings(dplyr::inner_join(select(inf_died_young, Study.Id, Animal.Id = Mom.Id, census_date),
  #                       temp, by = c("Study.Id", "Animal.Id", "census_date")))
  #   fems2 <- dplyr::distinct(fems2, Study.Id, Animal.Id, census_date)
  #
  #   ps_ages <- dplyr::bind_rows(fems1, fems2)
  #
  # }

  ssf_summary <- ps_ages %>%
    dplyr::mutate(year_of = lubridate::year(census_date)) %>%
    dplyr::group_by(Study.Id, year_of, age_class) %>%
    dplyr::summarise(n_animals = n(),
                     female_years = sum(Weight),
                     f = sum(Num.Offspring * Weight) / sum(Weight),
                     trials = round(female_years, 0),
                     successes = round(sum(Num.Offspring * Weight), 0))

  return(ssf_summary)
}


#' Calculates the age class of an animal based on its age and the median age at
#' first reproduction for the species.
#'
#' @param age_years Age of the animal in years.
#' @param mafr Median age of first reproduction in years for the species.
#' @return Output is a character string: "newborn", "juvenile", or "adult"
#' @export
#' @examples
#' age_class <- get_age_class(5.1, 8.78589)
get_age_class <- function(age_years, mafr, weaning){

  res <- ifelse(age_years <= weaning, "newborn",
                ifelse(age_years > 0 & age_years < mafr, "juvenile", "adult"))

  return(res)
}

#' Create pseudocensus dates and calculate ages of all animals present at each census.
#' This function is used by the stage-specific vital rate functions.
#'
#' @param b Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @param adult_definition How adult females are defined in terms of age at first birth. Options are "median" or "minimum".
#' @param census_start_month A vector of start months for each study. Should be same length as the number of studies. Defaults to January.
#' @return Output is a tbl_df that includes an entry for each animal present at each pseudo-census date along with its discrete age class and life-history stage at that date.
#' @export
#' @examples
#' ages <- pseudocensus_ages(lh)
pseudocensus_ages <- function(b, adult_definition = "median", weaning_ages = NULL, census_start_month = NULL){

  if (!adult_definition %in% c("median", "minimum")) {
    stop("Unrecognized definition for adult females (must be 'median' or 'minimum').")
  }

  if (is.null(census_start_month)) {
    census_start_month <- rep(1, times = length(levels(factor(b$Study.Id))))
  }

  if (is.null(weaning_ages)) {
    weaning_ages <- rep(1, times = length(levels(factor(b$Study.Id))))
  }

  # pseudocensus
  census <- b %>%
    dplyr::group_by(Study.Id) %>%
    dplyr::summarise(min_entry = min(Entry.Date))


  census <- census %>%
    dplyr::mutate(census_start = lubridate::ymd(paste(lubridate::year(min_entry),
                                                      census_start_month, "01", sep = "-"),
                                                tz = "UTC"))

  census <- census %>%
    dplyr::mutate(census_start = as.POSIXct(ifelse(census_start < min_entry,
                                                   census_start,
                                                   census_start - lubridate::years(1)),
                                            origin = "1970-01-01"))

  census_end <- b %>%
    dplyr::group_by(Study.Id) %>%
    dplyr::filter(Depart.Type == "O") %>%
    dplyr::summarise(study_end = max(Depart.Date)) %>%
    dplyr::mutate(census_end = lubridate::ymd(paste(year(study_end),
                                                    census_start_month, "01",
                                                    sep = "-"),
                                              tz = "UTC"))

  census_end <- census_end %>%
    dplyr::mutate(census_end = as.POSIXct(ifelse(census_end < study_end,
                                                 census_end,
                                                 census_end - lubridate::years(1)),
                                          origin = "1970-01-01"))

  census <- suppressMessages(dplyr::inner_join(census, census_end))

  census_seq <- function(df) {
    seq(df$census_start, df$census_end, "1 year")
  }

  census_dates <- plyr::dlply(census, .(Study.Id), census_seq)

  mafr <- age_first_rep(b)

  # For each census date, calculate the age of each animal present
  age_df <- list()
  for (j in 1:7) {
    site <- as.character(census[j, ]$Study.Id)
    site_seq <- census_dates[[site]]
    temp_lh <- b %>%
      dplyr::filter(Study.Id == site)

    if (adult_definition == "median") {
      temp_mafr <- mafr[mafr$Study.Id == site, ]$median_age_years
    } else if (adult_definition == "minimum") {
      temp_mafr <- mafr[mafr$Study.Id == site, ]$minimum_age_years
    }

    temp_weaning <- weaning_ages[weaning_ages$site == site, ]$weaning_age

    ages <- list()
    for (i in 1:length(site_seq)) {
      # For each date, extract subset of animals present or born before the next census
      t <- site_seq[i]

      temp_set <- temp_lh %>%
        dplyr::filter(Entry.Date - lubridate::years(1) < t & Depart.Date >= t)

      # If present, calculate age and assign to age category; calculate weight if neccesary
      ages[[i]] <- temp_set %>%
        dplyr::mutate(age_days = difftime(t, Birth.Date, units = "days"),
                      age_years = age_days / 365.25,
                      census_date = as.Date(t),
                      discrete_age_class = ceiling(age_years),
                      age_class = get_age_class(age_years, temp_mafr, temp_weaning)) %>%
        dplyr::select(census_date, 1:4, age_days, age_years,
                      discrete_age_class, age_class, 13:16)
    }

    site_ages <- dplyr::bind_rows(ages)
    age_df[[j]] <- site_ages
  }

  age_df <- dplyr::bind_rows(age_df)

  return(age_df)
}

#' Converts the stage-specific survivorship data to binomial (trials & successes).
#'
#' @param df Name of the tbl_df containing survivorship data generated by the function stage_specific_survival.
#' @export
#' @examples
#' surv_trials <- make_survivorship_trials(sss)
make_survivorship_trials <- function(df){

  get_trials <- function(df){

    fates <- list()

    for (i in 1:nrow(df)){
      fate <- c(rep(1, df[i, ]$successes),
                rep(0, df[i, ]$trials - df[i, ]$successes))

      f <- data.frame(fate = fate)

      f$Study.Id <- df[i, ]$Study.Id
      f$year_of <- df[i, ]$year_of
      f$age_class <- df[i, ]$age_class

      fates[[i]] <- f
    }

    fates <- dplyr::bind_rows(fates)

    fates <- dplyr::select(fates, 2:4, 1)

    return(fates)
  }

  surv_trials <- bind_rows(plyr::dlply(filter(df, trials != 0),
                                       .(Study.Id, age_class),
                                       get_trials))

  surv_trials$age_class <- factor(surv_trials$age_class,
                                  levels = c("newborn", "juvenile", "adult"))

  return(surv_trials)
}

fertile_periods <- function(b, f, weaning_ages) {

  # For each female, find every:
  # - date of infant birth
  # - date of weaning OR date of infant death before weaning age
  females <- f %>%
    dplyr::select(Study.Id, Animal.Id) %>%
    dplyr::distinct(Animal.Id)

  females$Animal.Id <- as.character(females$Animal.Id)

  offspring <- b %>%
    dplyr::filter(!is.na(Mom.Id)) %>%
    dplyr::select(Study.Id, Offspring.Id = Animal.Id, Mom.Id, Birth.Date, Depart.Date,
                  Depart.Type) %>%
    dplyr::arrange(Study.Id, Mom.Id, Birth.Date) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(Age.Depart = (Depart.Date - Birth.Date) / lubridate::dyears(1)) %>%
    dplyr::left_join(weaning_ages, by = c("Study.Id" = "site")) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(Resume.Date = as.Date(Birth.Date + lubridate::dyears(min(Age.Depart, weaning_age))))

  offspring$Mom.Id <- as.character(offspring$Mom.Id)

  # Deal with twins (retain only the twin that lived longer)
  offspring <- offspring %>%
    dplyr::ungroup %>%
    dplyr::group_by(Study.Id, Mom.Id, Birth.Date) %>%
    dplyr::top_n(1, Age.Depart)

  fert_list <- list(nrow(females))

  for (i in seq_along(females$Animal.Id)) {

    current_fert <- dplyr::filter(fert, Animal.Id == females$Animal.Id[[i]] & Study.Id == females$Study.Id[[i]])
    current_off <- dplyr::filter(offspring, Mom.Id == females$Animal.Id[[i]] & Study.Id == females$Study.Id[[i]])

    fert_ints <- lubridate::interval(current_fert$Start.Date, current_fert$Stop.Date)
    off_ints <- lubridate::interval(current_off$Birth.Date, current_off$Resume.Date)

    retain_start <- logical(length(current_fert$Start.Date))
    retain_end <- logical(length(current_fert$Stop.Date))
    for (j in seq_along(current_fert$Start.Date)) {
      retain_start[j] <- sum(current_fert$Start.Date[j] %within% off_ints) == 0
      retain_end[j] <- sum(current_fert$Stop.Date[j] %within% off_ints) == 0
    }

    fert_start <- current_fert$Start.Date[retain_start]
    fert_end <- current_fert$Stop.Date[retain_end]

    retain_start <- logical(length(current_off$Resume.Date))
    retain_end <- logical(length(current_off$Birth.Date))
    for (j in seq_along(current_off$Birth.Date)) {
      retain_start[j] <- sum(current_off$Resume.Date[j] %within% fert_ints) > 0
      retain_end[j] <- sum(current_off$Birth.Date[j] %within% fert_ints) > 0
    }

    off_start <- current_off$Resume.Date[retain_start]
    off_end <- current_off$Birth.Date[retain_end]

    # If any offspring start dates are equal to the fertility end date
    # Remove the date from offspring start (because new observation period doesn't start)
    date_isect <- as.Date(as.POSIXct(intersect(as.POSIXct(off_start),
                                               int_end(fert_ints)),
                                     origin = ymd("1970-01-01")))
    if (length(date_isect > 0)) {
      off_start <- off_start[off_start != date_isect]
    }

    # If offspring end dates (DOBs) are equal to a fertility start date
    # Remove the date from offspring end if not contained in fert_end
    date_isect <- as.POSIXct(intersect(as.POSIXct(off_end),
                                               int_start(fert_ints)),
                                     origin = ymd("1970-01-01"))
    if (length(date_isect) > 0) {
      if (!(date_isect %in% fert_end)) {
        off_end <- off_end[off_end != date_isect]
      }
    }

    start_dates <- sort(c(as.Date(fert_start), as.Date(off_start)))
    end_dates <- sort(c(as.Date(fert_end), as.Date(off_end)))

    if (length(start_dates) == 0 & length(end_dates) == 0) {
      fert_list[[i]] <- NULL
    }
    else {
      df <- data.frame(start_dates, end_dates)

      df$Study.Id <- females$Study.Id[[i]]
      df$Animal.Id <- females$Animal.Id[[i]]

      fert_list[[i]] <- df
    }
  }

  new_fert <- dplyr::bind_rows(fert_list)

  new_fert <- new_fert %>%
    dplyr::rename(Start.Date = start_dates,
           Stop.Date = end_dates) %>%
    dplyr::select(Study.Id, Animal.Id, Start.Date, Stop.Date)

  new_fert$Start.Date <- as.POSIXct(new_fert$Start.Date)
  new_fert$Stop.Date <- as.POSIXct(new_fert$Stop.Date)

  return(new_fert)

  # For each fert interval
  # Find all births that occurred during that interval
  # These dates + 1 day are now stop dates
  # Find all weanings/infant deaths during that interval
  # These are now start dates

  # Which start/stop dates to retain?
  # Ignore offspring start dates (weaning) outside of fert intervals
  # Ignore offspring stop dates (births) outside of fert intervals
  # Ignore fert start dates inside of offspring intervals (which means not fertile)
  # Ignore fert stop dates inside of offspring intervals (which means not fertile)
}
camposfa/plhdbR documentation built on May 13, 2019, 11:02 a.m.