R/integrity.checks.R

Defines functions integrity.checks

Documented in integrity.checks

# Name   : integrity.checks
# Desc   : Separate function to check GT and begin/end values
# Date   : 2012/06/08
# Update : 2023/03/03
# Author : Boelle, Obadia
###############################################################################


#' @title
#' Integrity checks for input parameters
#' 
#' @description
#' Before any requested estimation routine is ran, [integrity.checks()] is called
#' to ensure the data passed as arguments meet the proper format and can be 
#' properly interpreted by subsequent functions.
#' 
#' @details
#' For internal use. Called by all implemented estimation methods.
#' All integrity/class checks are handled by this core function. GT must be an 
#' object of class `R0.GT`, and epidemic curve along with time values are handled 
#' here. If you plan on calling manually any other estimation function, make sure 
#' data are provided with correct format.
#' 
#' The epidemic curve `epid` may be provided as a vector. In that case, a vector 
#' `t` may be provided with the dates of observation. If `t` is not numeric, an 
#' attempt is made to convert to dates with [as.Date()]. If `t` is not provided, 
#' dates are obtained from the names of incid, and if not available, index 
#' values are used. Finally, one can provide an epidemic curve object generated by 
#' the epitools package (see [check.incid()] for more details).
#' 
#' A quick note on `t`, `begin` and `end` : when a date vector is provided (`t`), 
#' it will be used instead of index values to establish a date-related incidence. 
#' If no date vector is provided, then `begin` and `end` can still be forced to 
#' *numeric* values. It then links to the corresponding index values for incidence 
#' data. If a date vector is provided, `begin` and `end` can either be *numeric* 
#' values or *dates*. If numeric, they will link to the correspondig index values 
#' for incidence, and be afterward interpreted as the associated date. If date, 
#' they will be directly associated to incidence data. 
#' 
#' Basicly, if specified, `begin` and `end` must always have the same class.
#' 
#' @param epid Epidemic dataset, expecting incidence counts in a varity of possible formats (see [check.incid()]).
#' @param GT Generation time distribution from [generation.time()]. 
#' @param t Vector of dates at which incidence was observed. 
#' @param begin Begin date for estimation. Can be an integer or a date (YYYY-mm-dd or YYYY/mm/dd).
#' @param end End date for estimation. Can be an integer or a date (YYYY-mm-dd or YYYY/mm/dd).
#' @param date.first.obs Optional date of first observation, if `t` not specified. 
#' @param time.step Optional. If date of first observation is specified, number of day between each incidence observation. 
#' @param AR Attack rate as a percentage from total population. 
#' @param S0 Initial proportion of the population considered susceptible.
#' @param methods Vector of methods to be used for R/R0/Rt estimation. Must be provided as `c("method 1", "method 2", ...)`.
#' 
#' @return
#' A list with two components, `begin` and  `end`.
#' 
#' @export
#' 
#' @author Pierre-Yves Boelle, Thomas Obadia



# Function declaration

integrity.checks <- function(
    epid, 
    GT, 
    t, 
    begin, 
    end,  
    date.first.obs, 
    time.step, 
    AR, 
    S0, 
    methods 
)

# Code

{
  #Integrity checks are different for AR and other methods.
  if ("EG" %in% methods | "ML" %in% methods | "TD" %in% methods | "SB" %in% methods) {
    #Computes the date vector with date.first.obs
    if (!is.null(date.first.obs)) {
      date.first.obs = as.Date(date.first.obs)
    }
    tmp.epid = check.incid(epid, t, date.first.obs, time.step)
    
    # Various class and integrity checks are ran
    #check generation time (can be omitted if method is limited to AR)
    if (length(methods) == 1) {
      if (methods != "AR") {
        if (!inherits(GT, "R0.GT")) {
          stop("'GT' must be provided as a GT class object.")
        }
      }
    }
    
    #Checks on 'begin' and 'end'
    if (!is.null(begin) & !is.null(end)) {
      # if begin and end are not null
      if (class(begin) != class(end)) {
        # must be of the same class 
        stop(paste("If both 'begin' =",begin," and 'end'=", end, "are provided, they must be of the same class (dates, character strings or integers)."))
      }
    }
    
    if ((is.character(begin) | is.character(end) | inherits(begin, "Date") | inherits(end, "Date")) & !inherits(tmp.epid$t, "Date")) {
      # begin ou end ne peuvent ?tre des dates que si t est une date
      stop("'begin' or 'end' may be provided as dates only if 'epid' or 't' contains dates.")
      
    }
    
    # from here, begin and end are of the same class or missing
    begin.nb <- 1
    if (is.null(begin)) {
      #begin is missing, start at first date
      begin <- tmp.epid$t[1]
    } else if (is.numeric(begin)) {
      # begin is given 
      if ((begin <1) | begin > length(tmp.epid$t)) begin=1
      begin.nb <- begin
      begin <- tmp.epid$t[begin]
    } else if (inherits(begin, "Date")) {
      begin.nb <- which(tmp.epid$t == begin)
    } else if (is.character(begin)) { # try to convert using standard formats
      tmp.begin <- try(as.Date(begin, format = "%Y-%m-%d"),silent=T)
      if (inherits(tmp.begin, "Date")) begin <- tmp.begin
      tmp.begin <- try(as.Date(begin, format = "%d/%m/%Y"),silent=T)
      if (inherits(tmp.begin, "Date")) begin <- tmp.begin
      begin.nb <- which(tmp.epid$t == begin)
    } 
    if (is.null(begin)) {
      warning("Could not interpret 'begin'. Using default (=1).")
      begin <- tmp.epid$t[1]
      begin.nb <- 1
    }
    
    # from here, begin and end are of the same class or missing
    #default value for end.nb, may be amended after
    
    if (is.null(end)) {
      #end is missing, use default = epidemic peak
      end.nb <- which.max(tmp.epid$incid)
      
      #Other possible value is the closest to half the epidemic peak. Not used currently.
      #tmp <- list(incid=tmp.epid$incid[1:which.max(tmp.epid$incid)], t=tmp.epid$t[1:which.max(tmp.epid$incid)])
      #end.nb = min(which((tmp$incid-max(tmp$incid)/2)>0)) + which.min(tmp$incid[min(which((tmp$incid-max(tmp$incid)/2)>0)):length(tmp$incid)]) - 1
      
      end <- tmp.epid$t[end.nb]
    } else if (is.numeric(end)) {
      # end is given 
      # provide default value for end.nb
      end.nb <- end
      if ((end.nb < begin.nb) | (end > length(tmp.epid$t))) end = end.nb
      if (end.nb <= begin.nb) end <- length(tmp.epid$t)
      end <- tmp.epid$t[end.nb]
    } else if (inherits(end, "Date")) {
      end.nb <- which(tmp.epid$t == end)
    } else if (is.character(end)) { # try to convert using standard formats
      tmp.end <- try(as.Date(end,format = "%Y-%m-%d"),silent=T)
      if (inherits(tmp.end, "Date")) end <- tmp.end
      tmp.end <- try(as.Date(end,format = "%d/%m/%Y"),silent=T)
      if (inherits(tmp.end, "Date")) end <- tmp.end
      end.nb <- which(tmp.epid$t == end)
    } 
    if (is.null(end)) {
      warning("Could not interpret 'end'. Using default (=peak).")
      end.nb <- min(which((tmp.epid$incid - max(tmp.epid$incid)/2)>0))
      end <- tmp.epid$t[end.nb]
    }
    
    if (end.nb <= begin.nb) stop("'begin' and 'end' are not consistent.")
  }
  
  #If method is only AR, checks if arguments are consistent
  else {
    if ((!is.null(S0)) && (S0 < 0 | S0 > 1)) {
      stop("S0 should only take value between 0 and 1.")
    }
    
    if ((!is.null(AR)) && (AR < 0 | AR > S0)) {
      stop("AR should only take value between 0 and S0")
    }
  }
  
  #Function returns only begin and end, which are the only two values that might have been tweaked here
  #They are now consistent with the epidemic curve, or the t vector provided
  return (list(begin=begin, end=end))
}
tobadia/R0 documentation built on Sept. 24, 2023, 5:16 p.m.