R/data-SurvData.R

Defines functions is.between is_exposure_constant checking_table check_concNsurv check_TimeNsurv check_Nsurv check_concentration check_time add_Ninit pool_constant survDataCheck survData.data.frame survData

Documented in check_concentration check_concNsurv checking_table check_Nsurv check_time check_TimeNsurv is.between is_exposure_constant survData survDataCheck survData.data.frame

#' @name SurvData
#' 
#' @title Creates a data set for survival analysis
#'
#' @description
#' This function creates a \code{SurvData} object from experimental data
#' provided as a \code{data.frame}. The resulting object
#' can then be used for plotting and model fitting. It can also be used
#' to generate \emph{individual-time} estimates.
#'
#'
#' The \code{x} argument describes experimental results from a survival
#' toxicity test. Each line of the \code{data.frame}
#' corresponds to one experimental measurement, that is a number of alive
#' individuals at a given concentration at a given time point and in a given replicate.
#'  Note that either the concentration
#' or the number of alive individuals may be missing. The data set is inferred
#' to be under constant exposure if the concentration is constant for each
#' replicate and systematically available. The function \code{survData} fails if
#' \code{x} does not meet the
#' expected requirements. Please run \code{\link{survDataCheck}} to ensure
#' \code{x} is well-formed.
#'
#' @param data a \code{data.frame} containing the following four columns:
#' \itemize{
#' \item \code{replicate}: a vector of any class \code{numeric}, \code{character} or \code{factor} for replicate
#' identification. A given replicate value should identify the same group of
#' individuals followed in time
#' \item \code{conc}: a vector of class \code{numeric} with tested concentrations
#' (positive values, may contain NAs)
#' \item \code{time}: a vector of class \code{integer} with time points, minimal value must be 0
#' \item \code{Nsurv}: a vector of class \code{integer} providing the number of
#' alive individuals at each time point for each concentration and each replicate
#' (may contain NAs)
#' }
#' @param \dots Further arguments to be passed to generic methods
#'
#' @return A dataframe of class \code{survData} and column \code{replicate} as \code{factor}.
#'
#' @seealso \code{\link{survDataCheck}}
#'
#' @keywords transformation
#'
#' @export
#'
survData <- function(data, ...){
  UseMethod("survData")
}

#' @name SurvData
#' @export
survData.data.frame <- function(data, ...){
  
  # CHECKING
  tab_check <- survDataCheck(data, quiet = TRUE)
  if (nrow(tab_check) > 0) {
    stop("data not well-formed. See 'survDataCheck'.")
  }
  # PROCESSING
  data <- add_Ninit(data)
  data$replicate = as.factor(data$replicate)
  rownames(data) <- NULL
  
  if (is_exposure_constant(data)) {
    data <- pool_constant(data)
    child_class <- "SurvDataCstExp"
  } else {
    child_class <-  "SurvDataVarExp"
  }
  class(data) <- append(c(child_class, "SurvData"), class(data))
  return(data)
}

#' @name SurvDataCheck
#' 
#' @title Checks if an object can be used to perform survival analysis
#'
#' @description
#' The \code{survDataCheck} function can be used to check if an object
#' containing survival data is formatted according to the expectations of the
#' \code{survData} function.
#'
#' @param data any object looking as a data.frame.
#' @param quiet binary (TRUE, FALSE). If FALSE (default), remove some messages
#' in console.
#'
#' @return The function returns a dataframe with message describting the error
#' in the formatting of the data. When no error is detected the object is empty.
#'
#' @export
survDataCheck <- function(data, quiet = FALSE) {
  
  tb <- data.frame(msg = character(0))
  # 0. check colnames
  REFCOLNAMES <- c("replicate","conc","time","Nsurv")
  valid_colnames <- REFCOLNAMES %in% names(data)
  if (!all(valid_colnames)) {
    msg = "Colnames are missing: 'replicate', 'conc', 'time', 'Nsurv'."
    tb <- rbind(tb, data.frame(msg = msg))
    return(tb)
  }
  # Next check are not depending to each other
  subdata <- split(data, data$replicate, drop = TRUE)
  ## 1. check time is (1) numeric, (2) unique/replicate, (3) minimal value is 0.
  time_check <- sapply(subdata, function(d) check_time(d))
  msg = "'time' column must be numerical values,
    unique/replicate, and min 0 / replicate."
  tb <- checking_table(tb, time_check, msg)
  ## 2. assert concentrations are numeric and positive
  concentration_check <- check_concentration(data)
  msg = "'conc' column must be positive numerical values"
  tb <- checking_table(tb, concentration_check, msg)
  ## 3. assert Nsurv contains integer and positive
  Nsurv_check <- check_Nsurv(data)
  msg = "Column 'Nsurv' must contain only integer values"
  tb <- checking_table(tb, Nsurv_check, msg)
  ## 4. assert Nsurv never increases with time and >0 at 0
  tN_check <- sapply(subdata, function(d) check_TimeNsurv(d))
  msg = "'Nsurv' must be >0 at t=0 and decrease with time."
  tb <- checking_table(tb, tN_check, msg)
  ## 6. Assert max(time in data_conc) >= max(time in data_surv)
  cN_check <- sapply(subdata, function(d) check_concNsurv(d))
  msg = "Timeline in conc must covert timeline of Nsurv."
  tb <- checking_table(tb, cN_check, msg)
  
  if (quiet == FALSE) {
    if (nrow(tb) == 0) {
      message("Correct formatting")
    } else{
      message("Some mistake in formating. Look the message outputs.")
    }
  }
  return(tb)
}


# @name SurvData
# 
# @title pool replicates
# 
# @description
# * **pool_constant**: pool replicates with the same concentration when the 
# exposure profile is constant.
# 
# @param data An object of class \code{SurvData}
# @return \code{data} with pooling replicate
pool_constant <- function(data){
  x <- aggregate(cbind(Nsurv, Ninit) ~ time + conc, data, sum)
  x[["replicate"]] <- as.character(x[["conc"]])
  return(x)
}

# @name SurvData
# 
# @title Add Nsurv initial
#
# @description
# Add a column Ninit for each replicate with the initial number of survival
# object the initial number of individuals in the corresponding replicate
#
# @param self a data.frame succeeding \code{survDataCheck} function
# 
# @return a data.frame with an additional column "Ninit"
add_Ninit <- function(self){
  subdata <- split(self, self$replicate, drop = TRUE)
  if ("Ninit" %in% colnames(self)) {
    check_Ninit <- sapply(subdata, function(d){
      unique(d$Ninit) == max(d$Nsurv, na.rm = TRUE)
    })
    if (!all(check_Ninit)) {
      stop("A column name 'Ninit' has a wrong formatting.
                 If you remove the column, it will be reformatted.")
    }
  } else{
    ls_Ninit <- lapply(subdata, function(d){
      d$Ninit = max(d$Nsurv, na.rm = TRUE)
      return(d)
    })
    self <- do.call("rbind", c(ls_Ninit, make.row.names = FALSE))
  }
  return(self)
}

################################################################################
# A SET OF INTERNAL METHODS FOR CHECKING DATA


#' @name CheckData
#' 
#' @title Set of function to test conformity of data
#' 
#' @description
#' * **check_time**: check if the \code{time} within a \code{time serie} 
#' is (1) numeric, (2) unique, (3) minimal value is 0.
#' * **check_concentration**: check if the \code{concentration} is numeric 
#' and always positive.
#' * **check_Nsurv**: check if the \code{Nsurv} is (1) integer and 
#' (2) always positive (3) can be NA.
#' * **check_TimeNsurv**: check if the pair \code{time} - \code{Nsurv} within 
#' a \code{time serie} satisfies (1) Nsurv at t=0 is >0, (2) decreasing. 
#' * **check_concNsurv**: check if the pair \code{conc} - \code{Nsurv} within
#'  a \code{time serie} satisfies that the timeline of concentration covers
#'  timeline of Nsurv.
#' * **checking_table**: add \code{msg} in a data.frame \code{data} if 
#'  \code{check} are not all TRUE.
#' * **is_exposure_constant**: Test in a well-formed argument to function 
#'  \code{SurvData} if the concentration is constant and different 
#'  from \code{NA} for each replicate (each time-serie).
#' * **is.between**: Test if \code{x} is between \code{min} and \code{max}
#' 
#' @param data a data.frame
#' @param check binary vector of TRUE/FALSE
#' @param msg a message to add to the data.frame
#' 
#' @return a boolean \code{TRUE} if concentration in \code{replicate} is constant,
#'  or \code{FALSE} if the concentration in at least one of the replicates is time-variable,
#'  and/or if \code{NA} occurs. 
#'
NULL

#' @name CheckData
check_time <- function(data){
  t <- data$time
  unicity <- length(unique(t)) == length(t)
  numeric <- is.numeric(t)
  minimal <- min(t) == 0
  positivity <- sum(t < 0) == 0
  return(all(c(unicity, numeric, minimal, positivity)))
}

#' @name CheckData
check_concentration <- function(data){
  c <- data$conc
  numeric <- is.numeric(c)
  positivity <- all(c >= 0, na.rm = TRUE)
  return(all(c(numeric, positivity)))
}

#' @name CheckData
check_Nsurv <- function(data){
  n <- data$Nsurv[!is.na(data$Nsurv)]
  integer <- all(n == as.integer(n))
  positivity <- all(n >= 0)
  return(all(c(integer, positivity)))
}

#' @name CheckData
check_TimeNsurv <- function(data){
  data <- data[!is.na(data$Nsurv),]
  if (check_time(data) && check_Nsurv(data)) {
    n <- data$Nsurv
    t <- data$time
    nt0_sup0 <- n[t == 0] > 0
    decrease <- all(diff(n[order(t)]) <= 0)
    return(all(c(nt0_sup0, decrease)))
  } else{
    return(FALSE)
  }
}

#' @name CheckData
check_concNsurv <- function(data){
  tN <- max(data[!is.na(data$Nsurv), ]$time)
  tc <- max(data[!is.na(data$conc), ]$time)
  return(tc >= tN)
}

#' @name CheckData
checking_table <- function(data, check, msg){
  if (!all(check)) {
    data <- rbind(data, data.frame(msg = msg))
  }
  return(data)
}

#' @name CheckData
#' @export
is_exposure_constant <- function(data) {
  no_NA <- all(!is.na(data$conc))
  subdata <- split(data, data$replicate, drop = TRUE)
  unicity <- sapply(subdata, function(d){
    length(unique(d$conc)) == 1
  })
  return(no_NA && all(unicity))
}

#' @name CheckData
#' @param x parameter to check if it's between min and max
#' @param min minimal value. x must be greater than min
#' @param max maximal value. x must be lower than max
#' @export
is.between <- function(x, min, max){ (x >= min & x <= max) }

Try the morseTKTD package in your browser

Any scripts or data that you put into this service are public.

morseTKTD documentation built on June 8, 2025, 10:28 a.m.