R/time_varying-function.R

Defines functions time_varying

Documented in time_varying

#' Generate time-varying data
#'
#' @param func Functions, an object class generated by
#' \code{\link{function_from_edge}} or \code{\link{function_from_user}}
#' functions. All vertices must be defined for their functions. The causal
#' structure needs to be a directed cyclic graph (DCG), which means loops are
#' allowed. Use \code{\link{edge_from_function}} to identify edges given a list
#' of functions, then draw a causal diagram using the edges data frame (see
#' vignettes). All arguments within any function must be defined by their
#' respective functions, except the argument 'n'. The output lengths of vertex
#' functions must match the input length.
#' @param data Data, a data frame generated by \code{\link{data_from_function}}
#' which contains causally-simulated data at t=0. Column names of 'i', 't', and
#' 't_max' are not allowed, which respectively refer to instance, time, and
#' maximum time.
#' @param T_max Maximum time for every instance, a numeric vector of length
#' equal to the number of rows in 'data' and must be non-negative and
#' non-decimal.
#'
#' @return A data frame which include the simulated data for each vertex as a
#' column for each time up to maximum time for every instance.
#'
#' @keywords time-varying-data-generation
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom igraph graph_from_data_frame is_dag
#' @importFrom dplyr mutate select everything select_at
#' @importFrom purrr pmap
#'
#' @examples
#'
#' data(functions)
#' simulated_data <- data_from_function(functions, n = 100)
#'
#' function_B <- function(B){
#'   B + 1
#' }
#'
#' functions <- define(functions, which = "B", what = function_B)
#' T_max <- rpois(nrow(simulated_data), lambda = 25)
#'
#' time_varying(functions, data = simulated_data, T_max = T_max)

time_varying=function(func,data,T_max){
  # Check if 'func' is 'Functions'
  if(!inherits(func,"Functions")){
    stop(
      paste0(
        '\n'
        ,'The argument \'func\' is not of class \'Functions\'. Please use\n'
        ,'function_from_edge() or function_from_user() to create valid input\n'
        ,'for this function.'
      )
    )
  }

  # Check if all vertices in 'func' have been defined for their functions
  if(!all(sapply(func,is.function))){
    stop(
      paste0(
        '\n'
        ,'These vertices in \'func\' has not been defined for their '
        ,'functions:\n'
        ,names(func)[!sapply(func,is.function)] %>%
          paste0(collapse=', ')
      )
    )
  }

  # Check if 'data' is a data frame
  if(!is.data.frame(data)){
    stop(
      paste0(
        '\n'
        ,'The argument \'data\' must be a data frame. Please ensure that\n'
        ,'\'data\' is specified correctly.'
      )
    )
  }

  # Check if 'T_max' is a numeric vector equal to the number of rows in 'data'
  if(!all(is.numeric(T_max),length(T_max)==nrow(data))){
    stop(
      paste0(
        '\n'
        ,'The argument \'T_max\' must be a numeric vector of length equal to\n'
        ,'the number of rows in \'data\' . Please ensure that \'T_max\' is\n'
        ,'specified correctly.'
      )
    )
  }

  # Check if 'T_max' is a numeric vector of non-negative and non-decimal
  if(!all(T_max>=0,T_max==as.integer(T_max))){
    stop(
      paste0(
        '\n'
        ,'The argument \'T_max\' must be a non-negative and non-decimal\n'
        ,'numeric. Please ensure that \'T_max\' is specified correctly.'
      )
    )
  }

  # Check if edges construct a directed cyclic graph
  edges_as_graph=
    func %>%
    edge_from_function() %>%
    graph_from_data_frame(directed=T)

  if(is_dag(edges_as_graph)){
    stop(
      paste0(
        '\n'
        ,'Your causal structure needs to be a directed cyclic graph (DCG), \n'
        ,'which means loops are allowed. Please check your diagram and \n'
        ,'make sure at least an arrow circles back.'
      )
    )
  }

  # List arguments in each function
  arg=
    func %>%
    lapply(formals) %>%
    lapply(names)

  arg_unique=
    arg %>%
    unlist() %>%
    unique()

  # Check if any function that has the argument 'n'
  is_arg_with_n=
    arg %>%
    sapply(\(x)'n'%in%x)

  # Check if a function has the argument 'n' exists;
  # if yes, then it must be the only one
  if(sum(is_arg_with_n)>0){
    if(!all(sapply(arg[is_arg_with_n],\(x)length(x)==1))){
      stop(
        paste0(
          '\n'
          ,'If a function includes the argument \'n\', it must be the only\n'
          ,'argument in that function. Please modify the functions for these\n'
          ,'vertices, accordingly:\n'
          ,arg[is_arg_with_n][!sapply(arg[is_arg_with_n],\(x)length(x)==1)] %>%
            names() %>%
            paste0(collapse=', ')
        )
      )
    }
  }

  # Check if arguments in any functions have their own functions, except 'n'
  arg_unique_except_n=
    arg_unique[arg_unique!='n']

  if(!all(arg_unique[arg_unique!='n']%in%names(func))){
    stop(
      paste0(
        '\n'
        ,'All arguments within any function must be defined by their\n'
        ,'respective functions, except the argument \'n\'. Please ensure\n'
        ,'these arguments comply with this rule:\n'
        ,arg_unique_except_n[!arg_unique_except_n%in%names(func)] %>%
          paste0(collapse=', ')
      )
    )
  }

  # Check if column names in 'data' include 'i', 't', or 't_max'
  if(any(c('i','t','t_max')%in%colnames(data))){
    stop(
      paste0(
        '\n'
        ,'A column name \'i\', \'t\', or \'t_max\' is not allowed. Please\n'
        ,'choose a different name for the column.'
      )
    )
  }

  # Check if column names in 'data' include all arguments, except n
  if(!all(arg_unique[arg_unique!='n']%in%colnames(data))){
    stop(
      paste0(
        '\n'
        ,'All column names in \'data\' must include all arguments, except\n'
        ,'the argument \'n\'. Please ensure these arguments are included:\n'
        ,arg_unique_except_n[!arg_unique_except_n%in%colnames(data)] %>%
          paste0(collapse=', ')
      )
    )
  }

  # Exclude functions that require 'n'
  func=
    func[!is_arg_with_n]

  # Set data at t_0
  v=data %>%
    mutate(
      i=seq(nrow(data))
      ,t=0
      ,t_max=T_max
    ) %>%
    select('i','t','t_max',everything())

  # Define a variable which include data at any t in T
  v_T=v

  # For each t up maximum t_max in T_max,
  # generate data for i if t_max_i is greater or equal to t
  for(t in seq(max(T_max))){
    # Subset data for eligible i
    v_updated=
      v[T_max>=t,]

    # If data for eligible i exist
    if(nrow(v_updated)>1){
      # For each which variable in func, generate data for eligible i
      for(which in names(func)){

        # Identify arguments in the function
        arg=
          func[[which]] %>%
          formals() %>%
          names()

        # Generate data for which variable using the function
        v_which=
          v_updated %>%
          select_at(arg) %>%
          pmap(func[[which]]) %>%
          unlist()

        # Check if each vertex has a vector length equal to the input length
        if(length(v_which)!=nrow(v_updated)){
          stop(
            paste0(
              '\n'
              ,'The output lengths of a vertex function does not match the\n'
              ,'input length:\n'
              ,which
            )
          )
        }

        # Update the subset for which variable
        v_updated[[which]]=
          v_which
      }

      # Update the data for eligible i
      v[T_max>=t,]=
        v_updated

      # Annotate t in the updated data
      t_values=
        t

      v_updated=
        v_updated %>%
        mutate(t=t_values)

      # Join the updated data with those at any t in T
      v_T=
        v_T %>%
        rbind(v_updated)
    }
  }

  # Return time-varying data
  v_T
}

Try the rcausim package in your browser

Any scripts or data that you put into this service are public.

rcausim documentation built on June 24, 2024, 5:06 p.m.