R/data_from_function-function.R

Defines functions data_from_function

Documented in data_from_function

#' Generate causally-simulated data
#'
#' @param func Functions, an object class generated by
#' \code{\link{function_from_edge}} or \code{\link{function_from_user}}
#' functions. All vertices must be defined for their functions. The causal
#' structure needs to be a directed acyclic graph (DAG), which means no loops
#' are allowed. Use \code{\link{edge_from_function}} to identify edges given a
#' list of functions, then draw a causal diagram using the edges data frame
#' (see vignettes). At least a function in the list must include 'n' as the
#' only argument. All arguments within any function must be defined by their
#' respective functions, except the argument 'n'. The output lengths of vertex
#' functions must match the specified length 'n'.
#' @param n Number of observations, a numeric of length 1, non-negative, and
#' non-decimal.
#'
#' @return A data frame which include the simulated data for each vertex as a
#' column.
#'
#' @keywords time-fixed-data-generation
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom igraph graph_from_data_frame is_dag
#' @importFrom purrr pmap
#'
#' @examples
#'
#' data(functions)
#' data_from_function(functions, n = 100)

data_from_function=function(func,n){
  # Check if 'func' is 'Functions'
  if(!inherits(func,"Functions")){
    stop(
      paste0(
        '\n'
        ,'The argument \'func\' is not of class \'Functions\'. Please use\n'
        ,'function_from_edge() or function_from_user() to create valid input\n'
        ,'for this function.'
      )
    )
  }

  # Check if all vertices in 'func' have been defined for their functions
  if(!all(sapply(func,is.function))){
    stop(
      paste0(
        '\n'
        ,'These vertices in \'func\' has not been defined for their '
        ,'functions:\n'
        ,names(func)[!sapply(func,is.function)] %>%
          paste0(collapse=', ')
      )
    )
  }

  # Check if 'n' is a single numeric
  if(!all(is.numeric(n),length(n)==1)){
    stop(
      paste0(
        '\n'
        ,'The argument \'n\' must be a numeric of length 1. Please\n'
        ,'ensure that \'n\' is specified correctly.'
      )
    )
  }

  # Check if 'n' is a non-negative, non-zero, and non-decimal numeric
  if(!all(n>0,n==as.integer(n))){
    stop(
      paste0(
        '\n'
        ,'The argument \'n\' must be a non-negative, non-zero, and non-decimal "
        ,"numeric. Please ensure that \'n\' is specified correctly.'
      )
    )
  }

  # Check if edges construct a directed acyclic graph
  edges_as_graph=
    func %>%
    edge_from_function() %>%
    graph_from_data_frame(directed=T)

  if(!is_dag(edges_as_graph)){
    stop(
      paste0(
        '\n'
        ,'Your causal structure needs to be a directed acyclic graph (DAG), \n'
        ,'which means no loops are allowed. Please check your diagram and \n'
        ,'make sure all arrows flow in one direction without circling back.'
      )
    )
  }

  # List arguments in each function
  arg=
    func %>%
    lapply(formals) %>%
    lapply(names)

  arg_unique=
    arg %>%
    unlist() %>%
    unique()

  # Check if at least one function has the argument 'n'
  is_arg_with_n=
    arg %>%
    sapply(\(x)'n'%in%x)

  if(sum(is_arg_with_n)==0){
    stop(
      paste0(
        '\n'
        ,'None of the functions in the list includes \'n\' as an argument.\n'
        ,'At least one function must include \'n\' as the only argument.'
      )
    )
  }

  # Check if a function has the argument 'n' then it must be the only one
  if(!all(sapply(arg[is_arg_with_n],\(x)length(x)==1))){
    stop(
      paste0(
        '\n'
        ,'If a function includes the argument \'n\', it must be the only\n'
        ,'argument in that function. Please modify the functions for these\n'
        ,'vertices, accordingly:\n'
        ,arg[is_arg_with_n][!sapply(arg[is_arg_with_n],\(x)length(x)==1)] %>%
          names() %>%
          paste0(collapse=', ')
      )
    )
  }

  # Check if arguments in any functions have their own functions, except 'n'
  arg_unique_except_n=
    arg_unique[arg_unique!='n']

  if(!all(arg_unique_except_n%in%names(func))){
    stop(
      paste0(
        '\n'
        ,'All arguments within any function must be defined by their\n'
        ,'respective functions, except the argument \'n\'. Please ensure\n'
        ,'these arguments comply with this rule:\n'
        ,arg_unique_except_n[!arg_unique_except_n%in%names(func)] %>%
          paste0(collapse=', ')
      )
    )
  }

  # Generate data from terminal vertices
  v_term=
    arg[sapply(arg,\(x)'n'%in%x & length(x)==1)] %>%
    names()

  v_term=
    v_term %>%
    `names<-`(as.character(v_term)) %>%
    lapply(\(x)func[[x]](n))

  # Check if each terminal vertex has a vector length equal to 'n'
  if(!all(sapply(v_term,length)==n)){
    stop(
      paste0(
        '\n'
        ,'The output lengths of these terminal vertex functions do not match\n'
        ,'the specified length \'n\':\n'
        ,v_term[sapply(v_term,length)!=n] %>%
          names() %>%
          paste0(collapse=', ')
      )
    )
  }

  # Create an empty list for data from non-terminal vertices
  v_nonterm=
    list()

  # List vertices of which data have been generated
  v_generated=
    v_term %>%
    names()

  # List vertices of which data have not been generated yet
  v_ungenerated=
    arg[!sapply(arg,\(x)'n'%in%x & length(x)==1)] %>%
    names()

  # Start from the first vertex among those with ungenerated data
  i=1

  # Loop until no vertex with ungenerated data
  while(length(v_ungenerated)>0){
    # Define a vertex for current loop
    j=v_ungenerated[[i]]

    # If all arguments are available among vertices with generated data,
    # then:
    if(all(arg[[j]]%in%v_generated)){
      # Generate data for the vertex
      v_nonterm[[j]]=
        v_term %>%
        c(v_nonterm)

      v_nonterm[[j]]=
        v_nonterm[[j]][arg[[j]]] %>%
        as.data.frame() %>%
        pmap(func[[j]]) %>%
        unlist()

      # Include the vertex in the list of vertices with generated data
      v_generated=
        v_generated %>%
        c(j)

      # Exclude the vertext out the list of vertices with ungenerated data
      v_ungenerated=
        v_ungenerated[v_ungenerated!=j]

      # Start from the first vertex among those with ungenerated data
      i=1
    }

    # otherwise, go to the next vertex among those with ungenerated data
    else{
      i=i+1
    }
  }

  # Check if each non-terminal vertex has a vector length equal to 'n'
  if(!all(sapply(v_nonterm,length)==n)){
    stop(
      paste0(
        '\n'
        ,'The output lengths of at least one non-terminal vertex function\n'
        ,'does not match the specified length \'n\':\n'
        ,v_nonterm[sapply(v_nonterm,length)!=n] %>%
          names() %>%
          paste0(collapse=', ')
      )
    )
  }

  # Concatenate data from terminal- and non-terminal vertices
  v=v_term %>%
    c(v_nonterm)

  # Return as a data frame
  v %>%
    as.data.frame()
}

Try the rcausim package in your browser

Any scripts or data that you put into this service are public.

rcausim documentation built on June 24, 2024, 5:06 p.m.