# Source file: R/mc.R
#
# Defines functions: mc_data, gen_y, pred_dt
#
# Documented in: gen_y, mc_data, pred_dt

#' Generate Monte Carlo Samples
#'
#' @description
#' Builds a \code{\link[data.table]{data.table}} that stacks \code{M} Monte
#' Carlo samples of size \code{N} each, in long format.
#' @param rand Random sample generator function. It is called once per MC
#' sample as \code{rand(n = N, ...)}.
#' @param N Sample size in each MC sample.
#' @param M Number of MC samples.
#' @param mcname Column name of the MC sample indicator. By default, \code{"mc"}.
#' @param xname Column name of the generated data. By default, \code{"x"}.
#' @param char Prefix of the MC sample labels, \code{"s"} by default. Samples
#' are labelled \code{paste0(char, 1)}, ..., \code{paste0(char, M)}.
#' @param ... Additional arguments for \code{rand}.
#' @return
#' A \code{data.table} with two columns: the one named by \code{mcname} is a
#' factor indicating the MC sample, and the one named by \code{xname} holds the
#' generated values.
#' @details
#' This function prepares data for \code{data.table} group operations.
#' By grouping on the \code{mcname} column, an MC simulation can be run with a
#' single grouped expression.
#' @export
mc_data <- function(rand, N = 100, M = 1000, mcname = "mc", xname = "x",
                    char = "s", ...) {
  is_count <- function(v) {
    is.numeric(v) && length(v) == 1L && !is.na(v) && v >= 0
  }
  if (!is.function(rand)) {
    stop("`rand` must be a function with an `n` argument.", call. = FALSE)
  }
  if (!is_count(N) || !is_count(M)) {
    stop("`N` and `M` must be single non-negative numbers.", call. = FALSE)
  }
  if (identical(mcname, xname)) {
    # The second assignment would overwrite the grouping column.
    stop("`mcname` and `xname` must be different.", call. = FALSE)
  }
  mc <- data.table()
  mc %>%
    .[,
      # seq_len() instead of 1:M so that M = 0 yields no labels at all.
      (mcname) := gl(M, k = N, labels = paste0(char, seq_len(M)))] %>%
    .[,
      # One draw of size N per MC sample.
      (xname) := rand(n = N, ...),
      by = mcname] %>%
    # `:=` returns invisibly; the trailing [] makes the result print normally,
    # matching gen_y() and pred_dt().
    .[]
}

#' Generate Response
#'
#' @description
#' This function generates the response variable from an assumed true model
#' plus a random error term.
#' @param data MC data set generated by \code{\link{mc_data}}.
#' @param fit True model function. It is called once on the whole
#' \code{xname} column.
#' @param rand Random sample generator function for the error term. It is
#' called once per MC sample with the group size as its first argument.
#' By default, \link[stats]{rnorm}.
#' @param mcname Column name of the MC sample. By default, \code{"mc"}.
#' @param xname Column name of the data. By default, \code{"x"}.
#' @param yname Column name of the response. By default, \code{"y"}.
#' @param fitname Column name of the true fit. By default, \code{"fx"}.
#' @param fit_col Keep the true function value column? \code{TRUE} by default.
#' @param ... Additional arguments for \code{rand}.
#' @return
#' A \code{data.table} copy of \code{data} with a new column named by
#' \code{yname} (true function value plus error). When \code{fit_col} is
#' \code{TRUE}, the column named by \code{fitname} (true function value) is
#' kept as well; otherwise it is removed.
#' @export
gen_y <- function(data, fit, rand = stats::rnorm, mcname = "mc", xname = "x",
                  yname = "y", fitname = "fx", fit_col = TRUE, ...) {
  if (!is.function(fit) || !is.function(rand)) {
    stop("`fit` and `rand` must be functions.", call. = FALSE)
  }
  # data.table() copies, so the caller's object is never modified by `:=`.
  dt <- data.table(data)
  # Fail early: a missing column would otherwise let eval() below silently
  # resolve to an unrelated object in an enclosing environment.
  absent <- setdiff(c(mcname, xname), names(dt))
  if (length(absent) > 0) {
    stop(
      "Column(s) not found in `data`: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  mc_sym <- sym(mcname)
  x_sym <- sym(xname)
  fit_sym <- sym(fitname)
  dt <-
    dt %>%
    .[,
      (fitname) := fit(eval(x_sym))] %>%
    .[,
      (yname) := eval(fit_sym) + rand(.N, ...),
      by = eval(mc_sym)] %>%
    .[]
  if (fit_col) {
    dt
  } else {
    dt[, (fitname) := NULL][]
  }
}

#' Train and Predict a Model in each MC Sample
#'
#' @description
#' This function fits a model and produces fitted values in each MC sample.
#' @param data MC data set in the form produced by \code{\link{mc_data}}.
#' @param mcname Column name of the MC sample. By default, \code{"mc"}.
#' @param mod Model function. It is called once per MC sample as
#' \code{mod(formula = formula, data = .SD, ...)}.
#' @param formula An object of class \link[stats]{formula}.
#' @param pred_name Column name of the predicted values. By default, \code{"pred"}.
#' @param ... Additional arguments for \code{mod}.
#' @return
#' A \code{data.table} copy of \code{data} with an added column, named by
#' \code{pred_name}, holding the fitted values from \code{mod}.
#' @details
#' For now, \code{mod} must accept \code{formula} and \code{data} arguments.
#' Fitted values are obtained with \code{predict()} on the fitted object,
#' without new data. Some classes have an option called \code{type}, for
#' example \link[stats]{predict.glm}; this function always uses
#' \code{type = "response"}.
#' Support for model functions that take \code{x} and \code{y} instead of a
#' formula is planned but not implemented.
#' @importFrom rlang enquo sym
#' @importFrom stats predict
#' @export
pred_dt <- function(data, mcname = "mc", mod, formula, pred_name = "pred", ...) {
  # data.table() copies, so the caller's object is never modified by `:=`.
  dt <- data.table(data)
  # Fail early: a missing column would otherwise let eval() in `by` silently
  # resolve to an unrelated object in an enclosing environment.
  absent <- setdiff(mcname, names(dt))
  if (length(absent) > 0) {
    stop(
      "Column(s) not found in `data`: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  mc_sym <- sym(mcname)
  dt %>%
    .[,
      # Fit on the rows of the current MC sample and keep its fitted values.
      (pred_name) := predict(
        mod(formula = formula, data = .SD, ...),
        type = "response"
      ),
      by = eval(mc_sym)] %>%
    .[]
}
# ygeunkim/youngtool documentation built on Dec. 14, 2019, 7:42 p.m.