R/assess_performance.R
In eesim: Simulate and Evaluate Time Series for Environmental Epidemiology

Documented in beta_bias beta_var check_sims coverage_beta mean_beta power_beta power_calc

#' Average Estimated Coefficient
#'
#' This function gives the mean value of the estimated log relative risks (\eqn{\hat{\beta}}s)
#' and the mean of the estimated relative risk values over the \code{n} simulations.
#'
#' @param df A data frame of replicated simulations which must include a column
#'    titled "Estimate" with the effect estimate from the fitted model.
#'
#' @return A data frame with the mean estimated log relative risk and mean estimated
#'    relative risk. The mean estimated risk is based on first calculating the
#'    mean log relative risk and then exponentiating this mean value.
#'
#' @examples
#' sims <- create_sims(n_reps=10, n=50, central = 100, sd = 10,
#'             exposure_type="continuous", exposure_trend = "cos1",
#'             exposure_amp = .6, average_outcome = 22,
#'             outcome_trend = "no trend", outcome_amp = .6, rr = 1.01)
#' fits <- fit_mods(data = sims, custom_model = spline_mod,
#'                  custom_model_args = list(df_year = 1))
#' mean_beta(df=fits)
#'
#' @export
mean_beta <- function(df){
  if (!is.data.frame(df)){
    stop("Input must be a data frame")
  }
  beta_hat <- mean(df$Estimate)
  rr_hat <- mean(beta_hat)
  out <- data.frame(beta_hat, rr_hat)
  return(out)
}

#' Standard Deviation of Estimated Coefficients
#'
#' Measures the variance of the point estimates of the estimated log relative risk
#' (\eqn{\hat{beta}}{b}) over the \code{n_rep} simulations and the mean of
#' the variances of each \eqn{\hat{\beta}}{b}.
#'
#' @param df A data frame of replicated simulations which must include columns
#'    titled "Estimate" and "Std.Error".
#'
#' @return A data frame of the variance across all values of beta hat
#'    and the mean variance of the beta hats
#'
#' @examples
#' sims <- create_sims(n_reps = 10, n = 600, central = 100,
#'                     sd = 10, exposure_type = "continuous",
#'                     exposure_trend = "cos1",
#'                     exposure_amp = 0.6,
#'                     average_outcome = 20,
#'                     outcome_trend = "no trend",
#'                     rr = 1.01)
#' fits <- fit_mods(data = sims, custom_model = spline_mod,
#'                  custom_model_args  = list(df_year = 1))
#' beta_var(fits)
#'
#' @export
beta_var <- function(df){
  var_across_betas <- stats::var(df$Estimate)
  mean_beta_var <- mean(df$Std.Error^2)
  out <- data.frame(var_across_betas, mean_beta_var)
  return(out)
}

#' Percent Bias of Estimated Coefficient
#'
#' This function returns the relative bias of the mean of the estimated coefficients.
#'
#'
#' @details This function estimates the percent bias in the estimated log relative risk
#'    (\eqn{b}) as:
#' \deqn{100 * \frac{\beta - \hat{\beta}}{\beta}}{100 ((\beta - b) / \beta)}
#' where \eqn{\hat{\beta}}{b} is the mean of the estimated log relative risk values from all
#' simulations and \eqn{\beta} is the true log relative risk used to simulate the data.
#'
#' @inheritParams mean_beta
#' @param true_rr The true relative risk used to simulate the data.
#'
#' @return A data frame with a single value: the percent bias of the mean of the estimated
#'    coefficients over \code{n_reps} simulations.
#'
#' @examples
#' sims <- create_sims(n_reps = 10, n = 600, central = 100,
#'                     sd = 10, exposure_type = "continuous",
#'                     exposure_trend = "cos1",
#'                     exposure_amp = 0.6,
#'                     average_outcome = 20,
#'                     outcome_trend = "no trend",
#'                     rr = 1.01)
#' fits <- fit_mods(data = sims, custom_model = spline_mod,
#'                  custom_model_args = list(df_year = 1))
#' beta_bias(fits, true_rr = 1.02)
#'
#' @export
beta_bias <- function(df, true_rr){
  percent_bias <- 100 * (log(true_rr) - mean(df$Estimate)) / log(true_rr)
  out <- data.frame(percent_bias)
  return(out)
}

#' Empirical coverage of confidence intervals
#'
#' Calculates the percent of simulations in which the estimated 95\% confidence
#' interval for the log relative risk includes the true value of the log
#' relative risk.
#'
#' @param df A data frame of replicated simulations which must include columns
#'    titled \code{lower_ci} and \code{upper_ci}.
#' @param true_rr The true relative risk used to simulate the data.
#'
#' @return A data frame with the percent of confidence intervals for the
#'    estimated log relative risk over \code{n_reps} simulations which include
#'    the true log relative risk.
#'
#' @examples
#' sims <- create_sims(n_reps = 10, n = 600, central = 100,
#'                     sd = 10, exposure_type = "continuous",
#'                     exposure_trend = "cos1", exposure_slope=1,
#'                     exposure_amp = 0.6,
#'                     average_outcome = 20,
#'                     outcome_trend = "no trend",
#'                     rr = 1.01)
#' fits <- fit_mods(data = sims, custom_model = spline_mod,
#'                  custom_model_args = list(df_year = 1))
#' coverage_beta(df=fits, true_rr = 1.02)
#'
#' @export
coverage_beta <- function(df, true_rr){
  true_beta <- log(true_rr)
  coverage <- df$lower_ci <= true_beta & df$upper_ci >= true_beta
  out <- data.frame(coverage = sum(coverage) / nrow(df))
  return(out)
}

#' Estimate power
#'
#' Calculates the estimated power of a hypothesis test that the log relative risk
#' equals 0 at a 5\% significance level across all simulated data.
#'
#' @inheritParams coverage_beta
#'
#' @return A data frame with one row with the estimated power of the analysis
#'    at the 5\% significance level.
#'
#' @examples
#' sims <- create_sims(n_reps = 10, n = 600, central = 100,
#'                     sd = 10, exposure_type = "continuous",
#'                     exposure_trend = "cos1",
#'                     exposure_amp = 0.6,
#'                     average_outcome = 20,
#'                     outcome_trend = "no trend",
#'                     rr = 1.01)
#' fits <- fit_mods(data = sims, custom_model = spline_mod,
#'                  custom_model_args = list(df_year = 1))
#' power_beta(fits)
#'
#' @export
power_beta <- function(df){
  no_zero <- df$lower_ci >= 0 | df$upper_ci <= 0
  out <- data.frame(power = sum(no_zero) / nrow(df))
  return(out)
}

#' Assess model performance
#'
#' Calculates several measures of model performance, based on results of fitting
#' a model to all simulated datasets.
#'
#' @inheritParams beta_bias
#'
#' @return A dataframe with one row with model assessment across all simulations.
#'   Includes values for:
#'   \itemize{
#'     \item{\code{beta_hat}: Mean of the estimated log relative risk across all simulations.}
#'     \item{\code{rr_hat}: Mean value of the estimated relative risk across all simulations.}
#'     \item{\code{var_across_betas}: Variance of the estimated log relative risk across all
#'           simulations}
#'     \item{\code{mean_beta_var}: The mean of the estimated variances of the estimated log
#'           relative risks across all simulations.}
#'     \item{\code{percent_bias}: The relative bias of the estimated log relative risks compared
#'           to the true log relative risk.}
#'     \item{\code{coverage}: Percent of simulations for which the estimated 95\% confidence
#'           interval for log relative risk includes the true log relative risk.}
#'     \item{\code{power}: Percent of simulations for which the null hypothesis that the log
#'           relative risk equals zero is rejected based on a p-value of 0.05.}
#'   }
#'
#' @seealso The following functions are used to calculate these measurements:
#'    \code{\link{beta_bias}}, \code{\link{beta_var}}, \code{\link{coverage_beta}},
#'    \code{\link{mean_beta}}, \code{\link{power_beta}}
#'
#' @examples
#' sims <- create_sims(n_reps = 100, n = 1000, central = 100,
#'                     sd = 10, exposure_type = "continuous",
#'                     exposure_trend = "cos1",
#'                     exposure_amp = 0.6,
#'                     average_outcome = 20,
#'                     outcome_trend = "no trend",
#'                     rr = 1.02)
#' fits <- fit_mods(data = sims, custom_model = spline_mod,
#'                  custom_model_args = list(df_year = 1))
#' check_sims(df = fits, true_rr = 1.02)
#'
#' @export
#'
check_sims <- function(df, true_rr){
  a <- mean_beta(df)
  b <- beta_var(df)
  c <- beta_bias(df, true_rr = true_rr)
  d <- coverage_beta(df, true_rr = true_rr)
  e <- power_beta(df)

  out <- cbind(a, b, c, d, e)
  return(out)
}

#' Power Calculations
#'
#' Calculates the expected power of an environmental epidemiology time series analysis based
#' on simulated datasets. This function uses the simulation provided by \code{eesim} to
#' simulate multiple environmental epidemiology datasets under different scenarios (e.g.,
#' total days in study, size of association between exposure and outcome, or baseline
#' average daily count of the outcome in the study) and estimates the power of a specified
#' analysis to detect the hypothesized association.
#'
#' @param varying A character string specifying the parameter to be varied.  Choices are
#'    \code{'n'} (which varies the number of days in each dataset of simulated data),
#'    \code{'rr'} (which varies the relative rate per unit increase in exposure that is used
#'    to simulate the data), or \code{'average_outcome'} (which varies the average value
#'    of the outcomes in each dataset). For whichever of these three values is not set to vary in this
#'    argument, the user must specify a constant value to this function through the
#'    \code{n}, \code{rr}, or \code{average_outcome} arguments.
#' @param values A numeric vector with the values you would like to test for the varying
#'    parameters. For example, \code{values = c(1.05, 1.10, 1.15)} would produce power
#'    estimates for the four specified values of relative risk if the user has specified
#'    \code{varying = 'rr'}.
#' @param plot "TRUE" or "FALSE" for whether to produce a plot
#' @inheritParams power_beta
#' @inheritParams create_sims
#' @inheritParams fit_mods
#'
#' @return Data frame with the values of the varying parameter and the estimated power
#' for each. If the \code{plot} argument is set to \code{TRUE}, it also returns a power
#' curve plot as a side effect. Because these estimates are based on simulations, there
#' will be some random variation in estimates of power. Estimates will be more stable
#' if a higher value is used for \code{n_reps}, although this will increase the time it
#' takes the function to run.
#'
#' @examples
#'
#' # Calculate power for studies that vary in the total length of the study period
#' # (between one and twenty-one years of data) for the association between a continuous
#' # exposure with a seasonal trend (mean = 100, sd from seasonal baseline = 10) and a count
#' # outcome (e.g., daily number of deaths, mean daily value across the study period of 22).
#' # The alternative hypothesis is that there is a relative rate of the outcome of 1.001 for
#' # every one-unit increase in exposure. The null hypothesis is that there is no association
#' # between the exposure and the outcome. The model used to test for an association is a
#' # case-crossover model
#' \dontrun{
#' pow <- power_calc(varying = "n", values = floor(365.25 * seq(1, 21, by = 5)), n_reps = 20,
#'            central = 100, sd = 10, rr = 1.001, exposure_type = "continuous",
#'            exposure_trend = "cos1", exposure_amp = .6, average_outcome = 22,
#'            outcome_trend = "no trend", outcome_amp = .6,
#'            custom_model = spline_mod, plot = TRUE)
#'}
#'
#' @export
power_calc <- function(varying, values, n_reps, custom_model, central, exposure_type,
                       n = NULL, sd = NULL, exposure_trend = "no trend",
                       exposure_amp = NULL, average_outcome = NULL,
                       outcome_trend = "no trend", outcome_amp = NULL, rr = NULL,
                       start.date = "2000-01-01",
                       cust_exp_func = NULL, cust_exp_args = NULL,
                       cust_base_func = NULL, cust_lambda_func = NULL,
                       cust_base_args = NULL, cust_lambda_args = NULL,
                       custom_model_args = NULL, plot = FALSE){
  msg <- paste("This function may take a minute or two to run, especially with lots of",
               "replications (`n_reps`) or options for `values`.")
  msg <- paste(strwrap(msg), collapse = "\n")
  message(msg)

  if(varying == "n"){
    rep_df <- values %>% purrr::map(create_sims, n_reps = n_reps, central = central, sd = sd,
                                    exposure_type = exposure_type,
                                    exposure_trend = exposure_trend,exposure_amp = exposure_amp,
                                    average_outcome = average_outcome, outcome_trend = outcome_trend,
                                    outcome_amp = outcome_amp, rr = rr, start.date = start.date)
    fits <- rep_df %>% purrr::map(fit_mods, custom_model = custom_model,
                                  custom_model_args = custom_model_args)
    power <- fits %>% purrr::map(power_beta) #makes a list, want to extract the values of power and put in a data frame with values of n.
  }
  else if(varying == "rr"){
    rep_df <- values %>% purrr::map(create_sims, n = n, n_reps = n_reps, central = central, sd = sd,                         exposure_type = exposure_type,
                                    exposure_trend = exposure_trend, exposure_amp = exposure_amp,
                                    average_outcome = average_outcome, outcome_trend = outcome_trend,
                                    outcome_amp = outcome_amp, start.date = start.date)
    fits <- rep_df %>% purrr::map(fit_mods, custom_model = custom_model,
                                  custom_model_args = custom_model_args)
    power <- fits %>% purrr::map(power_beta) #makes a list, want to extract the values of power and put in a data frame with values of rr.
  }
  else if(varying == "average_outcome"){
    rep_df <- values %>% purrr::map(create_sims, n = n, n_reps = n_reps,
                                    central = central, sd = sd, exposure_type = exposure_type,
                                    exposure_trend = exposure_trend,
                                    exposure_amp = exposure_amp, outcome_trend = outcome_trend,
                                    outcome_amp = outcome_amp, rr = rr, start.date = start.date)
    fits <- rep_df %>% purrr::map(fit_mods, custom_model = custom_model,
                                  custom_model_args = custom_model_args)
    power <- fits %>% purrr::map(power_beta)
  }

  powervec <- rep(0, length(power))
  for (i in 1:length(power)){
    powervec[i] <- power[[i]][ , 1]
  }

  dat <- data.frame(values = values, power = powervec)

  if(plot == TRUE){
    my_plot <- ggplot2::ggplot(dat, ggplot2::aes_(x = ~ dat$values, y = ~ dat$power)) +
      ggplot2::geom_line() + ggplot2::theme_minimal() +
      ggplot2::xlab(varying) + ggplot2::ylab("power") +
      ggplot2::ylim(0, 1)
    print(my_plot)
  }

  return(dat)
}
Any scripts or data that you put into this service are public.
eesim documentation built on May 2, 2019, 7:30 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
eesim
Simulate and Evaluate Time Series for Environmental Epidemiology

R/assess_performance.R
In eesim: Simulate and Evaluate Time Series for Environmental Epidemiology

Defines functions mean_beta beta_var beta_bias coverage_beta power_beta check_sims power_calc

Documented in beta_bias beta_var check_sims coverage_beta mean_beta power_beta power_calc

Try the eesim package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

eesim Simulate and Evaluate Time Series for Environmental Epidemiology

R/assess_performance.R In eesim: Simulate and Evaluate Time Series for Environmental Epidemiology

Defines functions mean_beta beta_var beta_bias coverage_beta power_beta check_sims power_calc

Documented in beta_bias beta_var check_sims coverage_beta mean_beta power_beta power_calc

Try the eesim package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

eesim
Simulate and Evaluate Time Series for Environmental Epidemiology

R/assess_performance.R
In eesim: Simulate and Evaluate Time Series for Environmental Epidemiology