R/fastpos.R
In fastpos: Finds the Critical Sequential Point of Stability for a Pearson Correlation

Documented in create_pop find_critical_pos

#' @useDynLib fastpos
#' @importFrom Rcpp sourceCpp
NULL

#' Stops execution without giving error
#'
#' This is useful to have a consistent behavior when the user interrupts
#' function execution but this interruption is not catched by C++. If this
#' happens nothing is returned. But if C++ catches the interrupt, we need to
#' stop execution ourselves (and also return nothing).
#' @noRd
stop_quietly <- function() {
  blank_msg <- sprintf("\r%s\r", paste(rep("", getOption("width") - 1L),
                                      collapse =  " "))
  stop(simpleError(blank_msg))
}

#' Creates a population with a specified correlation.
#'
#' @param rho Population correlation.
#' @param size Population size.
#' @return Two-dimensional population matrix with a specific correlation.
#' @examples
#' pop <- create_pop(0.5, 1e5)
#' cor(pop)
#' @noRd
#' @importFrom MASS mvrnorm
create_pop_inexact <- function(rho, size) {
  mu <- c(1, 2)
  s1 <- 2
  s2 <- 8
  sigma <- matrix(c(s1^2, s1 * s2 * rho, s1 * s2 * rho, s2^2), 2)
  pop <- MASS::mvrnorm(n = size, mu = mu, Sigma = sigma)
  pop
}

#' Creates a population with a specified correlation.
#'
#' The correlation will be exactly the one specified. The used method is
#' described here:
#' https://stats.stackexchange.com/questions/15011/generate-a-random-variable-with-a-defined-correlation-to-an-existing-variables/15040#15040
#'
#' @param rho Population correlation.
#' @param size Population size.
#' @return Two-dimensional population matrix with a specific correlation.
#' @examples
#' pop <- create_pop(rho = 0.5, size = 1e6)
#' cor(pop)
#' @export
#' @importFrom stats residuals sd rnorm lm.fit
create_pop <- function(rho, size) {
  y <- stats::rnorm(size)
  x <- stats::rnorm(size)
  y_perp <- stats::residuals(stats::lm.fit(cbind(1, y), x))
  x <- rho * stats::sd(y_perp) * y + y_perp * stats::sd(y) * sqrt(1 - rho^2)
  matrix(c(x, y), ncol = 2)
}

#' Run simulation for one specific correlation.
#'
#' @param rho Population correlation.
#' @param lower_limit Lower limit of corridor, overrides precision parameter
#' @param upper_limit Upper limit of corridor, overrides precision parameter
#' @inheritParams find_critical_pos
#' @return A list with two elements, (1) a data frame called "summary"
#'   containing all the above information as well as the critical sample sizes
#'   (points of stability) for the confidence-levels specified and (2) vector
#'   "n" with the sample size from each study (e.g. for plotting the
#'   distribution)
#' @examples
#' find_one_critical_pos(rho = 0.5)
#' @noRd
#' @importFrom stats cor quantile
#' @importFrom pbmcapply pbmclapply
#' @importFrom tibble lst
find_one_critical_pos <- function(rho, sample_size_min = 20,
                                  sample_size_max = 1e3,
                                  replace = TRUE, n_studies = 1e3,
                                  pop_size = 1e6,
                                  precision_absolute = .1,
                                  precision_relative = NA,
                                  confidence_levels = c(.8, .9, .95),
                                  n_cores = 1,
                                  lower_limit = NA,
                                  upper_limit = NA,
                                  progress = show_progress()) {

  # create corridor of stability
  corridor_function <- choose_corridor_function(
    !is.na(precision_absolute), !is.na(precision_relative),
    !is.na(lower_limit), !is.na(upper_limit)
  )
  corridor_values <- corridor_function(rho, precision_absolute,
                                       precision_relative,
                                       lower_limit, upper_limit)

  lower_limit <- corridor_values$lower_limit
  upper_limit <- corridor_values$upper_limit
  precision_absolute <- corridor_values$precision_absolute
  precision_relative <- corridor_values$precision_relative

  # create bivariate population distribution
  pop <- create_pop(rho, pop_size)

  x <- pop[, 1]
  y <- pop[, 2]
  rho_pop <- stats::cor(x, y)

  # create dist of pos
  n_studies_per_core <- ceiling(n_studies / n_cores)
  if (n_cores > 1) {
    res <- unlist(pbmcapply::pbmclapply(1:n_cores, function(k)
      simulate_pos(x, y, n_studies_per_core, sample_size_min,
                   sample_size_max, replace,
                   lower_limit, upper_limit, progress = FALSE),
      mc.cores = n_cores,
      mc.set.seed = FALSE
    ))
  } else {
    res <- simulate_pos(x, y, n_studies, sample_size_min, sample_size_max, replace,
                        lower_limit, upper_limit, progress)
  }

  # on interruption, C++ will return -1 (if R interrupts by itself, nothing
  # will be returned, it just stops)
  if (length(res) == 1) {
    if (res == -1) stop_quietly()
  }
  names(res) <- unlist(paste("study ", 1:length(res)))
  n_not_breached <- sum(is.na(res))

  # calc critical pos
  # use max sample size for those studies where the corridor was not breached
  thequantiles <- stats::quantile(c(res, rep(sample_size_max, n_not_breached)),
                                    confidence_levels, na.rm = TRUE)
  return(list(summary = unlist(tibble::lst(
    rho_pop,
    pos = unlist(thequantiles),
    sample_size_min,
    sample_size_max,
    lower_limit,
    upper_limit,
    n_studies = length(res),
    n_not_breached,
    precision_absolute,
    precision_relative)),
    n = res
  ))
}

#' Find the critical point of stability
#'
#' Run simulations for one or several population correlations and return the
#' critical points of stability (POS). The critical point of stability is the
#' sample size at which a certain percentage of studies will fall into an a
#' priori specified interval and stay in this interval if the sample size is
#' increased further.
#'
#' @param rho Vector of population correlations (can also be a single correlation).
#' @param precision_absolute Precision around the correlation which is acceptable
#'   (defaults to 0.1). The precision will determine the corridor of stability which is
#'   just rho+-precision. Can be a single value or a vector (different values for
#'   different rhos).
#' @param confidence_levels Confidence levels for point of stability. This corresponds
#'   to the quantile of the distribution of all found critical sample sizes (defaults
#'   to c(.8, .9, .95)). A single value can also be used. Note that this value is fixed
#'   for all rhos! You cannot specify different levels for different rhos.
#' @param sample_size_min Minimum sample size for each study (defaults to 20). A vector
#'   can be used (different values for different rhos).
#' @param sample_size_max Maximum sample size for each study (defaults to 1e3). A
#'   vector can be used (different values for different rhos). If you get a warning
#'   that the corridor of stability was not reached, you should increase this value.
#'   But note that this will increase the time for the simulation.
#' @param n_studies Number of studies to run for each rho (defaults to 1e4). A vector
#'   can be used (different values for different rhos).
#' @param n_cores Number of cores to use for simulation. Defaults to 1. Under
#'   Windows only 1 core is supported because forking is used.
#' @param pop_size Population size (defaults to 1e6). This is the size of the
#'   population from which value pairs for correlations are drawn. This value should
#'   usually not be decreased as it can lead to less accurate results.
#' @param replace Whether drawing samples is with replacement or not. Default is TRUE,
#'   which usually should not be changed. This parameter is mainly of interest for
#'   researchers studying the method in more detail. A vector can be used (different
#'   values for different rhos).
#' @param precision_relative Relative precision around the correlation
#'   (rho+-rho*precision), if set, it will overwrite precision_absolute. A vector can
#'   be used (different values for different rhos).
#' @param lower_limit Lower limit of corridor, overrides precision parameters. A vector
#'   can be used (different values for different rhos). If used, upper_limit must also
#'   be set.
#' @param upper_limit Upper limit of corridor, overrides precision parameters. A vector
#'   can be used (different values for different rhos). If used, lower_limit must also
#'   be set.
#' @param progress Should progress bar be displayed? Logical, default is to show
#'   progress when run in interactive mode.
#' @param precision `r lifecycle::badge("deprecated")`, use precision_absolute instead
#' @param precision_rel `r lifecycle::badge("deprecated")`, use precision_relative
#'   instead
#' @param rhos `r lifecycle::badge("deprecated")`, use rho instead
#' @return A data frame containing all the above information, as well as the critical
#'   points of stability.
#'
#' The critical points of stability follow directly after the first column (rho)
#' and are named pos.confidence-level, e.g. pos.80, pos.90, pos.95 for the
#' default confidence levels.
#'
#' @examples
#' find_critical_pos(rho = 0.5, n_studies = 1e3)
#' find_critical_pos(rho = c(0.4, 0.5), n_studies = 1e3)
#' @export
#' @importFrom lifecycle deprecated is_present deprecate_warn badge
#' @importFrom plyr ldply
find_critical_pos <- function(rho,
                              precision_absolute = 0.1,
                              confidence_levels = c(.8, .9, .95),
                              sample_size_min = 20,
                              sample_size_max = 1e3,
                              n_studies = 1e4,
                              n_cores = 1,
                              pop_size = 1e6,
                              replace = TRUE,
                              precision_relative = NA,
                              lower_limit = NA,
                              upper_limit = NA,
                              progress = show_progress(),
                              precision = lifecycle::deprecated(),
                              precision_rel = lifecycle::deprecated(),
                              rhos = lifecycle::deprecated()) {
  if (.Platform$OS.type == "windows" & n_cores > 1) {
    n_cores <- 1
    warnings("On Windows only one core can be used. Sorry.")
  }
  if (lifecycle::is_present(precision)) {
    lifecycle::deprecate_warn(
      when = "0.6.0",
      what = "find_critical_pos(precision)",
      details = "find_critical_pos(precision_absolute)"
    )
    precision_absolute <- precision
  }

  if (lifecycle::is_present(rhos)) {
    lifecycle::deprecate_warn(
      when = "0.6.0",
      what = "find_critical_pos(rhos)",
      details = "find_critical_pos(rho)"
    )
    rho <- rhos
  }

  if (lifecycle::is_present(precision_rel)) {
    lifecycle::deprecate_warn(
      when = "0.6.0",
      what = "find_critical_pos(precision_rel)",
      details = "Use precision_relative instead. Note that precision_relative takes a numeric value, not a logical!"
    )
    precision_relative <- precision_absolute
  }

  result <- mapply(find_one_critical_pos,
                   rho = rho,
                   sample_size_max = sample_size_max,
                   sample_size_min = sample_size_min,
                   n_studies = n_studies,
                   precision_absolute = precision_absolute,
                   precision_relative = precision_relative,
                   lower_limit = lower_limit,
                   upper_limit = upper_limit,
                   MoreArgs = list(confidence_levels = confidence_levels,
                                   n_cores = n_cores,
                                   pop_size = pop_size,
                                   progress = progress),
                   SIMPLIFY = FALSE)
  summary <- lapply(result, function(x) x[[1]])
  summary <- plyr::ldply(summary)
  sum_n_not_breached <- sum(summary$n_not_breached)
  if (sum_n_not_breached > 0) {
    warning("\n", sum_n_not_breached,
            " simulation[s] did not reach the corridor of stability",
            ".\nIncrease sample_size_max and rerun the simulation.",
            sep = "")
  }
  summary
}

.onUnload <- function(libpath) {
  library.dynam.unload("fastpos", libpath)
}

#' @importFrom tibble lst
create_corridor_absolute <- function(rho, precision_absolute,
                                     precision_relative,
                                     lower_limit,
                                     upper_limit) {
  limits <- rho + c(-1, 1) * precision_absolute
  precision_relative <- NA
  lower_limit <- limits[1]
  upper_limit <- limits[2]
  return(tibble::lst(lower_limit, upper_limit, precision_absolute,
                     precision_relative))
}

#' @importFrom tibble lst
create_corridor_relative <- function(rho, precision_absolute,
                                     precision_relative, lower_limit,
                                     upper_limit) {
  limits <- rho * (1 + c(-1, 1) * precision_relative)
  precision_absolute <- NA
  lower_limit <- limits[1]
  upper_limit <- limits[2]
  return(tibble::lst(lower_limit, upper_limit, precision_absolute,
                     precision_relative))
}

#' @importFrom tibble lst
create_corridor_manual <- function(rho, precision_absolute,
                                   precision_relative, lower_limit,
                                   upper_limit) {
  precision_absolute <- NA
  precision_relative <- NA
  return(tibble::lst(lower_limit, upper_limit, precision_absolute,
                     precision_relative))
}

#' @importFrom tibble lst
create_corridor_error <- function() {
  stop("Your corridor parameters are not correct. Either specify precision_absolute, precision_relative, or lower_limit AND upper_limit")
}

choose_corridor_function <- function(precision_absolute,
                                     precision_relative,
                                     lower_limit, upper_limit) {
  args <- as.numeric(c(precision_absolute, precision_relative, lower_limit,
                       upper_limit))
  df <- expand.grid(0:1, 0:1, 0:1, 0:1)
  cases <- apply(df, 1, paste0, collapse = "")
  case <- which(cases == paste(args, collapse = ""))

  f <- rep_f(length(cases), create_corridor_error)
  f[13:16] <- rep_f(4, create_corridor_manual)
  f[[2]] <- create_corridor_absolute
  f[3:4] <- rep_f(2, create_corridor_relative)
  f[[case]]
}

rep_f <- function(n, f) {
  lapply(1:n, function(x) f)
}

show_progress <- function() {
  # it is also possible to use isatty(stdout) and Sys.getenv("RSTUDIO"), which is 1
  # when RSTUDIO is used
  progress <- interactive()
  return(progress)
}

Any scripts or data that you put into this service are public.

fastpos documentation built on Aug. 16, 2022, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fastpos
Finds the Critical Sequential Point of Stability for a Pearson Correlation

R/fastpos.R
In fastpos: Finds the Critical Sequential Point of Stability for a Pearson Correlation

Defines functions show_progress rep_f choose_corridor_function create_corridor_error create_corridor_manual create_corridor_relative create_corridor_absolute .onUnload find_critical_pos find_one_critical_pos create_pop create_pop_inexact stop_quietly

Documented in create_pop find_critical_pos

Try the fastpos package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastpos Finds the Critical Sequential Point of Stability for a Pearson Correlation

R/fastpos.R In fastpos: Finds the Critical Sequential Point of Stability for a Pearson Correlation

Defines functions show_progress rep_f choose_corridor_function create_corridor_error create_corridor_manual create_corridor_relative create_corridor_absolute .onUnload find_critical_pos find_one_critical_pos create_pop create_pop_inexact stop_quietly

Documented in create_pop find_critical_pos

Try the fastpos package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastpos
Finds the Critical Sequential Point of Stability for a Pearson Correlation

R/fastpos.R
In fastpos: Finds the Critical Sequential Point of Stability for a Pearson Correlation