R/utils.R

Defines functions is_congruent inform_seed find_proglang vec2mat to_integer get_seed check_set_nthreads getBoottest_nthreads setBoottest_nthreads set_engine getBoottest_engine setBoottest_engine create_data

Documented in find_proglang setBoottest_engine

#' Simulate Data
#'
#' Function simulates data for tests and examples with clustering variables
#' and fixed-effects.
#'
#' @param N number of observations
#' @param N_G1 A scalar. number of clusters for clustering variable 1
#' @param icc1 A scalar between 0 and 1. intra-cluster correlation for
#'  clustering variable 1
#' @param N_G2 A scalar. number of clusters for clustering variable 2
#' @param icc2 A scalar between 0 and 1. intra-cluster correlation for
#'  clustering variable 2
#' @param numb_fe1 A scalar. Number of fixed effect for first factor variable
#' @param numb_fe2 A scalar. Number of fixed effect for second factor variable
#' @param seed An integer. Set the random seed
#' @param weights Possible regression weights to be used in estimation
#' @return A simulated `data.frame` with specified numbers of clusters,
#'         intra-cluster correlations and dimensionality of fixed effects.
#' @examples
#' df <- create_data(
#'   N = 1000,
#'   N_G1 = 1000,
#'   icc1 = 0.5,
#'   N_G2 = 10,
#'   icc2 = 0.01,
#'   numb_fe1 = 10,
#'   numb_fe2 = 10,
#'   seed = 2293
#' )
#' @noRd

# This function very closely mirrors an example from the fabricatr package
# website, which can be found under the following link:
# https://declaredesign.org/r/fabricatr/articles/getting_started.html
# https://declaredesign.org/r/fabricatr/articles/getting_started.html
# the fabricatr package can be downloaded from CRAN and is published under
# MIT license.
# Graeme Blair [aut, cre], Jasper Cooper [aut], Alexander Coppock [aut],
# Macartan Humphreys [aut], Aaron Rudkin [aut], Neal Fultz [aut]
# are the authors of the fabricatr package

create_data <-
  function(N,
           N_G1,
           icc1,
           N_G2,
           icc2,
           numb_fe1,
           numb_fe2,
           seed,
           weights) {
    set.seed(seed)
    voters <-
      fabricatr::fabricate(
        N,
        group_id1 = sample(1:N_G1, N, replace = TRUE),
        group_id2 = sample(1:N_G2, N, replace = TRUE),
        ideology1 = fabricatr::draw_normal_icc(
          mean = 0,
          N = N,
          clusters = group_id1,
          ICC = icc1
        ),
        ideology2 = fabricatr::draw_normal_icc(
          mean = 0,
          N = N,
          clusters = group_id2,
          ICC = icc2
        ),
        ideological_label = fabricatr::draw_ordered(
          x = ideology1,
          break_labels = c(
            "Very Conservative",
            "Conservative",
            "Liberal",
            "Very Liberal"
          )
        ),
        income = exp(rlnorm(
          n = N,
          meanlog = 2.4 - (ideology1 * 0.1),
          sdlog = 0.12
        )),
        # Q1_immigration_latent = rnorm(N),
        # Q1_immigration = ifelse(Q1_immigration_latent > 0.5, 1,
        #  ifelse(Q1_immigration_latent <= 0.5 &
        # Q1_immigration_latent > 0, 2, 3)
        # ),
        # Q2_defense_latent = rnorm(N, 0, 3),
        # Q2_defense = ifelse(Q2_defense_latent > 0.5, 1,
        #  ifelse(Q2_defense_latent <= 0.5 & Q2_defense_latent > 0, 2, 3)
        # ),
        Q1_immigration = sample(1:numb_fe1, N, TRUE),
        Q2_defense = sample(1:numb_fe2, N, TRUE),
        treatment = fabricatr::draw_binary(0.5, N = N),
        proposition_vote = fabricatr::draw_binary(
          latent = ideology1 + ideology2 + 0.2 * treatment +
            2 * Q1_immigration + rnorm(N, 0, 3),
          link = "probit"
        ),
        state = rep(1:(N / 20), 20),
        year = sort(rep(1:20, N / 20))
      )

    #' @srrstats {G2.4d} *explicit conversion to factor via `as.factor()`* Yes.
    voters$Q1_immigration <- as.factor(voters$Q1_immigration)
    voters$Q2_defense <- as.factor(voters$Q2_defense)


    voters$log_income <- log(voters$income)
    voters$Q1_immigration <- as.factor(voters$Q1_immigration)

    # add weights
    voters$weights <- sample(1:10, N, replace = TRUE) / 10

    voters
  }


# gtools_permutations <-
#   function(n,
#            r,
#            v = 1:n,
#            set = TRUE,
#            repeats.allowed = FALSE) {
#     #' copied from the permutations package as long as it is orphaned on CRAN
#     #' @param n size of source vector
#     #' @param r size of target vector
#     #' @param v source vector. defaults to 1:n
#     #' @param set logical flag for duplicates
#     #' @param repeats.allowed logical flag
#     #' @noRd
#
#     if (mode(n) != "numeric" || length(n) != 1 || n < 1 ||
#         (n %% 1) != 0) {
#       stop("bad value of n")
#     }
#     if (mode(r) != "numeric" || length(r) != 1 || r < 1 ||
#         (r %% 1) != 0) {
#       stop("bad value of r")
#     }
#     if (!is.atomic(v) || length(v) < n) {
#       stop("v is either non-atomic or too short")
#     }
#     if ((r > n) & repeats.allowed == FALSE) {
#       stop("r > n and repeats.allowed=FALSE")
#     }
#     if (set) {
#       v <- unique(sort(v))
#       if (length(v) < n) {
#         stop("too few different elements")
#       }
#     }
#     v0 <- vector(mode(v), 0)
#     if (repeats.allowed) {
#       sub <- function(n, r, v) {
#         if (r == 1) {
#           matrix(v, n, 1)
#         } else if (n == 1) {
#           matrix(v, 1, r)
#         } else {
#           inner <- Recall(n, r - 1, v)
#           cbind(
#             rep(v, rep(nrow(inner), n)),
#             matrix(
#               t(inner),
#               ncol = ncol(inner),
#               nrow = nrow(inner) * n,
#               byrow = TRUE
#             )
#           )
#         }
#       }
#     } else {
#       sub <- function(n, r, v) {
#         if (r == 1) {
#           matrix(v, n, 1)
#         } else if (n == 1) {
#           matrix(v, 1, r)
#         } else {
#           X <- NULL
#           for (i in 1:n) {
#             X <- rbind(X, cbind(v[i], Recall(n -
#                                                1, r - 1, v[-i])))
#           }
#           X
#         }
#       }
#     }
#     sub(n, r, v[1:n])
#   }

setBoottest_engine <- function(engine) {

  #' Sets the default bootstrap algo for the current R session
  #' to be run via `boottest()` and `mboottest()`
  #'
  #' @param engine Character scalar. Either 'R' or 'WildBootTests.jl'.
  #'  Default is 'R'
  #' @return No return value
  #'
  #' @export
  #'
  #' @examples
  #' \dontrun{
  #' setBoottest_engine(engine = "R")
  #' setBoottest_engine(engine = "WildBootTests.jl")
  #' }

  if (missing(engine) || is.null(engine)) {
    # New default: one cores used
    engine <- "R"
  }

  engine <- set_engine(engine)

  options("boottest_engine" = engine)

  invisible()
}

getBoottest_engine <- function() {
  #' get the bootstrap algorithm to be run via `boottest()` and `waldboottest()`
  #' @return The number of threads currently used by boottest as set in options
  #' @noRd

  x <- getOption("boottest_engine")
  if (!(x %in% c("R", "WildBootTests.jl"))) {
    rlang::abort(
      "The value of getOption(\"boottest_engine\") is currently not legal.
      Please use function setBoottest_engine to set it to an appropriate
      value. ",
      use_cli_format = TRUE
    )
  }
  x
}

set_engine <- function(engine) {

  #' check the bootstrap algo
  #' @param engine character scalar
  #' @noRd

  dreamerr::check_value(engine, "charin(R, WildBootTests.jl)")

  engine
}


# The original source of all three included funcitons is Laurent Berge's fixest
# code in https://github.com/lrberge/fixest
# The original code was distributed under GPL-3 license


# changes to these functions by Alexander Fischer
# 1. functions are renamed to _Boottest_
# 2. the default number of threads in check_set_nthreads is set to 1

setBoottest_nthreads <- function(nthreads) {

  #' Set the number of threads for use with open mp via options
  #' By default, only one thread is used
  #' @param nthreads Integer. Number of threads to be used
  #' @return No return value
  #' @noRd
  #' @importFrom parallel detectCores

  max_CRAN <- as.numeric(Sys.getenv("OMP_THREAD_LIMIT"))
  max_CRAN[is.na(max_CRAN)] <- 1000

  max_threads <- min(cpp_get_nb_threads(), 1000, max_CRAN)
  # we cap at 1k nthreads

  if (missing(nthreads) || is.null(nthreads)) {
    # New default: one cores used
    nthreads <- 1
  }

  nthreads <- check_set_nthreads(nthreads)

  options("boottest_nthreads" = nthreads)

  invisible()
}


getBoottest_nthreads <- function() {
  #' get the number of threads for use with open mp
  #' @return The number of threads currently used by boottest as set in options
  #' @noRd

  x <- getOption("boottest_nthreads")
  if (length(x) != 1 || !is.numeric(x) || is.na(x) || x %% 1 != 0 || x < 0) {
    rlang::abort("The value of getOption(\"boottest_nthreads\") is currently not legal.
         Please use function setBoottest_nthreads to set it to an appropriate
         value. ",
      use_cli_format = TRUE
    )
  }
  # cat("getBoottest nr threads \n")
  # print(x)
  x
}

check_set_nthreads <- function(nthreads) {
  #' Simple function that checks that the nber of threads is valid
  #' @param nthreads Integer. Number of threads to be used
  #' @importFrom dreamerr set_up check_value warn_up
  #' @return Integer. The number of threads to be used.
  #' @noRd


  dreamerr::set_up(1)

  dreamerr::check_value(nthreads,
                        "integer scalar GE{0} | numeric scalar GT{0} LT{1}",
                        .message = paste0(
                          "The argument 'nthreads' must be an integer
                          lower or equal to the number of threads available (",
                          max(cpp_get_nb_threads(), 1), ").
                          It can be equal to 0 which means all threads.
                          Alternatively, if equal to a number strictly between
                          0 and 1, it represents the fraction of all
                          threads to be used."
                        )
  )

  # max_threads <- parallel::detectCores()
  max_threads <- cpp_get_nb_threads()
  # cat("max_threads \n")
  # print(max_threads)

  # # To add later
  # if(cpp_is_in_fork()) return(1)

  if (nthreads == 0) {
    nthreads <- max(max_threads, 1)
  } else if (nthreads < 1) {
    nthreads <- max(ceiling(max_threads * nthreads), 1)
  } else if (nthreads > 1) {
    if (max_threads == 0) {
      dreamerr::warn_up(
        "OpenMP not detected: cannot use ", nthreads,
        " threads, single-threaded mode instead."
      )
      nthreads <- 1
    } else if (nthreads > max_threads) {
      dreamerr::warn_up(
        "Asked for ", nthreads,
        " threads while the maximum is ",
        max_threads,
        ". Set to ",
        max_threads,
        " threads instead."
      )
      nthreads <- max_threads
    }
  }
  # cat("nthreads set_get \n")
  # print(nthreads)
  nthreads
}

get_seed <- function() {
  #' creates an integer based on the global random seed set via set.seed()
  #' for using set.seed() for controlling rcpp's seed, see this
  #' blog post http://rorynolan.rbind.io/2018/09/30/rcsetseed/
  #' @noRd

  # max_int <- .Machine$integer.max
  max_int <- 2147483647L
  x <- sample.int(max_int, 1)
  x
}

to_integer <- function(vec) {

  #' Transform vectors of all types safely to integer vectors
  #' @param vec A vector
  #' @return An integer vector
  #' @noRd
  #' @srrstats {G2.4a}


  dreamerr::check_arg(vec, "MBT vector")

  unique_vec <- unique(vec)
  int_vec <- rep(NA, length(vec))
  for (x in seq_along(unique_vec)) {
    int_vec[which(vec == unique_vec[x])] <- x
  }
  as.integer(int_vec)

}


# functions taken from 'clubSandwich' package
matrix_split <- function (x, fac, dim) {
  # if (is.vector(x)) {
  #   if (dim != "both")
  #     stop(paste0("Object must be a matrix in order to subset by ",
  #                 dim, "."))
  #   x_list <- split(x, fac)
  #   lapply(x_list, function(x) diag(x, nrow = length(x)))
  # }
  # else {
    lapply(levels(fac), sub_f(x, fac, dim))
  #}
}

sub_f <- function (x, fac, dim){
  function(f) switch(dim,
                     row = x[fac == f, , drop = FALSE],
                     col = x[, fac == f, drop = FALSE],
                     both = x[fac == f,
                     fac == f, drop = FALSE])
}



#' reformat a vector by a 'by' group
#' @param x A vector
#' @param group_id a grouping vector of integers
#' @return
#' A matrix of dimension N x G, where G the number of obs and
#' G the number of groups
#' @examples
#' if(requireNamespace("collapse")){
#' N <- 100
#' cluster <- sample(letters, N, TRUE)
#' g <- collapse::GRP(cluster, call = FALSE)
#' vec2mat(x = rnorm(N), group_id = g$group.id)
#' }
#' @noRd
vec2mat <- function(x, group_id){

  N <- length(x)
  G <- length(unique(group_id))
  mat <- matrix(0, N, G)
  index <- 1:N
  idx <- index + N * (group_id - 1)
  mat[idx] <- x
  mat

}


find_proglang <- function(lang){

  #' Check if julia or python are installed /
  #' can be found on the PATH.
  #'
  #' Based on Mauro Lepore's great suggestion
  #' https://github.com/ropensci/software-review/issues/546#issuecomment-1416728843
  #'
  #' @param lang which language to check. Either 'julia' or 'python'
  #'
  #' @return logical. TRUE if lang is found on path, FALSE if not
  #'
  #' @examples
  #'
  #' \dontrun{
  #' find_proglang(lang = "julia")
  #' }

  dreamerr::check_arg(lang, "charin(julia, python)")

  language_found <- nzchar(Sys.which(lang))

  language_found

}


inform_seed <- function(frequency_id, engine){

  if(engine != "WildBootTests.jl"){

    rlang::inform(
      "Too guarantee reproducibility, don't forget to set a
    global random seed **both** via `set.seed()` and `dqrng::dqset.seed()`.",
      use_cli_format = TRUE,
      .frequency = "regularly",
      .frequency_id = frequency_id
    )

  } else {

    rlang::inform(
      "Too guarantee reproducibility, don't forget to set a
    global random seed via `set.seed()`.",
      use_cli_format = TRUE,
      .frequency = "regularly",
      .frequency_id = frequency_id
    )

  }



}

is_congruent <- function(x, y){
  
  #' check if two vectors x and y are congruent
  #' @param x a vector
  #' @param y another vector of length(y) == length(x)
  #' @return A logical. TRUE if the two vectors are congruent
  #' @examples
  #' x <- c(1, 1, 1, 2, 2, 3, 4, 4, 4)
  #' y <- c("a", "a", "a", "b", "b", "b", "b", "b", "b")
  #' is_congruent(x, y)
  #' @noRd

  dreamerr::check_arg(x,"vector")
  dreamerr::check_arg(y, "vector")
  
  u_x <- unique(x)
  u_y <- unique(y)
  
  if(length(u_x) < length(u_y)){
    rlang::abort("")
  }
  
  tab <- table(x,y)
  !any(colSums(tab != 0) != 1)
  
}
s3alfisc/fwildclusterboot documentation built on Sept. 17, 2023, 5:55 a.m.