#' Generate predictor coefficients according to the signal-noise ratio (SNR)
#' @param X matrix; data containing the covariates
#' @param family family object; see "?family" for more details.
#' @param target_ratio numeric vector; the desired signal-noise ratio
#' @param group boolean; Should the data be grouped or ungrouped. See details.
#' @param f function; transformation for the covariates.
#' @param ... extra parameters; see details.
#' @details For families other than 'gaussian', the function reweights the
#' samples such that the target signal-noise ratio is reached. This implies
#' the data records will be treated as grouped records. If 'group' is TRUE,
#' then the function returns the grouped data, otherwise, the function returns
#' the ungrouped data (through duplication). \cr\cr
#' If 'family' is gaussian, one can pass the 'sd' argument through `...`.
#' Otherwise, the function runs a stochastic search algorithm to search
#' for the predictors coefficients, and one can pass the 'max_iter' argument
#' (default to be 100) to `...`. \cr\cr
#' Other parameters are tol, curiosity and block_num. They are not recommended
#' to external users.
#' @export
generate_response_with_ratio <- function(
X, family, target_ratio, group = FALSE, f = identity, ...) {
if (family$family == "gaussian")
return(response_with_ratio_gaussian(X, target_ratio, ...))
res <- response_with_ratio(X, family, f, target_ratio, ...)
my_data <- res$data
w <- res$return$parameter
glm_model <- glm(resp_var ~ . -1, data = my_data, family,
weights = ceil_exp(w))
if (group)
return( list(beta = glm_model$coefficients, weights = ceil_exp(w),
data = my_data, SNR = extract_ratio(glm_model)) )
my_data <- ungroup_data(my_data, ceil_exp(w))
list(beta = glm_model$coefficients, weights = rep(1, nrow(my_data)),
data = my_data, SNR = extract_ratio(glm_model))
}
#[Core] Optimise the weights matrix to match the target signal-noise ratio
response_with_ratio <- function(X, family, f = identity, target_ratio,
max_iter = 100, tol = 0.1, curiosity = 1000, block_num) {
if (missing(block_num)) block_num <- max(ceiling(nrow(X) / 50), 20)
beta <- rnorm(ncol(X))
my_data <- generate_response(X, beta, family, f = f)
csignal_ratio <- init_signal_noise(my_data, family, tf = ceil_exp)
res <- stochastic_search(nrow(X), csignal_ratio, ls_loss, target_ratio,
max_iter = max_iter, tol = tol,
curiosity = curiosity, block_num = block_num)
if (res$loss > tol) {
simpleWarning("Desired tolerence is not reached.")
}
print(res$loss)
list(data = my_data, return = res)
}
#[Core] Generate predictor coefficients according to the signal-noise ratio for Gaussian distribution
response_with_ratio_gaussian <- function(X, target_ratio, ...) {
X <- as.matrix(X)
tX <- t(X)
XTX_inv <- solve(tX %*% X)
epsilon <- rnorm(nrow(X))
arg <- list(...)
if ("sd" %in% names(arg))
epsilon <- rnorm(nrow(X), sd = arg$sd)
sigma2 <- var(epsilon)
beta <- target_ratio * sqrt(sigma2 * diag(XTX_inv)) - XTX_inv %*% tX %*% epsilon
my_data <- data.frame(resp_var = X %*% beta + epsilon, X)
glm_model <- glm(resp_var ~ . -1, data = my_data, family = gaussian())
list(beta = beta, weights = rep(1, nrow(my_data)),
data = my_data, SNR = extract_ratio(glm_model))
}
#' Reduce the weights such that it rounds to 'nice' numbers.
#' @param data_model_obj An object from 'generate_response_with_ratio'.
#' @param N Number of weights to be trimmed.
#' @param by Integer; desired decrement for each weight.
#' @param choose One of 'random' and 'top'.
#' If 'random', the function reduces a random set of N weights.
#' If 'top', the function reduces the heaviest N weights.
#' @param family GLM family, see "?family" for details.
#' @details Not for external users.
#' @export
reduce_data_weights <- function(data_model_obj, N, by = 1,
choose = "random", family) {
w <- data_model_obj$weights
reduce_FUN <- reduce_weights_randomly
if (choose == 'top') reduce_FUN <- reduce_weights
new_w <- reduce_FUN(w, N, by)
glm_model <- glm(resp_var ~ . -1, data = data_model_obj$data,
family = family, weights = new_w)
list(beta = glm_model$coefficients, weights = new_w,
data = data_model_obj$data, SNR = extract_ratio(glm_model))
}
#[Core] Reduce the top N weights by 1
reduce_weights <- function(w, N, by = 1) {
index <- tail(order(w), N)
w[index] <- w[index] - by
w
}
#[Core] Reduce random N weights by 1
reduce_weights_randomly <- function(w, N, by = 1) {
index <- sample(seq_along(w), N)
w[index] <- w[index] - by
w
}
#' Extract (ungrouped) data from the beta object
#' @param data_model_obj An object from 'generate_response_with_ratio'.
#' @details Not for external users.
#' @export
extract_ungrouped_data <- function(data_model_obj) {
ungroup_data(data_model_obj$data, data_model_obj$weights)
}
#Ungroup grouped data according to given weights.
ungroup_data <- function(my_data, w) {
my_data[rep(1:nrow(my_data), w), ]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.