#' @title dredge_correlation
#'
#' @description data dredge a \code{\link[base]{data.frame}} using \code{\link[stats]{cor.test}}
#'
#' @export
#'
#' @param data a \code{\link[base]{data.frame}} to operate over
#' @param grid a search grid in the form of \code{\link[mndredge]{make_correlation_grid}} to use in guiding the dredging
#' @param min_pvalue the maximum the pvalue from \code{\link[stats]{lm}} can be before the result is filtered out. this generaly is an issue only in overfitted models or synthetic data
#' @param max_pvalue the maximum the pvalue from \code{\link[stats]{lm}} can be before the result is filtered out
#' @param ... addtional parameters to pass to \code{\link[stats]{cor.test}}
#' @return a tidy \code{\link[base]{data.frame}} containing the following components:
#' \describe{
#' \item{column_a}{the first column to compare}
#' \item{column_b}{the second column to compare}
#' \item{estimate}{the estimate from \code{\link[stats]{cor.test}}}
#' \item{p.value}{the p.value from \code{\link[stats]{cor.test}}}
#' \item{lag}{the amount of lag for the current compare}
#' }
#'
#' @author Mark Newman, \email{mark@trinetteandmark.com}
#' @keywords utilities
#' @family dredging
#'
#' @examples
#' \dontshow{
#' library(magrittr)
#' library(doParallel)
#' library(mndredge) }
#' set.seed(0)
#' y1 <- seq(-10, 10, .5)
#' y2 <-
#' y1 + y1 %>%
#' length() %>%
#' rnorm()
#' registerDoParallel(cores=2)
#' data <- data.frame(y1, y2)
#' grid <- data %>% make_correlation_grid()
#' data %>% dredge_correlation(grid)
#'
dredge_correlation <-
function(
data,
grid,
min_pvalue = getOption("mndredge_min_pvalue", 0),
max_pvalue = getOption("mndredge_max_pvalue", 0.05),
...) {
# quite incorrect `devtools::check()` note
i <- NULL
stopifnot(
data %>% is.data.frame(),
data %>% nrow() >= 3,
grid %>% is.data.frame(),
min_pvalue %>% is.numeric(),
min_pvalue >= 0,
min_pvalue < 1,
max_pvalue %>% is.numeric(),
max_pvalue >= 0,
max_pvalue < 1,
min_pvalue < max_pvalue)
tal <- apply_lag
grid_res <-
foreach(
i = iter(grid, by = "row"),
.packages = c("magrittr")) %dopar% {
t1 <-
data[, c(i$pair_1, i$pair_2)] %>%
tal(
hold = i$pair_1,
move = i$pair_2,
lag = i$lag)
ct <- suppressWarnings(cor.test(t1[,i$pair_1], t1[,i$pair_2], ...))
exclude <-
ct$p.value %>% is.na() ||
ct$p.value < min_pvalue ||
max_pvalue < ct$p.value
if(exclude) {
NULL }
else {
names(ct$estimate) <- NULL
c(i$pair_1, i$pair_2, ct$estimate, ct$p.value, i$lag)
}}
res_df <-
do.call(rbind, grid_res) %>%
data.frame(stringsAsFactors = F)
if((res_df %>% nrow()) == 0) {
data.frame(
column_a = character(),
column_b = character(),
estimate = numeric(),
p.value = numeric(),
lag = numeric())
}
else {
res_df %>%
make_column_numeric(3) %>%
make_column_numeric(4) %>%
make_column_numeric(5) %>%
set_colnames(c("column_a", "column_b", "estimate", "p.value", "lag")) %>%
set_rownames(NULL)
}}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.