#' Calculate a number of sample correlations based on a specified population correlation
#' @param pop.r Population correlation used to generate the samples
#' @param n Sample size used for every sample
#' @param number.of.samples Number of samples to obtain
#' @return Tibble with one row per sample: the sample correlation, its confidence interval, its prediction interval, whether each interval captured its target, and the p-value
#' @examples
#' get.r.samples(pop.r = .35, n = 100)
#' @export
get.r.samples <- function(pop.r = NA, n, number.of.samples = 10) {
  number.of.decimals <- 3
  Sigma <- diag(2)
  Sigma[1, 2] <- pop.r
  Sigma[2, 1] <- pop.r
  mu <- c(0, 0)
  K <- number.of.samples
  data_samples <- get_true_scores_with_sampling_error_cov(Sigma, mu, n, K)
  rs <- rep(NA, number.of.samples)
  dfs <- rep(NA, number.of.samples)
  ts <- rep(NA, number.of.samples)
  in_interval <- rep(NA, number.of.samples)
  ps <- rep(NA, number.of.samples)
  LLs <- rep(NA, number.of.samples)
  ULs <- rep(NA, number.of.samples)
  pi_LLs <- rep(NA, number.of.samples)
  pi_ULs <- rep(NA, number.of.samples)
  pi_in_interval <- rep(NA, number.of.samples)
  ci_as_pi_in_interval <- rep(NA, number.of.samples)
  # pi_capture_dist_percent <- rep(NA, number.of.samples)
  for (i in 1:number.of.samples) {
    x <- data_samples$x_true[, i]
    y <- data_samples$y_true[, i]
    r_info <- stats::cor.test(x, y)
    rs[i] <- round(r_info$estimate, number.of.decimals)
    LLs[i] <- round(r_info$conf.int[1], number.of.decimals)
    ULs[i] <- round(r_info$conf.int[2], number.of.decimals)
    in_interval[i] <- is_value_in_interval(pop.r, c(r_info$conf.int[1], r_info$conf.int[2]))
    ts[i] <- round(r_info$statistic, number.of.decimals)
    dfs[i] <- round(r_info$parameter, number.of.decimals)
    ps[i] <- r_info$p.value
    pi_info <- predictionInterval::pi.r(r = rs[i], n = n, rep.n = n)
    pi_LLs[i] <- round(pi_info$lower_prediction_interval, number.of.decimals)
    pi_ULs[i] <- round(pi_info$upper_prediction_interval, number.of.decimals)
    if (i > 1) {
      # Check whether the current sample r falls inside the previous sample's
      # prediction interval and confidence interval
      pi_in_interval[i - 1] <- is_value_in_interval(rs[i], c(pi_LLs[i - 1], pi_ULs[i - 1]))
      ci_as_pi_in_interval[i - 1] <- is_value_in_interval(rs[i], c(LLs[i - 1], ULs[i - 1]))
    }
    # pi_capture_dist_percent[i] <- pi.dist.capture(pop.r = pop.r, n = n, pi.LL = pi_LLs[i], pi.UL = pi_ULs[i])
  }
  sample.number <- 1:number.of.samples
  #data.out <- data.frame(sample.number, pop.r = pop.r, n = n, r = rs, ci.LL = LLs, ci.UL = ULs, ci.captures.pop.r = in_interval, pi.LL = pi_LLs, pi.UL = pi_ULs, pi.captures.next.r = pi_in_interval, ci.captures.next.r = ci_as_pi_in_interval, pi.dist = pi_capture_dist_percent)
  data.out <- data.frame(sample.number = sample.number,
                         pop.r = pop.r,
                         n = n,
                         r = rs,
                         ci.LL = LLs,
                         ci.UL = ULs,
                         pi.LL = pi_LLs,
                         pi.UL = pi_ULs,
                         ci.captures.pop.r = in_interval,
                         pi.captures.next.r = pi_in_interval,
                         p = ps)
  rownames(data.out) <- NULL
  data.out <- tibble::as_tibble(data.out)
  return(data.out)
}
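# Illustrative workflow (not run; a sketch only, and output values vary with the
# random seed): draw many samples from a population with r = .35 and check how
# often the confidence intervals capture the population correlation.
#
#   samples <- get.r.samples(pop.r = .35, n = 100, number.of.samples = 1000)
#   percent.true(samples$ci.captures.pop.r)   # typically near 95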
get_true_scores_with_sampling_error_cov <- function(Sigma, mu, n, K) {
  # This is the MASS::mvrnorm routine restructured to eliminate
  # redundant calculations when it is repeated K times.
  # Matrix approach for fast bivariate simulations.
  p <- length(mu)
  eS <- eigen(Sigma, symmetric = TRUE)
  ev <- eS$values
  Xmulti <- eS$vectors %*% diag(sqrt(pmax(ev, 0)), p)
  xs <- matrix(NA, n, K)
  ys <- matrix(NA, n, K)
  for (i in 1:K) {
    X <- matrix(stats::rnorm(p * n), n)
    X2 <- Xmulti %*% t(X)
    X2 <- t(X2)
    xs[, i] <- X2[, 1]
    ys[, i] <- X2[, 2]
  }
  output <- list()
  output$x_true <- xs
  output$y_true <- ys
  return(output)
}
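# Illustrative sketch (not run; the object names below are hypothetical): with a
# population correlation of .35, each column pair returned by the helper is one
# simulated sample, and its observed correlation varies around .35.
#
#   Sigma <- matrix(c(1, .35, .35, 1), nrow = 2)
#   sim <- get_true_scores_with_sampling_error_cov(Sigma, mu = c(0, 0), n = 100, K = 5)
#   stats::cor(sim$x_true[, 1], sim$y_true[, 1])   # close to .35, but not exact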
is_value_in_interval <- function(value, interval) {
  is_in_interval <- FALSE
  check_interval <- findInterval(value, sort(interval), rightmost.closed = TRUE)
  if (check_interval == 1) {
    is_in_interval <- TRUE
  }
  return(is_in_interval)
}
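# Illustrative check (not run): findInterval() returns 1 only when the value
# lies inside the sorted interval, so, for example,
#   is_value_in_interval(0.40, c(0.25, 0.55))   # TRUE
#   is_value_in_interval(0.60, c(0.25, 0.55))   # FALSE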
#' Calculate the percentage of values in a column that are TRUE
#' @param x The column (logical vector) to be examined
#' @return The percentage of non-missing values equal to TRUE
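#' @examples
#' # Illustrative: 2 of the 3 non-missing values are TRUE, so the result is about 66.7
#' percent.true(c(TRUE, FALSE, TRUE, NA))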
#' @export
percent.true <- function(x) {
  sum_TRUE <- sum(x, na.rm = TRUE)
  sum_length <- sum(!is.na(x))
  return(sum_TRUE / sum_length * 100)
}