Nothing
#' @rdname pearson_data
#' @title Pearson Data
#' @aliases dpearson
#' @description Generates an integer data set for computing a correlation using [exams.forge::sumofsquares()].
#' If `n>100` and `nmax>6` it is better to use one of the precomputed solutions. Otherwise it may take
#' up to `maxt` seconds. Please note that the correlation of the generated data set may differ from the desired
#' correlation.
#'
#' @md
#' @param r numeric: desired correlation
#' @param n integer: number to decompose as sum of squares, see [exams.forge::sumofsquares()].
#' @param nmax integer: maximal number of squares in the sum, see [exams.forge::sumofsquares()].
#' @param maxt numeric: maximal number of seconds the routine should run, see [exams.forge::sumofsquares()].
#' @param xsos sos matrix: precomputed matrix
#' @param ysos sos matrix: precomputed matrix
#'
#' @return A matrix with two columns and an attribute `interim` for intermediate values as matrix.
#' The rows of the matrix contain : \eqn{x_i}, \eqn{y_i}, \eqn{x_i-bar{x}}, \eqn{y_i-\bar{y}},
#' \eqn{(x_i-bar{x})^2}, \eqn{(y_i-\bar{y})^2}, and \eqn{(x_i-bar{x})((y_i-\bar{y})}.
#' In a final step, a vector with the row of sums is appended as a further column.
#' @export
#'
#' @examples
#' data(sos)
#' xy <- pearson_data(0.7, xsos=sos100)
#' colSums(xy)
#' colSums(xy^2)
#' sum(xy[,1]*xy[,2])
#' # my data
#' x <- 100+5*xy[,1]
#' y <- 100+5*xy[,2]
#' cor(x, y)
pearson_data <- function(r, n=100, nmax=6, maxt=30, xsos=NULL, ysos=NULL) {
stopifnot(abs(r)<=1)
if (is.null(xsos)) {
xsos <- sumofsquares(n=n, nmax=nmax, maxt=maxt, zerosum = TRUE)
xsos <- xsos[rowSums(is.na(xsos))==0,1:nmax]
ysos <- xsos
} else {
xsos <- xsos[rowSums(!is.na(xsos))==nmax,1:nmax]
if (is.null(ysos)) ysos <- xsos
}
rxy <- xsos%*%t(ysos)
rxy <- rxy/n
rxy <- abs(rxy-abs(r))
ind <- which(rxy==min(rxy), arr.ind = TRUE)
if (nrow(ind)>1) ind <- ind[sample(1:nrow(ind), 1),]
xy <- cbind(xsos[ind[1],], if(r>0) ysos[ind[2],] else -ysos[ind[2],])
xyn <- scale(xy, scale=FALSE)
m <- rbind(t(xy), t(xyn), t(xyn^2), xyn[,1]*xyn[,2])
m <- cbind(m, rowSums(m))
colnames(m) <- c(1:nmax, "$\\sum$")
row.names(m) <- c("$x_i$", "$y_i$", "$(x_i-\\bar{x})$", "$(y_i-\\bar{y})$",
"$(x_i-\\bar{x})^2$", "$(y_i-\\bar{y})^2$", "$(x_i-\\bar{x})(y_i-\\bar{y})$")
structure(xy, interim=m)
}
#' @rdname pearson_data
#' @export
# dpearson <- function(...){
# pearson_data(...)}
dpearson <- pearson_data
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.