################################################################################
# Test generators
################################################################################
set.seed(5324)
context("Simple Random Correlation Matrices")
N <- 8000
K <- 10
RHO1 <- 0.45
covmat <- genNumeric(N, K, rho=RHO1)
test_that("Dimensions are correct", {
expect_equal(nrow(covmat), N)
expect_equal(ncol(covmat), K)
expect_is(covmat, "matrix")
})
covE <- rep(NA, K-1)
for(i in 2:ncol(covmat)){
covE[i-1] <- cor(covmat)[i-1, i]
}
tol <- RHO1 * K * .005
err <- sum(abs(covE - RHO1)) / K
test_that("Correlations are correct", {
expect_that(err, is_less_than(tol))
})
context("User Controlled Random Correlation Matrices")
struc <- list(dist = c("norm", "norm", "pois"),
rho = c(-.05, -.4, 0.4),
names = c("score", "score2", "accept"))
N <- 5000
covmat <- genNumeric(N, pattern=struc)
test_that("Dimensions are correct in simple case", {
expect_equal(nrow(covmat), N)
expect_equal(ncol(covmat), length(struc$dist)+1)
expect_is(covmat, "data.frame")
})
low <- -1
high <- -1
mat <- cor(covmat)
delta <- row(mat) - col(mat)
mat[delta < low | delta > high] <- NA
err <- sum((as.numeric(mat[!is.na(mat)]) - struc$rho)^2) / length(struc$rho)
tol <- sum(abs(struc$rho*.05))
test_that("Correlations are correct in simple case", {
expect_that(err, is_less_than(tol))
})
test_that("Names get passed properly in simple case", {
expect_equivalent(names(covmat)[2:ncol(covmat)], struc$names)
expect_is(covmat, "data.frame")
})
context("User Controlled Random Correlation Matrices Complex")
struc2 <- list(dist = c("norm", "chisq", "pois", "norm",
"weibull", "gamma"),
rho = c(-.05, -.4, 0.3, 0.9, .03, -.6),
names = c("score", "accept", "score2", "days", "days2",
"luck"))
covmat <- genNumeric(N, pattern=struc2)
test_that("Dimensions are correct in complex case", {
expect_equal(nrow(covmat), N)
expect_equal(ncol(covmat), length(struc2$dist)+1)
expect_is(covmat, "data.frame")
})
mat <- cor(covmat)
delta <- row(mat) - col(mat)
mat[delta < low | delta > high] <- NA
err <- sum((as.numeric(mat[!is.na(mat)]) - struc2$rho)^2) / length(struc2$rho)
tol <- sum(abs(struc2$rho*.05))
test_that("Correlations are correct in complex case", {
expect_that(err, is_less_than(tol))
})
test_that("Names get passed properly in complex case", {
expect_equivalent(names(covmat)[2:ncol(covmat)], struc2$names)
expect_is(covmat, "data.frame")
})
context("User specified Seed is Correct")
N <- 1000
K <- 6
RHO1 <- 0.3
RHO2 <- -0.8
S1 <- rnorm(N)
S2 <- rnorm(N)
struc3 <- list(dist = c("norm", "chisq", "pois", "norm",
"weibull", "gamma"),
rho = c(-.05, -.4, 0.3, 0.9, .03, -.6),
names = c("score", "accept", "score2", "days", "days2",
"luck"),
seed = c(runif(N), rpois(N, 7), rpois(N, 3), rgamma(N, shape=2),
runif(N)))
covmat <- genNumeric(N, K, rho=RHO1, seed=S1)
covmat2 <- genNumeric(N, K, rho=RHO2, seed=S2)
covmat3 <- genNumeric(N, pattern = struc3)
err1 <- cor(covmat)[2:K, 1]
tol1 <- abs(sum(err1 - RHO1)) / K
err2 <- cor(covmat2)[2:K, 1]
tol2 <- abs(sum(err2 - RHO2)) / K
test_that("Function appropriately switches between fixed and variable seed",{
expect_error(genNumeric(N, K, rho=-0.5, seed=3))
expect_error(genNumeric(N, K, rho=0.3, seed=rep(7, 10)))
expect_error(genNumeric(N, K, rho=0.3, seed=rep(7, N+10)))
expect_error(genNumeric(N, K, rho=0.3, seed=cbind(rep(7, N+10), rep(7, N+10))))
expect_error(genNumeric(N, K, rho=0.3, seed=cbind(rep(7, N), rep(7, N))))
})
test_that("Function gets the right answer!", {
expect_identical(covmat[, 1], S1)
expect_identical(covmat2[, 1], S2)
expect_that(tol1, is_less_than(0.05))
expect_that(tol2, is_less_than(0.05))
})
context("Test user specified seed in patterned structure")
seeds <- genNumeric(1000, 6, rho=0.3)
struc <- list(dist=c("norm", "norm", "chisq", "pois", "pois", "gamma",
"weibull"),
rho=c(0.7, 0.3, -0.5, 0.3, -0.8, 0.05, 0.7),
names=c("test1", "test2", "noise", "daysattended",
"daysOUT", "bad", "bad2"),
seed = cbind(seeds[,1], seeds[,2], seeds[,3], seeds[, 4], seeds[, 5],
seeds[, 6], seeds[,1]))
dat <- genNumeric(1000, seed=TRUE, pattern=struc)
test_that("Data generates the right answer", {
expect_that(abs(cor(seeds[,1], dat[,1]) - struc$rho[1]), is_less_than(.05))
expect_that(abs(cor(seeds[,2], dat[,2]) - struc$rho[2]), is_less_than(.05))
expect_that(abs(cor(seeds[,3], dat[,3]) - struc$rho[3]), is_less_than(.1))
expect_that(abs(cor(seeds[,4], dat[,4]) - struc$rho[4]), is_less_than(.05))
expect_that(abs(cor(seeds[,5], dat[,5]) - struc$rho[5]), is_less_than(.05))
expect_that(abs(cor(seeds[,6], dat[,6]) - struc$rho[6]), is_less_than(.1))
expect_that(abs(cor(seeds[,1], dat[,7]) - struc$rho[7]), is_less_than(.05))
})
context("Speed")
N <- 98756
K <- 25
P <- 0.43278456
RHO2 <- -0.24
covmat <- genNumeric(N, K, rho=RHO1)
test_that("Function executes in reasonable length of time", {
expect_that(genNumeric(1000, 25, rho=0.3), takes_less_than(1))
expect_that(genNumeric(500, 40, rho=0.3), takes_less_than(1))
expect_that(genNumeric(8795, 8, rho=0.3), takes_less_than(1))
expect_that(genNumeric(8000, 70, rho=0.3), takes_less_than(8))
})
context("Generate simple correlated factors")
N <- 10000
K <- 30
LEVS <- 12
RHO1 <- 0.2
test <- genFactor(N, K, nlevel=LEVS, rho=RHO1)
test_that("genFactor outputs data in expected size and shape", {
expect_is(test, "data.frame")
expect_equal(nrow(test), N)
expect_equal(ncol(test), K)
expect_equal(length(table(test[1])), LEVS)
})
context("Are the variables in the data.frame properly related?")
chiTest <- function(i, j, data) {chisq.test(data[,i], data[,j])$p.value}
chiP <- Vectorize(chiTest, vectorize.args=list("i", "j"))
Cresults <- outer(1:K, 1:K, chiP, data=test)
gTest <- function(i, j, data) {gammaGK(data[, i], data[, j])$g}
gP <- Vectorize(gTest, vectorize.args=list("i", "j"))
Gresults <- outer(1:K, 1:K, gP, data=test)
gTestSE <- function(i, j, data) {gammaGK(data[, i], data[, j])$se}
gPse <- Vectorize(gTestSE, vectorize.args=list("i", "j"))
GSEresults <- outer(1:K, 1:K, gPse, data=test)
chiSQ <- rep(NA, K-1)
for(i in 2:ncol(Cresults)){
chiSQ[i-1] <- Cresults[i-1, i] < 0.05
}
summary(chiSQ)
gMAT <- cbind(rep(NA, K-1),rep(NA, K-1))
for(i in 2:ncol(Cresults)){
gMAT[i-1, 1] <- Gresults[i-1, i] + 2* GSEresults[i-1, i]
gMAT[i-1, 2] <- Gresults[i-1, i] - 2* GSEresults[i-1, i]
}
tol <- RHO1 * .01
test2 <- abs(round(gMAT[,1], digits=3) - RHO1) > tol
test_that("Bivariate relationships exist and magnitude is correct", {
expect_equivalent(length(chiSQ), table(chiSQ)["TRUE"][[1]])
expect_that(table(chiSQ)["TRUE"][[1]], is_more_than(floor(.9*length(test2))))
})
test_that("Function is not slow", {
expect_that(genFactor(N, K, nlevel=LEVS, rho=RHO1), takes_less_than(6))
})
context("Generate user specified correlated factors")
set.seed(12532)
N <- 15000
K <- 4
LEVS <- 5
RHO1 <- -0.2
S1 <- sample(letters[1:5], N, replace=TRUE)
S2 <- rnorm(N)
test <- genFactor(N, K, nlevel=LEVS, rho=RHO1, seed=S2)
test2 <- genFactor(N, K, nlevel=LEVS, rho=RHO1, seed=S1)
tol <- 0.12
test_that("Correlations are reasonable", {
expect_that(abs(gammaGK(test[,1], test[,5])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test[,1], test[,2])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test[,1], test[,3])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test[,1], test[,4])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,5])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,2])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,3])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,4])$gamma - RHO1), is_less_than(tol))
})
N <- 10000
K <- 4
LEVS <- 5
RHO1 <- -0.9
RHO2 <- 0.9
S1 <- sample(letters[1:5], N, replace=TRUE)
S2 <- rnorm(N)
test <- genFactor(N, K, nlevel=LEVS, rho=RHO1, seed=S2)
test2 <- genFactor(N, K, nlevel=LEVS, rho=RHO2, seed=S1)
test_that("Test extreme values of RHO", {
expect_that(abs(gammaGK(test[,1], test[,5])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test[,1], test[,2])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test[,1], test[,3])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test[,1], test[,4])$gamma - RHO1), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,5])$gamma - RHO2), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,2])$gamma - RHO2), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,3])$gamma - RHO2), is_less_than(tol))
expect_that(abs(gammaGK(test2[,1], test2[,4])$gamma - RHO2), is_less_than(tol))
})
context("Chain Together Starting Seeds")
seeds <- genNumeric(1000, 6, rho=0.3)
struc <- list(dist=c("norm", "norm", "unif", "pois", "pois", "gamma",
"weibull"),
rho=c(0.7, 0.3, -0.5, 0.3, -0.8, 0.05, 0.7),
names=c("test1", "test2", "noise", "daysattended",
"daysOUT", "bad", "bad2"),
seed = cbind(seeds[,1], seeds[,2], seeds[,3], seeds[, 4], seeds[, 5],
seeds[, 6], seeds[,1]))
dat1 <- genFactor(1000, 3, nlevel=3, rho=0.8)
dat2 <- genFactor(1000, 4, nlevel=4, rho=0.3, seed=rnorm(1000))
dat3 <- genFactor(1000, 4, nlevel=6, rho=-0.7, seed=dat2[,4])
test_that("Test that seed is preserved and identical in two data frames",{
expect_identical(dat2[, 4], dat3[, 1])
})
try1 <- genFactor(25000, 4, nlevel=27, rho=0.3, seed=rnorm(25000))
try2 <- genFactor(25000, 4, nlevel=100, rho=0.3, seed=rnorm(25000))
try3 <- genFactor(50000, 4, nlevel=200, rho=0.3, seed=rnorm(50000))
try4 <- genFactor(50000, 4, nlevel=400, rho=0.3, seed=rnorm(50000))
mx1 <- max(apply(try1, 2, function(x) length(unique(x))))
mn1 <- min(apply(try1, 2, function(x) length(unique(x))))
md1 <- median(apply(try1, 2, function(x) length(unique(x))))
mx2 <- max(apply(try2, 2, function(x) length(unique(x))))
mn2 <- min(apply(try2, 2, function(x) length(unique(x))))
md2 <- median(apply(try2, 2, function(x) length(unique(x))))
mx3 <- max(apply(try3, 2, function(x) length(unique(x))))
mn3 <- min(apply(try3, 2, function(x) length(unique(x))))
md3 <- median(apply(try3, 2, function(x) length(unique(x))))
mx4 <- max(apply(try4, 2, function(x) length(unique(x))))
mn4 <- min(apply(try4, 2, function(x) length(unique(x))))
md4 <- median(apply(try4, 2, function(x) length(unique(x))))
tol1 <- 1*27
tol2 <- .8*100
tol3 <- .8*200
tol4 <- .8*400
test_that("Factor levels greater than 26 can be generated", {
expect_equal(md1, tol1)
expect_that(md2, is_more_than(tol2))
expect_that(md3, is_more_than(tol3))
expect_that(md4, is_more_than(tol4))
})
context("Generate formulas")
#
# set.seed(382)
# seeds <- genNumeric(10000, 6, rho=0.1)
#
# struc <- list(dist=c("norm", "norm", "unif", "pois", "pois", "gamma",
# "weibull"),
# rho=c(0.7, 0.3, -0.5, 0.3, -0.8, 0.05, 0.7),
# names=c("test1", "test2", "noise", "daysattended",
# "daysOUT", "bad", "bad2"),
# seed = cbind(seeds[,1], seeds[,2], seeds[,3], seeds[, 4], seeds[, 5],
# seeds[, 6], seeds[,1]))
#
# dat <- genNumeric(10000, pattern=struc)
#
# dat1 <- genFactor(10000, 3, nlevel=3, rho=0.8)
# dat2 <- genFactor(10000, 4, nlevel=4, rho= - 0.1, seed=dat[,6])
#
# names(dat1) <- sample(LETTERS, length(names(dat1)))
# names(dat2) <- sample(letters, length(names(dat2)))
# mdf <- cbind(dat, dat1)
# mdf <- cbind(mdf, dat2)
#
# myF <- list(vars = sample(names(mdf), 7))
#
# genFormula(mdf, myF$vars[-1])
#
# context("Generate binomial dependent variables")
#
# myF$coefs <- rnorm(length(genFormula(mdf, myF$vars)[-1]), mean=0, sd=4)
#
# mdf$out <- genBinomialDV(mdf, form=myF, intercept=-2)
# table(mdf$out)
#
# mod1 <- glm(out ~ ., data=mdf, family="binomial")
# mod2 <- glm(out ~ bad2 + cC + F + tT + q + hH + r, data=mdf,
# family="binomial")
#
# context("Generate other dependent variables")
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.