# tests/testthat/test_inference.R

context("Get plot inference")

# Two-group fixture shared by the two-sample test below: 100 normal draws
# whose mean/sd alternate between (150, 10) and (155, 20), with the group
# label `y` recycled in the same A, B, A, B, ... order.
set.seed(100)
d <- data.frame(
    x = rnorm(n = 100, mean = c(150, 155), sd = c(10, 20)),
    y = factor(c("A", "B")),
    stringsAsFactors = TRUE
)

test_that("Two-sample tests use appropriate CI", {
    # Same inference summary twice, differing only in the equal-variance
    # assumption passed to the hypothesis test.
    pTRUE <- getPlotSummary(x, y,
        data = d,
        summary.type = "inference",
        inference.type = "conf",
        hypothesis.var.equal = TRUE
    )
    pFALSE <- getPlotSummary(x, y,
        data = d,
        summary.type = "inference",
        inference.type = "conf",
        hypothesis.var.equal = FALSE
    )
    summaries <- list(pTRUE, pFALSE)

    # Reference fits, in the same order as `summaries`. Each fit is computed
    # once and reused for both the p-value and the interval comparison.
    fits <- list(
        t.test(x ~ y, data = d, var.equal = TRUE),
        t.test(x ~ y, data = d, var.equal = FALSE)
    )

    # The reported p-value is the third comma-separated field of the first
    # line containing "p-value = "; everything up to the "=" is stripped.
    # vapply() guarantees exactly one numeric value per summary.
    pvals <- vapply(summaries, function(txt) {
        p_line <- txt[grep("p-value = ", txt)[1]]
        as.numeric(gsub(".+=", "", strsplit(p_line, ",")[[1]][3]))
    }, numeric(1))
    expect_equal(
        pvals,
        vapply(fits, function(fit) fit$p.value, numeric(1))
    )

    # The "A - B" row holds the estimate followed by the interval limits;
    # drop the label, parse the numbers, and discard the estimate.
    cis <- lapply(summaries, function(txt) {
        scan(
            text = gsub("A - B", "", txt[grep("A - B", txt)]),
            what = double(),
            quiet = TRUE
        )[-1]
    })
    expect_equal(
        cis,
        lapply(fits, function(fit) as.numeric(round(fit$conf.int, 3)))
    )
})

# One-sample fixture: a single two-level factor. The sampling weights are
# passed exactly as given (they do not sum to one).
set.seed(400)
d <- data.frame(
    x = sample(c("A", "B"), size = 100, replace = TRUE, prob = c(0.3, 0.8)),
    stringsAsFactors = TRUE
)

# Reference results the summaries below are compared against.
# ptest: two-sided normal-approximation p-value for the first level's
# proportion against 0.5, using a standard error of 0.05.
ptest <- list(
    p.value = 2 * pnorm(
        abs((table(d$x)[[1]] / 100 - 0.5) / 0.05),
        lower.tail = FALSE
    )
)
# btest: exact binomial test against 0.4, one-sided ("less").
btest <- binom.test(table(d$x), p = 0.4, alternative = "less")
# ctest: chi-square test on the one-way table.
ctest <- chisq.test(table(d$x))

# s1: proportion test without the exact method, two-sided against 0.5.
s1 <- getPlotSummary(x,
    data = d,
    summary.type = "inference",
    inference.type = "conf",
    hypothesis.test = "proportion",
    hypothesis.use.exact = FALSE,
    hypothesis.value = 0.5,
    hypothesis.alt = "two.sided"
)
# s2: proportion test with the exact method, one-sided against 0.4.
s2 <- getPlotSummary(x,
    data = d,
    summary.type = "inference",
    inference.type = "conf",
    hypothesis.test = "proportion",
    hypothesis.use.exact = TRUE,
    hypothesis.value = 0.4,
    hypothesis.alt = "less"
)
# s3: chi-square test.
s3 <- getPlotSummary(x,
    data = d,
    summary.type = "inference",
    inference.type = "conf",
    hypothesis.test = "chi2"
)
test_that("One-sample tests give correct p-value", {
    # Each summary paired with the reference test whose p-value it should
    # report.
    cases <- list(
        list(summary = s1, reference = ptest),
        list(summary = s2, reference = btest),
        list(summary = s3, reference = ctest)
    )
    for (case in cases) {
        expected <- sprintf(
            "p-value = %s",
            format.pval(case$reference$p.value, digits = 5)
        )
        # The formatted number contains ".", which a regular expression would
        # treat as "any character"; match it literally instead.
        expect_match(
            paste(case$summary, collapse = "\n"),
            expected,
            fixed = TRUE
        )
    }
})

test_that("One-sample tests display correct hypotheses", {
    # For each summary: the collapsed text plus the null and alternative
    # hypothesis statements it is expected to contain.
    cases <- list(
        list(
            text = paste(s1, collapse = "\n"),
            null = "Null Hypothesis: true proportion of x = A is 0.5",
            alt = "Alternative Hypothesis: true proportion of x = A is not equal to 0.5"
        ),
        list(
            text = paste(s2, collapse = "\n"),
            null = "Null Hypothesis: true proportion of x = A is 0.4",
            alt = "Alternative Hypothesis: true proportion of x = A is less than 0.4"
        ),
        list(
            text = paste(s3, collapse = "\n"),
            null = "Null Hypothesis: true proportions in each category are equal",
            alt = "Alternative Hypothesis: true proportions in each category are not equal"
        )
    )

    for (case in cases) {
        expect_match(case$text, case$null)
        expect_match(case$text, case$alt)
    }
})


# small counts
# Two-way fixture with only ten observations: the four Machine x Course
# combinations are repeated 4, 3, 1 and 2 times respectively.
d <- expand.grid(
    Machine = c("Desktop of tablet", "Mobile"),
    Course = c("STATS101/G", "STATS108")
)
d <- d[rep(seq_len(nrow(d)), times = c(4, 3, 1, 2)), ]
rownames(d) <- NULL
# Argument name spelled out in full rather than relying on partial matching
# of `simulate` to `simulate.p.value`.
ctest <- chisq.test(table(d$Course, d$Machine), simulate.p.value = TRUE)
s1 <- getPlotSummary(Machine, Course,
    data = d, summary.type = "inference",
    inference.type = "conf",
    hypothesis.test = "chi2"
)
test_that("Simulated p-value is included when small expected values", {
    # The label contains "(" and "<", so it is matched as a literal string.
    summary_text <- paste(s1, collapse = "\n")
    expected_label <- "Simulated p-value (since some expected counts < 5) ="
    expect_match(summary_text, expected_label, fixed = TRUE)
})

test_that("Simulated p-value is included when requested", {
    # Data file read relative to the working directory of the test run.
    cas <- read.csv("cas.csv", stringsAsFactors = TRUE)

    # Explicitly request a simulated p-value for the chi-square test.
    s <- getPlotSummary(
        cellsource, gender,
        data = cas,
        summary.type = "inference",
        inference.type = "conf",
        hypothesis.test = "chi2",
        hypothesis.simulated.p.value = TRUE
    )

    summary_text <- paste(s, collapse = "\n")
    expect_match(summary_text, "Simulated p-value =")
})


# a giant table

# 2000?
# tab <- matrix(sample(2000, replace = TRUE), ncol = 50)
# system.time(chisq.test(tab, simulate = TRUE))[3]




test_that("inzinference gives the same output", {
    # Each case calls the formula interface first, then the equivalent
    # getPlotSummary() call, and requires the two results to be equal.

    # Two numeric variables with a linear trend.
    from_formula <- inzinference(Sepal.Length ~ Sepal.Width,
        data = iris, trend = "linear", width = 80
    )
    from_summary <- getPlotSummary(Sepal.Width, Sepal.Length,
        data = iris, trend = "linear", width = 80,
        summary.type = "inference", inference.type = "conf"
    )
    expect_equal(from_formula, from_summary)

    # Same, subset by Species.
    from_formula <- inzinference(Sepal.Length ~ Sepal.Width | Species,
        data = iris, trend = "linear", width = 80
    )
    from_summary <- getPlotSummary(Sepal.Width, Sepal.Length,
        g1 = Species,
        data = iris, trend = "linear", width = 80,
        summary.type = "inference"
    )
    expect_equal(from_formula, from_summary)

    # Numeric by factor, subset by a numeric variable.
    from_formula <- inzinference(Sepal.Length ~ Species | Sepal.Width,
        data = iris, width = 80
    )
    from_summary <- getPlotSummary(Sepal.Length, Species,
        g1 = Sepal.Width,
        data = iris, width = 80, inference.type = "conf",
        summary.type = "inference"
    )
    expect_equal(from_formula, from_summary)
})

# anova
test_that("ANOVA (one-way) output is the correct way around", {
    inf <- inzinference(Sepal.Length ~ Species, data = iris)
    # The sign of the difference shows which group is subtracted from which.
    # The decimal point is escaped so it only matches a literal ".".
    expect_match(
        inf,
        "setosa\\s+-\\s+versicolor\\s+-0\\.930",
        all = FALSE
    )
})


####
test_that("Confidence level can be adjusted - dot plots", {
    # Read fixed-width table rows from a newline-joined string. read.fwf()
    # has no `text` argument and does not close a connection that is already
    # open when passed in, so the text connection is closed here on exit.
    read_fwf_text <- function(txt, widths) {
        con <- textConnection(txt)
        on.exit(close(con), add = TRUE)
        read.fwf(con, widths)
    }

    # dot plot - one
    inf <- inzinference(~Sepal.Length,
        data = iris,
        ci.width = 0.8
    ) |> as.character()
    expect_match(inf, "Mean with 80% Confidence Interval", all = FALSE)
    # The row after the "Estimate Lower Upper" header holds the numbers. The
    # expected leading NA is the first split field, which is not numeric
    # (presumably empty because the row starts with whitespace).
    expect_equal(
        inf[grep("Estimate\\s+Lower\\s+Upper", inf) + 1L] |>
            strsplit("\\s+") |> unlist() |> as.double() |> round(2L),
        c(NA_real_, 5.84, 5.76, 5.93)
    )

    # dot plot - two
    iris2 <- iris[iris$Species != "setosa", ] |> droplevels()
    inf <- inzinference(Sepal.Length ~ Species,
        data = iris2,
        ci.width = 0.90
    ) |> as.character()
    expect_match(inf, "Group Means with 90% Confidence Intervals",
        all = FALSE
    )
    # The group rows are the 3rd and 4th lines after the heading.
    x <- paste(
        collapse = "\n",
        inf[grep("Group Means with 90% Confidence Intervals", inf) + 3:4]
    )
    # `text =` lets read.table() open and close its own connection.
    m0 <- read.table(text = x)[, -1] |> as.matrix()
    # Reference: per-group one-sample t intervals at the same level.
    ci <- tapply(iris2$Sepal.Length, iris2$Species, t.test,
        conf.level = 0.9
    ) |>
        vapply(function(fit) fit$conf.int, numeric(2)) |>
        t()
    m <- cbind(
        tapply(iris2$Sepal.Length, iris2$Species, mean),
        ci[, 1],
        ci[, 2]
    ) |> round(3)
    expect_equivalent(m0, m)
    expect_match(inf,
        "Difference in group means with 90% Confidence Interval",
        all = FALSE
    )
    # Last three fields of the difference row: estimate, lower, upper.
    expect_equal(
        inf[grep("Difference in group means", inf) + 3L] |>
            strsplit("\\s+") |> unlist() |> tail(3) |> as.double() |>
            round(3L),
        c(-0.652, -0.844, -0.460)
    )

    # dot plot - 3+
    inf <- inzinference(Sepal.Length ~ Species,
        data = iris,
        ci.width = 0.99
    ) |> as.character()
    expect_match(inf, "Group Means with 99% Confidence Intervals",
        all = FALSE
    )
    x <- paste(
        collapse = "\n",
        inf[grep("Group Means with 99% Confidence Intervals", inf) + 3:5]
    )
    m0 <- read.table(text = x)[, -1] |> as.matrix()
    ci <- tapply(iris$Sepal.Length, iris$Species, t.test,
        conf.level = 0.99
    ) |>
        vapply(function(fit) fit$conf.int, numeric(2)) |>
        t()
    m <- cbind(
        tapply(iris$Sepal.Length, iris$Species, mean),
        ci[, 1],
        ci[, 2]
    ) |> round(3)
    expect_equivalent(m0, m)
    ## - difference CIs
    expect_match(inf,
        "99% Confidence Intervals",
        all = FALSE,
        fixed = TRUE
    )
    # Pairwise-difference rows, with any embedded newlines removed before
    # the rows are parsed as fixed-width columns.
    x <- paste(
        collapse = "\n",
        gsub("\n", "",
            inf[grep("adjusted for multiple comparisons", inf) + c(4, 5, 6)],
            fixed = TRUE
        )
    )
    m0 <- read_fwf_text(x, c(29, 9, 10, 10, 10))[, 2:4] |> as.matrix()
    f <- lm(Sepal.Length ~ Species, data = iris)
    m <- s20x::multipleComp(f, 0.99)[, 1:3]
    expect_equivalent(m0, m)

    # bar chart - one way
    inf <- inzinference(~Species,
        data = iris,
        ci.width = 0.92
    ) |> as.character()
    expect_match(
        inf,
        "Estimated Proportions with 92% Confidence Interval",
        all = FALSE
    )
    x <- paste(
        collapse = "\n",
        inf[grep("Estimated Proportions with 92% Confidence Interval", inf) + 3:5]
    )
    m0 <- read.table(text = x)[, -1] |>
        as.matrix() |>
        unname()
    # Normal-approximation half-width for a proportion of 1/3 at the 92%
    # level (upper quantile 0.96). Named `half_width` rather than `t` so
    # that base::t(), still called below, is not shadowed.
    half_width <- qnorm(0.96) * sqrt(1 / 3 * 2 / 3 / nrow(iris))
    m <- cbind(
        rep(1 / 3, 3),
        1 / 3 - half_width,
        1 / 3 + half_width
    ) |> round(3)
    expect_equivalent(m0, m)

    expect_match(inf, "with 92% Confidence Intervals", all = FALSE)
    x <- paste(
        collapse = "\n",
        gsub("\n", "",
            inf[grep("92% Confidence Intervals", inf) + 4:6],
            fixed = TRUE
        )
    )
    m0 <- read_fwf_text(x, c(26, 9, 10, 9))[, -1] |>
        as.matrix() |>
        as.double() |>
        round(3)
    m <- freq1way.edited(
        t(as.matrix(table(iris$Species))),
        conf.level = 0.92
    )[, -(1:2)] |>
        as.matrix() |>
        as.double() |>
        round(3)
    expect_equal(m0, m)

    # bar chart - two way
    set.seed(100)
    d <- data.frame(
        x = sample(LETTERS[1:3], 100, replace = TRUE),
        y = sample(LETTERS[1:2], 100, replace = TRUE),
        stringsAsFactors = TRUE
    )
    inf <- inzinference(y ~ x, data = d, ci.width = 0.8)

    expect_match(inf, "80% Confidence Intervals", all = FALSE)

    # scatter plot
    inf <- inzinference(Sepal.Length ~ Sepal.Width,
        data = iris, ci.width = 0.9, trend = "linear"
    )
    expect_match(
        inf,
        "Linear Trend Coefficients with 90% Confidence Intervals",
        all = FALSE
    )
})
# iNZightVIT/iNZightPlots documentation built on May 4, 2024, 6:18 a.m.