## DATA GENERATION
## Four occupation groups with randomly drawn sizes (10-15 respondents each),
## group means and group standard deviations. The nested sample() calls are
## kept exactly as they were so the random number stream is consumed in the
## same order.
n <- 9 + sample(1:6, 4)
dat <- data.frame(
  Evaluation = rnorm(
    sum(n),
    mean = rep(
      sample(
        seq(from = sample(25:55, 1), by = 1, length.out = sample(15:20, 1)),
        4
      ) / 10,
      n
    ),
    sd = rep(sample(70:80, 4) / 100, n)
  ),
  Occupation = factor(
    rep(1:4, n),
    labels = c("Student", "Employee", "Self-Employed", "Retired")
  )
)
## Clamp the simulated evaluations to the 0-10 answer scale.
dat$Evaluation <- pmin(pmax(dat$Evaluation, 0), 10)

## QUESTION/ANSWER GENERATION
questions <- character(5)
solutions <- logical(5)
explanations <- character(5)

## Null model (single overall mean) versus one mean per occupation.
fm0 <- lm(Evaluation ~ 1, data = dat)
fm1 <- lm(Evaluation ~ Occupation, data = dat)
myanova <- anova(fm0, fm1)

## Character version of the ANOVA table that is shown to the participants.
## NOTE(review): fmt() is not defined in this file; presumably it is the
## number-formatting helper of the exams package -- confirm.
myanova_print <- matrix(nrow = 2, ncol = 6)
dimnames(myanova_print) <- dimnames(myanova)
myanova_print[, 1] <- round(myanova[, 1])
myanova_print[, 3] <- fmt(myanova[, 3], 0)

## Number of decimals for the RSS column: fewer decimals the more integer
## digits the largest RSS has, bounded to the range 0..3.
rss_digits <- max(0, min(3, 5 - max(nchar(round(myanova[, 2], digits = 0)))))
rss <- round(myanova[, 2], digits = rss_digits)
myanova_print[, 2] <- format(rss, nsmall = rss_digits)
myanova_print[2, 4] <- format(-diff(rss), nsmall = rss_digits)
myanova_print[1, 3:6] <- ""
myanova_print[2, 5] <- round(myanova[2, 5], digits = 3)
myanova_print[2, 6] <- format.pval(myanova[2, 6])

## Fraction of explained variance, computed from the *rounded* RSS values so
## that it matches what can be derived from the printed table.
r2 <- 1 - as.numeric(rss[2]) / as.numeric(rss[1])

## Statement 1: test statistic compared with a random threshold; the
## direction of the comparison (smaller/larger) is chosen at random.
f2 <- sample(10:250, 1) / 10
f_stat <- myanova[2, 5]
ask_smaller <- runif(1) > 0.5
direction <- if (ask_smaller) "smaller" else "larger"
questions[1] <- paste0(
  "The test statistic is ", direction, " than $", f2, "$."
)
solutions[1] <- if (ask_smaller) f_stat < f2 else f_stat > f2
explanations[1] <- paste0(
  "The test statistic is $F = ", fmt(f_stat, 3), "$ and hence ",
  if (solutions[1]) "" else "_not_",
  " ", direction, " than $", f2, "$."
)

## Statement 2: always false.
questions[2] <- "A one-sided alternative was tested for the mean values."
solutions[2] <- FALSE
explanations[2] <- paste(
  "An ANOVA always tests the null hypothesis,",
  "that all mean values are equal against the alternative hypothesis that they are different."
)

## Statement 3: explained variance larger than a random threshold.
r2a <- sample(10:60, 1) / 100
questions[3] <- paste0(
  "The fraction of explained variance is larger than $", 100 * r2a, "$%."
)
solutions[3] <- r2 > r2a
explanations[3] <- paste0(
  "The fraction of explained variance is $", fmt(r2, 3), "$ and hence ",
  if (solutions[3]) "" else "_not_",
  " larger than ", r2a, "."
)

## Statement 4: significance at the 5% level.
questions[4] <- paste(
  "It can be shown that the evaluation of the respondents depends on their occupation.",
  "(Significance level $5$%)"
)
solutions[4] <- myanova[2, 6] < 0.05
## The negation carries its own leading space; previously the text rendered
## as "hence_not_ significant" when the test was not significant.
explanations[4] <- paste0(
  "The $p$ value is $", format.pval(myanova[2, 6], digits = 3),
  "$ and hence",
  if (solutions[4]) "" else " _not_",
  " significant. It can ",
  if (solutions[4]) "" else "_not_",
  " be shown that the evaluations differ with respect to the occupation of the respondents."
)

## Statement 5: explained variance smaller than a threshold.
## assure at least one correct answer: if none of the statements so far is
## true, the threshold is drawn above r2 so that this statement is true.
r2b <- if (any(solutions)) {
  sample(10:60, 1) / 100
} else {
  min(sample(ceiling(100 * r2) + 1:10, 1), 100) / 100
}
questions[5] <- paste0(
  "The fraction of explained variance is smaller than $", 100 * r2b, "$%."
)
solutions[5] <- r2 < r2b
explanations[5] <- paste0(
  "The fraction of explained variance is $", fmt(r2, 3), "$ and hence ",
  if (solutions[5]) "" else "_not_",
  " smaller than ", r2b, "."
)

## permute order of solutions/questions
o <- sample(1:5)
questions <- questions[o]
solutions <- solutions[o]
explanations <- explanations[o]
A survey with `r sum(n)`
persons was conducted to analyze the
design of an advertising campaign. Each respondent was asked to
evaluate the overall impression of the advertisement on an
eleven-point scale from 0 (bad) to 10 (good). The evaluations are
summarized separately with respect to type of occupation of the
respondents in the following figure.
\
# Shrink the figure margins, then plot the evaluations against the
# occupation factor (one statement per line; they were fused on one line).
par(mar = c(4, 4, 1, 1))
plot(Evaluation ~ Occupation, data = dat)
To analyze the influence of occupation on the evaluation of the advertisement an analysis of variance was performed:
# Print the pre-formatted character ANOVA table without quotes and
# right-aligned (one statement per line; they were fused on one line).
options(show.signif.stars = FALSE)
print(myanova_print, quote = FALSE, right = TRUE)
Which of the following statements are correct?
# Emit the five statements as the answer list of the question.
# NOTE(review): answerlist() is not defined in this file; presumably it
# comes from the exams package -- confirm.
answerlist(questions, markup = "markdown")
In order to be able to answer the questions the fraction of
explained variance has to be determined. The residual sum of squares
when using only a single overall mean value ($\mathit{RSS}_0$) as
well as the residual sum of squares when allowing different mean
values given occupation ($\mathit{RSS}_1$) are required. Both are
given in the RSS column of the ANOVA table. The
fraction of explained variance is given by
$1 - \mathit{RSS}_1/\mathit{RSS}_0 = 1 - `r rss[2]`/`r rss[1]` = `r round(r2, digits = 3)`$.
The statements above can now be evaluated as right or wrong.
# Solution list: label each statement "True"/"False" (looked up by index
# from the logical solutions vector) and pair it with its explanation.
answerlist(
  c("False", "True")[solutions + 1L],
  explanations,
  markup = "markdown"
)
extype: mchoice
exsolution: `r mchoice2string(solutions)`
exname: Analysis of variance
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.