# Show each chunk's source code alongside its output in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
# SDS100 package; later chunks reference its download_data() helper in
# commented-out calls.
library(SDS100)
$\\$
A study by Nierenberg et al. (1989) investigated the relationship between personal characteristics and dietary factors, and plasma concentrations of carotenoids.
Please use the data they collected to create a 98% confidence interval for the number of grams of fiber US adults get in a day.
# 98% t-based confidence interval for the mean daily fiber intake.
#
# Uncomment the next line to fetch the data set on the first run:
# download.file("https://www.lock5stat.com/datasets3e/NutritionStudy.csv", "NutritionStudy.csv")
nutrition_df <- read.csv("NutritionStudy.csv")
fiber <- nutrition_df$Fiber

# Sample summaries.
n <- length(fiber)
xbar <- mean(fiber)
SE <- sd(fiber) / sqrt(n)

# A 98% interval leaves 1% in each tail, so the critical value is the
# 0.99 quantile of the t distribution with n - 1 degrees of freedom.
t_star <- qt(0.99, df = n - 1)

# Interval: point estimate plus/minus the margin of error.
CI <- c(xbar - t_star * SE, xbar + t_star * SE)
CI
$\\$
It is recommended that adults sleep at least 8 hours a night.
A Statistics professor asked 12 undergraduate students how much sleep they were getting and found the average was 6.2 hours with a standard deviation of 1.7 hours.
Assuming this is representative of all students in a Statistics class, does this provide evidence that students in the class are not getting enough sleep on average?
# One-sample, one-sided t-test computed from summary statistics.
# H0: mu = 8 hours   vs.   HA: mu < 8 hours
mu0 <- 8     # hypothesized mean (recommended hours of sleep)
xbar <- 6.2  # observed sample mean
n <- 12      # number of students surveyed
s <- 1.7     # sample standard deviation

# The outer parentheses print the statistic as well as storing it.
(t_stat <- (xbar - mu0) / (s / sqrt(n)))

# Lower-tail p-value: probability of a t statistic at least this far below
# zero under H0, using n - 1 degrees of freedom.
pt(t_stat, df = n - 1)
$\\$
step 1:
$H_0: \mu_{\text{visual}} = \mu_{\text{no visual}}$
$H_A: \mu_{\text{visual}} < \mu_{\text{no visual}}$
# Two-sample t-test: is the mean fusion time lower for the visual group (VV)
# than for the no-visual group (NV)?

# step 2a: load and visualize the data
# SDS100::download_data("stereograms.txt")
stereograms <- read.table("stereograms.txt", header = TRUE)

no_visual <- subset(stereograms, group == "NV")$fusion_time
visual <- subset(stereograms, group == "VV")$fusion_time

boxplot(visual, no_visual,
        names = c("visual", "no visual"),
        ylab = "Fusion time (s)")

# step 2b: calculate the observed t statistic
mean_no_visual <- mean(no_visual)
mean_visual <- mean(visual)

n_no_visual <- length(no_visual)
n_visual <- length(visual)

var_no_visual <- var(no_visual)
var_visual <- var(visual)

# Unpooled standard error of the difference in sample means.
SE <- sqrt((var_visual / n_visual) + (var_no_visual / n_no_visual))
t_stat <- (mean_visual - mean_no_visual) / SE

# Conservative degrees of freedom: the smaller sample size minus one.
# (t.test() below uses the Welch approximation instead, so its p-value
# will differ slightly from the manual one.)
degree_free <- min(n_no_visual, n_visual) - 1

# step 3: visualize null distribution
x_vals <- seq(-5, 5, length.out = 1000)
y_vals <- dt(x_vals, degree_free)
plot(x_vals, y_vals, type = "l")
abline(v = t_stat, col = "red")  # observed statistic

# step 4: calculate the p-value (lower tail, matching HA: visual < no visual)
p_value <- pt(t_stat, df = degree_free)

# step 5: reject!

# can also run t.test()
t.test(visual, no_visual, alternative = "less")
$\\$
The data is from: https://dasl.datadescription.com/datafile/freshman-15/
# Paired t-test: do students gain weight over their first year?

# SDS100::download_data("freshman-15.txt")
freshman <- read.table("freshman-15.txt", header = TRUE)

initial_weight <- freshman$Initial.Weight
final_weight <- freshman$Terminal.Weight

# Let's define: mu_diff = mu_final - mu_initial
# H0: mu_diff = 0
# HA: mu_diff > 0   (a weight gain; this is the direction tested by the
#                    upper-tail p-value and the t.test() calls below)
weight_diff <- final_weight - initial_weight

# 2a. not a good visualization: side-by-side boxplots ignore the pairing
boxplot(final_weight, initial_weight, names = c("Final", "Initial"))

# 2a. better: stripchart and histogram of the paired differences
stripchart(weight_diff, method = "jitter")

hist(weight_diff, breaks = 20)
abline(v = 0, col = "green")                # no change
abline(v = mean(weight_diff), col = "red")  # observed mean difference

# 2b. calculate the observed statistic
mean_diff <- mean(weight_diff)
sd_diff <- sd(weight_diff)
n_diff <- length(weight_diff)  # number of pairs
SE <- sd_diff / sqrt(n_diff)
t_stat <- mean_diff / SE
degree_free <- n_diff - 1

# 3. plot the null distribution
x_vals <- seq(-5, 5, length.out = 1000)
y_vals <- dt(x_vals, degree_free)
plot(x_vals, y_vals, type = "l")

# 4. p-value (upper tail, because HA is mu_diff > 0)
p_val <- pt(t_stat, degree_free, lower.tail = FALSE)

# 5. conclusion!
# With initial first and final second, alternative = "less" tests
# mean(initial - final) < 0, i.e. the same hypothesis as mu_diff > 0.
t.test(initial_weight, final_weight, paired = TRUE, alternative = "less")

# compare to non-paired t-test
t.test(initial_weight, final_weight, alternative = "less")

# confidence interval on the weight gain (95%, two-sided)...
mean_diff + qt(0.975, degree_free) * c(-SE, SE)

# more like the freshman 1.5 lbs!

# sanity check
# t.test(final_weight, initial_weight, paired = TRUE)$conf.int
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.