knitr::opts_chunk$set(echo = FALSE) library(DiagrammeR) library(knitr) library(kableExtra) library(FD) library(vegan) library(datasets) library(datasets) library(FSA) library(dplyr) library(edcpR) library(car)
Defining the correct statistical test based on different case studies and implement this in R.
unloadNamespace("plant_growth") rm("plant_growth")
# First update package library(edcpR) data(plant_growth, package = "edcpR")
We study the effect of a new fertilizer (NPK) on the growth of maize. We measured the yield in 10 control plots (Ctrl) vs 10 plots that received the fertilizer treatment.
Research question: What is the effect of fertilizer on the biomass yield of maize?
data(plant_growth, package = "edcpR") colnames(plant_growth) <- c("Yield", "Treatment") head(plant_growth)
str(plant_growth) plant_growth$Treatment <- as.factor(plant_growth$Treatment) str(plant_growth)
# Difference in a quantitative variable between two groups # Sample size = 10 # Too small for a parametric test --> non-parametric test preferred! # Independent samples? --> check! # Independent two-group Mann-Whitney test wilcox.test(Yield ~ Treatment, data = plant_growth)
m<-mean(plant_growth$Yield) std<-sqrt(var(plant_growth$Yield))
# We can check if normality holds hist(plant_growth$Yield ~ plant_growth$Treatment, prob=TRUE, xlab="Yield",main="Histogram of Yield")
library(datasets) data(iris, package = "datasets")
We studied the differences in flower dimensions between three iris species (Iris setosa, Iris versicolor, Iris virginica). We sampled 10 individuals of each species and measured the sepal and petal length and width.
Research question: Do the three iris species have different sepal lengths?
unloadNamespace("iris") rm("iris")
data(iris, package = "datasets") head(iris) iris <- iris[, c(1,5)] colnames(iris) <- c("sepal_length", "Species")
str(iris) iris$Species <- as.factor(iris$Species)
# Difference in a quantitative variable between 3 samples # Total observations = 150 # 50 observations per --> assumptions # Independent samples? --> check!
par(mfrow = c(1,3)) hist(iris %>% filter(Species == "setosa") %>% pull(sepal_length), prob=TRUE, xlab="Sepal lenghth",main="Histogram of sepal length") hist(iris %>% filter(Species == "versicolor") %>% pull(sepal_length) , prob=TRUE, xlab="Sepal lenghth",main="Histogram of sepal length") hist(iris %>% filter(Species == "virginica") %>% pull(sepal_length) , prob=TRUE, xlab="Sepal lenghth",main="Histogram of sepal length")
par(mfrow = c(1,3)) qqnorm(iris %>% filter(Species == "setosa") %>% pull(sepal_length), xlab="Sepal lenghth",main="Histogram of sepal length") qqline(iris %>% filter(Species == "setosa") %>% pull(sepal_length), col = "red", lwd = 2) qqnorm(iris %>% filter(Species == "versicolor") %>% pull(sepal_length), xlab="Sepal lenghth",main="Histogram of sepal length") qqline(iris %>% filter(Species == "versicolor") %>% pull(sepal_length), col = "red", lwd = 2) qqnorm(iris %>% filter(Species == "virginica") %>% pull(sepal_length), xlab="Sepal lenghth",main="Histogram of sepal length") qqline(iris %>% filter(Species == "virginica") %>% pull(sepal_length), col = "red", lwd = 2)
# Statistical test for normality (Shapiro-Wilk) # Not recommended, because it is too strict shapiro.test(iris %>% filter(Species == "setosa") %>% pull(sepal_length)) shapiro.test(iris %>% filter(Species == "versicolor") %>% pull(sepal_length)) shapiro.test(iris %>% filter(Species == "virginica") %>% pull(sepal_length))
# We can check if homoscedasticity holds # Strong evidence for normal distribution --> Bartlets test has better performance bartlett.test(sepal_length ~ Species, data = iris)
# Non-parametric tests: Kruskall-Wallis kruskal.test(sepal_length ~ Species, data = iris)
# Post-hoc testing to check between samples library(FSA) # Benjamini-Hochberg adjustment, not Bonferonni (too strict) dunnTest(sepal_length ~ Species, data = iris, method= "bh")
# Let's consider homoscedasticity holds ... # Compute the ANOVA res.aov <- aov(sepal_length ~ Species, data = iris) summary(res.aov)
# Post-hoc testing to check between samples # Tuckey post-hoc test TukeyHSD(res.aov)
data(meerdaal, package = "edcpR")
We have vegetation records in Meerdaal forest from 50 years ago. We know the exact location of the 13 plots and sample them again in 2018.
Research question: Is there a difference in species richness between old and recent sampling of forest fragments in Meerdaal forest?
data(meerdaal, package = "edcpR") head(meerdaal) colnames(meerdaal) <- c("Plot", "Old", "Recent") # Difference in a quantitative variable between 2 paired samples # Sample size = 13 # Too small for a parametric test --> non-parametric test # Assumption: independent samples --> false, it is resampling of the same plots!
# Wilcoxon signed rank test wilcox.test(meerdaal$Old, meerdaal$Recent, paired = TRUE)
data(kembel_sp, package ="edcpR")
We measured the abundance of species in percent cover of different plant species in 20x20m quadrats in grasslands in plots in two habitat types in Alberta.
Research question: Is there a difference in total species richness between two grassland types?
# We are comparing gamma diversity! # If we want to compare the mean diversity per plot (alfa diversity, use t-test) # Chao estimators of two groups with t-test (see Session 3: Diversity) library(dplyr) # Extract plots of mixed grasslands mixed <- kembel_sp %>% filter(grepl("mix", plot)) %>% select(-plot) # Extract plots with fescue grass fes <- kembel_sp %>% filter(grepl("fes", plot)) %>% select(-plot)
library(vegan) # Get the chao-estimators for each grassland mixed_gamma <- specpool(mixed) fes_gamma <- specpool(fes) chao_mixed <- mixed_gamma$chao chao_mixed_se <- mixed_gamma$chao.se chao_fes <- fes_gamma$chao chao_fes_se <- fes_gamma$chao.se
# Take 100 random samples from a normal distribution! sample_mixed <- rnorm(100, mean = chao_mixed, sd = chao_mixed_se) sample_fes <- rnorm(100, mean = chao_fes, sd = chao_fes_se)
# Perform a t-test t.test(sample_mixed, sample_fes, paired = FALSE)
Available on Toledo under Assignments (Assignment 4). Submit:
Upload everything before December 1st, 12 pm (= at noon!).
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.