knitr::opts_chunk$set(echo = TRUE, strip.white = FALSE)
Skibiel et al 2013. Journal of Animal Ecology. The evolution of the nutrient composition of mammalian milks. 82: 1254-1264.
#setwd("C:/Users/lisanjie2/Desktop/TEACHING/1_STATS_CalU/1_STAT_CalU_2016_by_NLB/1_STAT_Duq_2017/HOMEWORK/Duq_Biostats_Homework_4_Milk_regression/homework4_RMD_vs2") setwd("C:/Users/lisanjie2/Dropbox/Duq_BioStats2017/week6_regressionIII/wk6_Rmd") dat2 <- read.csv("milk_subset.csv")
dat2$rel.lactation <- with(dat2, lacatation.months/(gestation.month + lacatation.months))
As the authors did, Log transform relative lactation
dat2$rel.lact.log10 <- log10(dat2$rel.lactation)
library(ggplot2) qplot(y = fat.log10, x = rel.lactation, data = dat2) + ggtitle("no transformation of x")
qplot(y = fat.log10, x = rel.lactation, data = dat2, col = diet) + ggtitle("no transformation of x")
Make a copy of diet column
diet2 <- dat2$diet
Look at the column with summary()
summary(diet2)
Try to recode the data using ifelse()
#if the row contains "carnivores", #leave it as carnivore, #else change it to other #so, ominivore and herbivore now = "other" diet2 <- ifelse(diet2 == "carnivore", "carnivore", "other")
Look at new column
summary(diet2)
What happened?
The factor() command will turn it back to categorical data
diet2 <- factor(diet2) summary(diet2)
Add diet2 to dataframe
dat2$diet2 <- diet2
qplot(y = fat.log10, x = rel.lactation, data = dat2, col = diet2, shape = diet2) + ggtitle("no transformation of x")
Default is for loess smooths
qplot(y = fat.log10, x = rel.lactation, data = dat2, col = diet2, shape = diet2) + geom_smooth()
qplot(y = fat.log10, x = rel.lactation, data = dat2, col = diet2, shape = diet2) + geom_smooth(method = lm)
What do you notice about the slopes?
m.null <- lm(fat.log10 ~ 1, data = dat2) #fitted intercept coef(m.null) #mean of raw data mean(dat2$fat.log10) #summary of model summary(m.null)
Plot it in base R
plot(fat.log10 ~ rel.lactation, data = dat2) abline(m.null, col = 2)
Plot it in ggplot
qplot(y = fat.log10, x = rel.lactation, data = dat2) + geom_hline(yintercept = coef(m.null), col = 2)
Questions:
ANSWERS:
...
Fit the model
m.lactation <- lm(fat.log10 ~ rel.lactation, data = dat2)
Plot it
plot(fat.log10 ~ rel.lactation, data = dat2) abline(m.lactation)
Plot it with intercept visible
plot(fat.log10 ~ rel.lactation, data = dat2, xlim = c(0, 1)) abline(m.lactation) abline(v = 0, col = 2) arrows(y0= coef(m.lactation)[1], y1= coef(m.lactation)[1], x0 = 0.05,x1 = 0, lwd = 3, col = 2, length = 0.1)
m.diet2 <- lm(fat.log10 ~ diet2, data = dat2) coef(m.diet2)
source("fnxn_plot_ANCOVA.R")
Plot it
par(mfrow = c(1,1)) plot.ANCOVA(m.diet2, raw.data = dat2, x.axis = "rel.lactation")
Question
#The "intercept" coef(m.diet2)[1] #diet2other parameter coef(m.diet2)[2] #Combined coef(m.diet2)[1] + coef(m.diet2)[2]
m.diet2.alt <- lm(fat.log10 ~ -1 + diet2, data = dat2)
# original coef(m.diet2) #alternate coef(m.diet2.alt) #for original, add int + diet2other coef(m.diet2)[1] + coef(m.diet2)[2]
m.lact.diet <- lm(fat.log10 ~ rel.lactation + diet2, data = dat2) coef(m.lact.diet)
Plot it
plot.ANCOVA(m.lact.diet)
m.lact.X.diet <- lm(fat.log10 ~ rel.lactation + diet2 + rel.lactation*diet2, data = dat2) coef(m.lact.X.diet)
m.lact.X.diet <- lm(fat.log10 ~ rel.lactation*diet2, data = dat2) coef(m.lact.X.diet)
plot.ANCOVA(m.lact.X.diet)
par(mfrow = c(2,2), mar = c(3,2,2,1)) #1 slope plot(fat.log10 ~ rel.lactation, data = dat2) abline(m.lactation) #2 intercepts plot.ANCOVA(m.diet2, raw.data = dat2, x.axis = "rel.lactation") #parallel plot.ANCOVA(m.lact.diet) #not-par plot.ANCOVA(m.lact.X.diet)
library(bbmle) ICtab(type = "AICc", base = T, logLik = T, m.null, m.lactation, m.diet2, m.lact.diet, m.lact.X.diet)
QUESTION: How do we interpret these results?
Interpretation
"interaction" vs "additive" models
anova(m.lact.X.diet,
m.lact.diet)
"additive" vs. diet only (categorical variable)
anova(m.diet2,
m.lact.diet)
QUESTION How do these results compare to the AIC results
ANSWER
data.frame(mod = c("m.lact.X.diet", "m.lact.diet", "m.diet2", "m.lactation"), R2 = round(c(summary(m.lact.X.diet)$r.squared ,summary(m.lact.diet)$r.squared ,summary(m.diet2)$r.squared ,summary(m.lactation)$r.squared),2))
QUESTION Do the changes in R2 match the AIC / p-value story?
ANSWER
par(mfrow = c(2,2)) plot(m.lact.diet)
QUESTION Is there any evidence of
ANSWER
par(mfrow = c(1,1), mar = c(4,4,1,1)) plot.ANCOVA(m.lact.X.diet) i.Aust <- which(dat2$Australia == "Australia") points(fat.log10 ~ rel.lactation, data = dat2[i.Aust,], pch = 4) i.lemur <- which(dat2$lemurs == "lemur") points(fat.log10 ~ rel.lactation, data = dat2[i.lemur,], pch = 5) i.odd.ung <- which(dat2$order == "Perrissodactyla") points(fat.log10 ~ rel.lactation, data = dat2[i.odd.ung,], pch = 6) text(y = dat2$fat.log10[86], x = dat2$rel.lactation[86], labels = 86,adj = 1)
We can label the points flagged in the residual plots like this
The indices of the large residuals
outliers <- c(86,107,108)
Plot scatterplot w/ model and label the points with large residuals
par(mfrow = c(1,1), mar = c(4,4,1,1)) plot.ANCOVA(m.lact.X.diet) text(y = dat2$fat.log10[outliers], x = dat2$rel.lactation[outliers], labels = outliers,adj = 1)
Plot scatter plot with regression lines vs. resid vs. fitted. fitted plot
#set par to 1 x 2 par(mfrow = c(1,2), mar = c(4,4,2,1)) #Plot model plot.ANCOVA(m.lact.X.diet) outliers <- c(86,107,108) #plot text text(y = dat2$fat.log10[outliers], x = dat2$rel.lactation[outliers], labels = outliers,adj = 1) #plot resid vs fitted plot ## "which = 1" seletcs the desired plot plot(m.lact.X.diet, which = 1) #horizontal line for comparison abline(h=0.0,lty =2)
Compare nested models directly
#Null vs. 1 term ## Null vs. lactation (continous) anova(m.null, m.lactation) ## Null vs. diet (categorical) anova(m.null, m.diet2) #Model w/1 term vs. #Full model w/* vs. model w/+ anova(m.lact.diet, m.lact.X.diet) anova(m.lactation, m.lact.diet) anova(m.diet2, m.lact.diet)
R2 from summary command
summary(m.lact.X.diet)$r.squared
Compare null to full model
anova(m.null,
m.lact.X.diet)
Copy the values
SS <- 11.68 RSS.null <- 26.906
Compute R2
SS/RSS.null
anova() on just full model gives slightly different output
anova(m.lact.X.diet)
If we add up the rows related to our parameters, we get the total SS for our full model
#Model terms 0.2886+8.6593+2.7321
If add up everything in the Sum of sq columns, we get the total sum of squares, aka the sum of squares from the null model
#Model terms + residuals 15.2258+0.2886+8.6593+2.7321
Dividing these we can get R2
(0.2886+8.6593+2.7321)/(15.2258+0.2886+8.6593+2.7321)
summary(m.lact.X.diet)
anova(m.lact.X.diet)
anova(m.lact.X.diet,
m.lact.diet)
#Null vs. Full model w/interaction anova(m.lact.X.diet, m.null)
y <- dat2$fat.log10 mean.y <- mean(dat2$fat.log10) n <- length(dat2$fat.log10) ss.y <- sum((y-mean.y)^2) #equivalent var.y <- var(dat2$fat.log10) var.y*(n-1)
##SS from null model sum(resid(m.null)^2) ##SS from anova table of null model anova(m.null)$"Sum Sq" ##Mean sq from null model anova(m.null)$"Sum Sq"/anova(m.null)$Df
Add up entire column
sum(anova(m.lact.X.diet)$"Sum Sq")
anova(m.lact.X.diet)$"F value"[3] anova(m.lact.X.diet, m.lact.diet)$"F"[2]
anova(m.lact.X.diet,m.lact.diet)
F = (RSS.null - RSS.alt) / [RSS.alt/(df.alt)]
anova(m.null)$"Sum Sq"
a<-sum(anova(m.lact.X.diet)$"Sum Sq"[1:3])-sum(anova(m.lact.X.diet)$"Sum Sq"[1:2]) b<-sum(anova(m.lact.X.diet)$"Sum Sq"[1:3])/123 a/b
(15.514-15.226)/(15.226/126)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.