knitr::opts_chunk$set(echo = TRUE,
                      strip.white = FALSE)

Introduction

Original Data

Skibiel et al 2013. Journal of Animal Ecology. The evolution of the nutrient composition of mammalian milks. 82: 1254-1264.

Load data

#setwd("C:/Users/lisanjie2/Desktop/TEACHING/1_STATS_CalU/1_STAT_CalU_2016_by_NLB/1_STAT_Duq_2017/HOMEWORK/Duq_Biostats_Homework_4_Milk_regression/homework4_RMD_vs2")

setwd("C:/Users/lisanjie2/Dropbox/Duq_BioStats2017/week6_regressionIII/wk6_Rmd")

dat2 <- read.csv("milk_subset.csv")

Relative lactation duration

dat2$rel.lactation <- with(dat2, lacatation.months/(gestation.month + 
                                                      lacatation.months))

Transformation

As the authors did, Log transform relative lactation

dat2$rel.lact.log10 <- log10(dat2$rel.lactation)

Plotting

Plot log10(fat) ~ rel.lactation

library(ggplot2)

qplot(y = fat.log10,
      x = rel.lactation,
      data = dat2) + 
  ggtitle("no transformation of x")

Add Colors

qplot(y = fat.log10,
      x = rel.lactation,
      data = dat2,
      col = diet) + 
  ggtitle("no transformation of x")

Combine Diet categories: pool herbivores and omnivores

Make a copy of diet column

diet2 <- dat2$diet

Look at the column with summary()

summary(diet2)

Try to recode the data using ifelse()

#if the row contains "carnivores", 
#leave it as carnivore,
#else change it to other
#so, ominivore and herbivore now = "other"
diet2 <- ifelse(diet2 == "carnivore", 
                "carnivore",
                "other")

Look at new column

summary(diet2)

What happened?

The factor() command will turn it back to categorical data

diet2 <- factor(diet2)

summary(diet2)

Add diet2 to dataframe

dat2$diet2 <- diet2

Plot new data w/ combined diet category

qplot(y = fat.log10,
      x = rel.lactation,
      data = dat2,
      col = diet2,
      shape = diet2) + 
  ggtitle("no transformation of x")

Plot data with smoother

Add smoother

Default is for loess smooths

qplot(y = fat.log10,
      x = rel.lactation,
      data = dat2,
      col = diet2,
      shape = diet2) +
  geom_smooth()

Add regression line

qplot(y = fat.log10,
      x = rel.lactation,
      data = dat2,
      col = diet2,
      shape = diet2) +
  geom_smooth(method = lm)

What do you notice about the slopes?

Model an interaction

Null model:

m.null <- lm(fat.log10 ~ 1, data = dat2)

#fitted intercept 
coef(m.null)

#mean of raw data
mean(dat2$fat.log10)

#summary of model
summary(m.null)

Plot it in base R

plot(fat.log10 ~ rel.lactation, data = dat2)
abline(m.null, col = 2)

Plot it in ggplot

qplot(y = fat.log10,
      x = rel.lactation,
      data = dat2) +
  geom_hline(yintercept = coef(m.null), col = 2)

Questions:

ANSWERS:

...

Model w/ a Single slope

Fit the model

m.lactation <- lm(fat.log10 ~ rel.lactation, 
                  data = dat2)

Plot it

plot(fat.log10 ~ rel.lactation, data = dat2)
abline(m.lactation)

Plot it with intercept visible

plot(fat.log10 ~ rel.lactation, 
     data = dat2,
     xlim = c(0, 1))
abline(m.lactation)
abline(v = 0, col = 2)
arrows(y0= coef(m.lactation)[1],
       y1= coef(m.lactation)[1],
       x0 = 0.05,x1 = 0, lwd = 3, col = 2, length = 0.1)

Model with Two intercepts

m.diet2 <- lm(fat.log10 ~ diet2, data = dat2)

coef(m.diet2)

Load my plotting function

source("fnxn_plot_ANCOVA.R")

Plot it

par(mfrow = c(1,1))
plot.ANCOVA(m.diet2, raw.data = dat2,
x.axis = "rel.lactation")

Question

#The "intercept"
coef(m.diet2)[1]

#diet2other parameter 
coef(m.diet2)[2]

#Combined
coef(m.diet2)[1] + coef(m.diet2)[2]

Alternate parameterization

Fit alternative parameteraization

m.diet2.alt <- lm(fat.log10 ~ -1 + diet2, data = dat2)

Compare coefficients

# original
coef(m.diet2)

#alternate
coef(m.diet2.alt)

#for original, add int + diet2other
coef(m.diet2)[1] + coef(m.diet2)[2]

Model w/ Two parrallel slopes

m.lact.diet <- lm(fat.log10 ~ rel.lactation + 
                    diet2, data = dat2)

coef(m.lact.diet)

Plot it

plot.ANCOVA(m.lact.diet)

Model Two non-parralel slopes

Fully written out interaction model

m.lact.X.diet <- lm(fat.log10 ~ rel.lactation + 
                              diet2 +
                              rel.lactation*diet2,
                    data = dat2)

coef(m.lact.X.diet)

Equivalent syntax for interaction model

m.lact.X.diet <- lm(fat.log10 ~ rel.lactation*diet2, 
                    data = dat2)

coef(m.lact.X.diet)

Plot interaction model

plot.ANCOVA(m.lact.X.diet)

Plot all four models

par(mfrow = c(2,2),
    mar = c(3,2,2,1))

#1 slope
plot(fat.log10 ~ rel.lactation, data = dat2)
abline(m.lactation)

#2 intercepts
plot.ANCOVA(m.diet2, raw.data = dat2,
x.axis = "rel.lactation")

#parallel
plot.ANCOVA(m.lact.diet)

#not-par
plot.ANCOVA(m.lact.X.diet)

Compare models with AICtab

library(bbmle)
ICtab(type =  "AICc",
      base = T,
      logLik = T,
      m.null, 
      m.lactation,
      m.diet2,
      m.lact.diet,
      m.lact.X.diet)

QUESTION: How do we interpret these results?

Interpretation

Do AIC results match p values?

Compare the two "Best" models with anova

"interaction" vs "additive" models

anova(m.lact.X.diet,
      m.lact.diet)

Compare the 2nd and 3rd "Best" models

"additive" vs. diet only (categorical variable)

anova(m.diet2,
      m.lact.diet)

QUESTION How do these results compare to the AIC results

ANSWER

Compare R2 values of each model

data.frame(mod = c("m.lact.X.diet",
                   "m.lact.diet",
                   "m.diet2",
                   "m.lactation"),
R2 = round(c(summary(m.lact.X.diet)$r.squared
,summary(m.lact.diet)$r.squared
,summary(m.diet2)$r.squared
,summary(m.lactation)$r.squared),2))

QUESTION Do the changes in R2 match the AIC / p-value story?

ANSWER

Look at residuals of "best"" model

par(mfrow = c(2,2))
plot(m.lact.diet)

QUESTION Is there any evidence of

ANSWER

Plot Australia and other odd points against "best" model

par(mfrow = c(1,1),
    mar = c(4,4,1,1))
plot.ANCOVA(m.lact.X.diet)

i.Aust <- which(dat2$Australia == "Australia")
points(fat.log10 ~ rel.lactation, data = dat2[i.Aust,], pch = 4)

i.lemur <- which(dat2$lemurs == "lemur")
points(fat.log10 ~ rel.lactation, data = dat2[i.lemur,], pch = 5)

i.odd.ung <- which(dat2$order == "Perrissodactyla")
points(fat.log10 ~ rel.lactation, data = dat2[i.odd.ung,], pch = 6)

text(y = dat2$fat.log10[86], 
     x = dat2$rel.lactation[86],
labels = 86,adj = 1)

Label large residuals on scatter plot

We can label the points flagged in the residual plots like this

The indices of the large residuals

outliers <- c(86,107,108)

Plot scatterplot w/ model and label the points with large residuals

par(mfrow = c(1,1),
    mar = c(4,4,1,1))
plot.ANCOVA(m.lact.X.diet)



text(y = dat2$fat.log10[outliers], 
     x = dat2$rel.lactation[outliers],
labels = outliers,adj = 1)

Compare data to residual plot

Plot scatter plot with regression lines vs. resid vs. fitted. fitted plot

#set par to 1 x 2
par(mfrow = c(1,2),
    mar = c(4,4,2,1))

#Plot model
plot.ANCOVA(m.lact.X.diet)

outliers <- c(86,107,108)

#plot text 
text(y = dat2$fat.log10[outliers], 
     x = dat2$rel.lactation[outliers],
labels = outliers,adj = 1)

#plot resid vs fitted plot
## "which = 1" seletcs the desired plot
plot(m.lact.X.diet,
     which = 1)

#horizontal line for comparison
abline(h=0.0,lty =2)

Compare models w/ANOVA

Compare nested models directly

#Null vs. 1 term
## Null vs. lactation (continous)
anova(m.null,
      m.lactation)

## Null vs. diet (categorical)
anova(m.null,
      m.diet2)


#Model w/1 term vs. 


#Full model w/* vs. model w/+
anova(m.lact.diet,
      m.lact.X.diet)



anova(m.lactation,
      m.lact.diet)

anova(m.diet2,
      m.lact.diet)

Call R2, F by hand

R2 from summary command

summary(m.lact.X.diet)$r.squared

Compare null to full model

anova(m.null,
      m.lact.X.diet)

Copy the values

SS <- 11.68
RSS.null <- 26.906

Compute R2

SS/RSS.null

Getting this information from using just anova(model)

anova() on just full model gives slightly different output

anova(m.lact.X.diet)

If we add up the rows related to our parameters, we get the total SS for our full model

#Model terms
0.2886+8.6593+2.7321

If add up everything in the Sum of sq columns, we get the total sum of squares, aka the sum of squares from the null model

#Model terms + residuals
15.2258+0.2886+8.6593+2.7321

Dividing these we can get R2

(0.2886+8.6593+2.7321)/(15.2258+0.2886+8.6593+2.7321)

F stat

summary(m.lact.X.diet)
anova(m.lact.X.diet)
anova(m.lact.X.diet,
      m.lact.diet)
#Null vs. Full model w/interaction
anova(m.lact.X.diet,
      m.null)

Sum of squares by hand

y <- dat2$fat.log10
mean.y <- mean(dat2$fat.log10)

n <- length(dat2$fat.log10)
ss.y <- sum((y-mean.y)^2)


#equivalent
var.y <- var(dat2$fat.log10)

var.y*(n-1)

Sum of squares from null model

##SS from null model
sum(resid(m.null)^2)

##SS from anova table of null model
anova(m.null)$"Sum Sq"


##Mean sq from null model
anova(m.null)$"Sum Sq"/anova(m.null)$Df

Sum of squares from full model

Add up entire column

sum(anova(m.lact.X.diet)$"Sum Sq")

anova(m.lact.X.diet)$"F value"[3]
anova(m.lact.X.diet,
      m.lact.diet)$"F"[2]
anova(m.lact.X.diet,m.lact.diet)

F = (RSS.null - RSS.alt) / [RSS.alt/(df.alt)]

anova(m.null)$"Sum Sq"
a<-sum(anova(m.lact.X.diet)$"Sum Sq"[1:3])-sum(anova(m.lact.X.diet)$"Sum Sq"[1:2])


b<-sum(anova(m.lact.X.diet)$"Sum Sq"[1:3])/123

a/b
(15.514-15.226)/(15.226/126)


brouwern/mammalsmilk documentation built on May 17, 2019, 10:38 a.m.