coding: Meaningful contrasts for factors

View source: R/factors.R

codingR Documentation

Meaningful contrasts for factors

Description

Generate coding values for a factor so factor contrasts have specified interpretations

Usage

coding(E, intercept = 1/ncol(E))

Arguments

E

a k-1 \times k matrix of linearly independent factor level contrasts to be estimated in regression output. The row names of E are the names of the estimates in the regression output. The rows of E must also be linearly independent of the vector of coefficients for the intercept argument. Usually, the rows of E are contrasts summing to 0, and the intercept consists of a row of constants each equal to 1/k.

intercept

coefficients for a linear combination of of factor levels that defines the intercept. Default the average of factor levels.

Examples

days <- c('Sat','Sun','Mon','Tue','Wed','Thu','Fri')
data <- expand.grid(id = 1:100, day = factor(days)) 
data <- within(
  data,
  {
     y <- 10*rnorm(100)[id] + rnorm(7)[day] + rnorm(days)
  })
require(nlme)
fit <- lme(y ~ day, data, random = ~ 1 |id)
summary(fit)
#
# In the output above, the intercept is the expected value 
# for the the 'reference level', which is the level omitted in
# the list, i.e. 'Fri' because it is first alphabetically.
# The other values are comparisons of each day with
# Friday.
#
data$day <- factor(data$day, 
            levels = c('Sat','Sun','Mon',
                     'Tue','Wed','Thu','Fri'))
E <- rbind(
    ":Weekend-Weekday" = c(.5,.5 , -.2, -.2, -.2, -.2, -.2),
    ":Sun-Sat"   = c(-1, 1 , 0, 0, 0, 0, 0),
    ":Tue-Mon"   = c(0, 0, -1, 1, 0, 0, 0),
    ":Wed-Tue"   = c(0, 0, 0, -1, 1, 0, 0),
    ":Thu-Wed"   = c(0, 0, 0,  0,  -1, 1, 0),
    ":Fri-Thu"   = c(0, 0,  0,  0,  0, -1, 1)
)
colnames(E) <- levels(data$day)
require(spida2)
contrasts(data$day) <- coding(E)
fit <- lme(y ~ day, data, random = ~ 1 | id)
summary(fit)
pred <- with(data, pred.grid(day = day))
pred
contrasts(pred$day) # using pred.grid preserves contrasts
pred$fit <- predict(fit, newdata = pred, level = 0)
contrasts(pred$day)
require(lattice)
require(latticeExtra)
require(latex2exp)
xyplot(fit ~ day, pred, type = 'b')

ww <- waldf(fit, pred = pred)
contrasts(ww$day)
xyplot(coef ~ day, ww, type = 'b')
xyplot(coef ~ day, ww, type = 'b',
  fit = ww$coef,
  ylab = TeX('Estimated y $\\pm$ SE'),
  upper = ww$coef + ww$se,
  lower = ww$coef - ww$se,
  subscripts = TRUE ) +
  layer(panel.band(...))
  
# A pooled analysis would produce different results
# for contrasts.
# Because days are balanced within subjects (`id`)
# The estimates are the same but the standard
# errors and p-value are different.

fit.pooled <- lm(y ~ day, data)   
summary(fit.pooled)

# The default treatment contrasts correpond to the
# following estimates

E <- rbind(
  'Sun-Sat' = c(-1,1,0,0,0,0,0),
  'Mon-Sat' = c(-1,0,1,0,0,0,0),
  'Tue-Sat' = c(-1,0,0,1,0,0,0),
  'Wed-Sat' = c(-1,0,0,0,1,0,0),
  'Thu-Sat' = c(-1,0,0,0,0,1,0),
  'Fri-Sat' = c(-1,0,0,0,0,0,1)
)
Cmat <- coding(E, intercept = c(1,0,0,0,0,0,0))
Cmat
# which is the familiar 'treatment' coding matrix

# A quick way to generate the contrast matrix
# from the E matrix without using the "spida2" 
# package:
Cmat <- solve(rbind(1/ncol(E), E))[, -1]
contrasts(data$day) <- Cmat

gmonette/spida2 documentation built on June 12, 2025, 9:44 p.m.