Cartesian product of variable values for prediction


Plotting predicted values for a model often requires computing predicted values on a grid of predictor values other than the original data set. Categorical variables (character or factor) must be of the same form (same set of character values in the case of character variables and the same levels in the same order in the case of factor variables) as they appear in the model data frame on which the model was fitted. pred.grid facilitates the process, in comparison with expand.grid, by allowing references to variables in the model data frame by name or by specifying a vector of values in the case of numeric predictors.





names of variables or named arguments with values. Arguments that are names are evaluated in the environment, which will typically be a data frame supplied as an environment with the with function. Named arguments are evaluated in the usual way. The unique values of each argument are supplied to expand.grid to create a data frame whose rows are the Cartesian product of the unique values of the input. See comments on usage in the examples.


# Add school-level derived variables to 'hs'.
#   id           - factor labelling each Sector/school combination; its levels
#                  are reordered by 'ses', with 1000 added for Catholic schools
#                  so that those schools sort after all the others
#   ses_mean     - 'ses' summarised within 'id' with mean()
#   mathach_mean - 'mathach' summarised within 'id' with mean()
# NOTE(review): 'capply' is presumably the package helper that returns the
# group summary repeated for every row of the group -- confirm.
hs <- within(
  hs,
  {
    id <- paste(Sector, school) %>%
      as.factor %>%
      reorder(ses + I(Sector == 'Catholic')*1000)
    ses_mean <- capply(ses, id, mean, na.rm = TRUE)
    mathach_mean <- capply(mathach, id, mean, na.rm = TRUE)
  }
)

# Model 1: a separate quadratic in 'ses' for every school ('id').
fit1 <- lm(mathach ~ (ses + I(ses^2)) * id, hs)

# Prediction grid: every level of 'id' (taken from 'hs' by name) crossed with
# an explicit sequence of 'ses' values.
pred1 <- with(hs, pred.grid(id, ses = seq(-3, 3, .1)))
pred1$fit1 <- predict(fit1, newdata = pred1)

# Model 2: a separate quadratic in 'ses' for each Sector.
fit2 <- lm(mathach ~ (ses + I(ses^2)) * Sector, hs)

# add Sector to pred1
# 'pred1' has no 'Sector' column, so it is merged with a per-'id' summary of
# 'hs'; all.x = TRUE keeps every row of the prediction grid.
# NOTE(review): 'up(hs, ~id)' is presumably one row per 'id' holding the
# variables that are constant within 'id' -- confirm.
head(pred1); dim(pred1)
pred2 <- merge(
  pred1,
  up(hs, ~id),
  by = 'id',
  all.x = TRUE)
head(pred2); dim(pred2)
pred2$fit2 <- predict(fit2, newdata = pred2)

# Existing methods allow you to graph lines fitted
# within each panel

# Base plot: mathach against ses, one panel per school, points grouped by Sex.
#   layout  - 7 columns by 6 rows of panels
#   between - extra vertical space (0.4) between the third and fourth panel rows
#   skip    - leaves panel positions 20 and 21 empty
p <- xyplot(mathach ~ ses | id, hs, groups = Sex,
   layout = c(7,6),
   alpha = c(.8, .5),
   auto.key = list(space = 'right'),
   between = list(y = rep(c(0,.4,0), c(2,1,2))),
   skip = rep(c(FALSE,TRUE,FALSE), c(19,2,20)))
# Add a smoother for each group within each panel.
p + glayer(panel.smoother(...))

# With 'pred.grid' and 'expand.grid' it is easier to overlay
# lines fitted by a model on the data

td(pch = 1, cex = 0.5)

# School-specific fitted curves (fit1) on top of the data
p + xyplot(fit1 ~ ses | id, data = pred1, type = "l")

# Add the Sector-level curves (fit2) in black.
# The black lines zig-zag here: the rows of 'pred2' are presumably no longer
# in increasing order of 'ses' within each 'id' after the merge.
p +
  xyplot(fit1 ~ ses | id, data = pred1, type = "l") +
  xyplot(fit2 ~ ses | id, data = pred2, type = "l", col = "black")

# Sorting 'pred2' by 'ses' before plotting removes the zig-zagging
pred2 <- sortdf(pred2, ~ ses)
p +
  xyplot(fit1 ~ ses | id, data = pred1, type = "l") +
  xyplot(fit2 ~ ses | id, data = pred2, type = "l", col = "black")

# Referring to other variables in panel functions

# 'ses_mean' and 'mathach_mean' are passed to xyplot as extra arguments so that
# the layers below can refer to them; 'subscripts' selects the rows of 'hs'
# that belong to the panel being drawn.
# NOTE(review): the result of this call is printed but not assigned, so the
# two 'p + ...' lines that follow still use the earlier 'p', which was created
# without 'ses_mean' -- confirm whether 'p <-' was intended here.
xyplot(mathach ~ ses | id, hs, groups = Sex,
   layout = c(7,6),
   cex = .4,
   ses_mean = hs$ses_mean,
   mathach_mean = hs$mathach_mean,
   par.strip.text = list(cex = .7),
   auto.key = list(space = 'right'),
   between = list(y = rep(c(0,.4,0), c(2,1,2))),
   subscripts = TRUE,
   skip = rep(c(FALSE,TRUE,FALSE), c(19,2,20))) +
 # one smoother per group, without standard-error bands
 glayer(panel.smoother(..., se = FALSE, lwd = 2, lty =1)) +
 # gray reference lines at the panel's ses_mean and mathach_mean values
 layer(panel.abline(v=ses_mean[subscripts],...,col = 'gray')) +
 layer(panel.abline(h=mathach_mean[subscripts],...,col = 'gray'))
p + glayer(panel.abline(v=ses_mean))
p + xyplot(fit1 ~ ses | id, pred1, type = 'l')

