gjamVignette.R
In gjam: Generalized Joint Attribute Modeling

## ----outform, echo=F----------------------------------------------------------
insertPlot <- function(file, caption){
#    outputFormat = knitr::opts_knit$get("rmarkdown.pandoc.to")
#  if(outputFormat == 'latex')
#    paste("![ ", caption, " ](", file, ")",sep="")
}
bigskip <- function(){
#  outputFormat = knitr::opts_knit$get("rmarkdown.pandoc.to")
#  if(outputFormat == 'latex')
#    "\\bigskip"
#  else
    "<br>"
}

## ----fig1, fig.width = 5.9, fig.height = 4, echo = FALSE----------------------
  sig    <- .9
  mu     <- 3.1
  offset <- -2
  
  par(mfrow = c(1, 2), bty = 'n', mar = c(4, 5, 3, .1), cex=1.2, family='serif')
  part <- c(0, 2.2, 3.3, 4.5, 6.6)
  w    <- seq(-1, 7, length = 1000)
  dw   <- dnorm(w, mu, sig)
  dp   <- dw[ findInterval(part, w) ]
  pw   <- pnorm(part, mu, sig)
  pw[-1] <- diff(pw)
  
  plot(w, 2*dw - .5, type = 'l', ylim = c(-.5, 4), yaxt = 'n', 
       ylab = expression(paste(italic(y), '|', italic(w), ', ', bold(p), 
                               sep = '')), 
       xlab = expression(paste(italic(w), '|', bold(x), ', ', bold(beta), 
                              ', ', bold(Sigma), sep = '')), 
       xlim = c(offset, 7), lwd = 2)
  axis(2, at = c(0:5))
  
  db <- .15
  int <- 4
  
  polygon( c(w, rev(w)), 2*c(dw, w*0) - .5, col = 'grey', lwd = 2)
  lines(c(-1, part[1]), c(0, 0), lwd = 2)
  
  for(j in 1:(length(part))){
    
    lines( part[j:(j+1)], c(j, j), lwd = 3)
    ww <- which(w >= part[j] & w <= part[j+1])
    
    if(j == 3){
      w1 <- ww[1]
      w2 <- max(ww)
      arrows( mean(w[ww]), 2*max(dw[ww]) - .4, mean(w[ww]), 
              j - .4, angle = 20, lwd = 5, col = 'grey', length = .2)
      arrows( w[w1] - .5 , j , -.7, j , angle = 20, 
              lwd = 5, col = 'grey', length = .2)
      text( c(w[w1], w[w2]), c(3.3, 3.3), 
            expression(italic(p)[4], italic(p)[5]), cex=.9)
      text( w[w2] + .3, .6, expression( italic(w)[italic(is)] ))
      text( 0, 3.5, expression( italic(y)[italic(is)] ))
    }
    
    coll <- 'white'
    if(j == int)coll <- 'grey'
    rect( offset, j - 1 - db, 2*pw[j] + offset, j - 1 + db, 
          col = coll, border = 'black', lwd = 2)
  }
  
  ww <- which(w >= part[int - 1] & w <= part[int])
  abline(h = -.5, lwd = 2)
  
  title('a) Data generation', adj = 0, font.main = 1, font.lab = 1, cex=.8)
  
  plot(w, 2*dw - .5, type = 'l', ylim = c(-.5, 4), yaxt = 'n', 
       ylab = expression(italic(y)), 
       xlab = expression(paste(italic(w), '|', italic(y), ', ', bold(p), sep = '')), 
       xlim = c(offset, 7), lwd = 2, col = 'grey')
  axis(2, at = c(0:5))
  
  abline(h = -.5, lwd = 2, col = 'grey')
  
  wseq <- c(-10,part)
  for(j in 1:(length(part))){
    
    coll <- 'white'
    border <- 'grey'
    
    if(j == int){
      coll <- 'grey'
      border <- 'black'
      rect( offset, j - 1 - db, 2*pw[j] + offset, j - 1 + db, 
            col = 'black', border = 'black')
    }
    lines( part[j:(j+1)], c(j, j), lwd = 3)
    lines(part[c(j, j)], 2*c(0, dp[j])-.5, col = 'grey')
  }
  
  lines(c(-1, part[1]), c(0, 0), lwd = 2)
  ww <- which(w >= part[int - 1] & w <= part[int])
  polygon( w[c(ww, rev(ww))], 2*c(dw[ww], ww*0) - .5, col = 'grey', lwd = 2)
  
  arrows( mean(w[ww]),  int - 1.3, mean(w[ww]),  2*max(dw) - .5, 
          angle = 20, lwd = 5, col = 'grey', length = .2)
  arrows( -.5,  int - 1, min(w[ww]) - .4, int - 1, angle = 20, 
          lwd = 5, col = 'grey', length = .2)
  
  title('b) Inference', adj = 0, font.main = 1, font.lab = 1, cex=.8)

## ----simulate CA, eval = F----------------------------------------------------
# library(gjam)
# f <- gjamSimData(n = 500, S = 10, Q = 4, typeNames = 'CA')
# summary(f)

## ----show formula, eval = F---------------------------------------------------
# f$formula

## ----plotSimY, fig.show = "hold", fig.width = 6.5, eval = F-------------------
# par(bty = 'n', mfrow = c(1,2), family='')
# h <- hist(c(-1,f$y), nclass = 50, plot = F)
# plot( h$counts, h$mids,type = 's' )
# plot( f$w,f$y,cex = .2 )

## ----fit CA data, eval = F----------------------------------------------------
# ml  <- list( ng = 1000, burnin = 100, typeNames = f$typeNames )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# summary(out)

## ----printG, eval=F-----------------------------------------------------------
# print(out)

## ----summary of chains, eval = F----------------------------------------------
# summary(out$chains)

## ----summary of fitted model, eval = FALSE------------------------------------
# summary(out$inputs)

## ----show classes, eval = F---------------------------------------------------
# out$inputs$classBySpec

## ----betaMu, eval=F-----------------------------------------------------------
# out$parameters$betaMu

## ----betaAll, eval=F----------------------------------------------------------
# out$parameters$betaMu         # S by M coefficient matrix unstandardized
# out$parameters$betaSe         # S by M coefficient SE
# out$parameters$betaStandXmu   # S by M standardized for X
# out$parameters$betaStandXWmu  # (S-F) by M standardized for W/X, centered factors
# 
# out$parameters$betaTable        # SM by stats posterior summary
# out$parameters$betaStandXTable  # SM by stats posterior summary
# out$parameters$betaStandXWTable # (S-F)M by stats posterior summary
# 
# out$parameters$sensBeta         # sensitivity to response variables
# out$parameters$sensTable        # sensitivity to predictor variables
# 
# out$parameters$sigMu            # S by S covariance matrix omega
# out$parameters$sigSe            # S by S covariance std errors

## ----plot CA data, family='', eval = FALSE------------------------------------
# f   <- gjamSimData( n = 500, S = 10, typeNames = 'CA' )
# ml  <- list( ng = 1000, burnin = 200, typeNames = f$typeNames )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# pl  <- list( trueValues = f$trueValues, GRIDPLOTS = T )
# gjamPlot( output = out, plotPars = pl )

## ----example output, fig.show = "hold", fig.width = 6.5, eval = F-------------
# par( bty = 'n', mfrow = c(1,3) )
# plot( f$trueValues$beta, out$parameters$betaMu, cex = .2 )
# plot( f$trueValues$corSpec, out$parameters$corMu, cex = .2 )
# plot( f$y,out$prediction$ypredMu, cex = .2 )

## ----design1, eval = F--------------------------------------------------------
# library(repmis)
# d <- "https://github.com/jimclarkatduke/gjam/blob/master/forestTraits.RData?raw=True"
# source_data(d)
# xdata <- forestTraits$xdata[,c(1,2,7,8)]

## ----setupX, echo=F, eval=T---------------------------------------------------
temp <- c(1.22,  0.18, -0.94,  0.64,  0.82)
deficit <- c(0.04,  0.21,  0.20,  0.82, -0.18)
soil <- c('reference', 'reference', 'SpodHist',  'reference', 'reference')
xx <- data.frame( temp, deficit, soil )
attr(xx$soil,'levels') <- c("reference","SpodHist","EntVert","Mol","UltKan")

## ----design1.2, eval = F------------------------------------------------------
# xdata[1:5,]

## ----design2, eval = T--------------------------------------------------------
formula <- as.formula( ~ temp + deficit + soil )

## ----design3, echo=F----------------------------------------------------------
  tmp <- model.frame(formula,data=xx)
  x   <- model.matrix(formula, data=tmp)
  x[1:5,]

## ----design4, eval = T--------------------------------------------------------
formula <- as.formula( ~ temp*soil )

## ----design5, echo = F--------------------------------------------------------
tmp <- model.frame(formula,data=xx,na.action=NULL)
x   <- model.matrix(formula, data=tmp)
x[1:5,]

## ----design6, eval = T--------------------------------------------------------
formula <- as.formula( ~ temp + I(temp^2) + deficit )

## ----design7, echo = F--------------------------------------------------------
tmp <- model.frame(formula,data=xx,na.action=NULL)
x   <- model.matrix(formula, data=tmp)
x[1:5,]

## ----design8, eval = T--------------------------------------------------------
formula <- as.formula( ~ temp*deficit + I(temp^2) + I(deficit^2) )

## ----design9, echo = F--------------------------------------------------------
tmp <- model.frame(formula,data=xx,na.action=NULL)
x   <- model.matrix(formula, data=tmp)
x[1:5,]

## ----get trees, eval = F------------------------------------------------------
# y  <- gjamReZero(forestTraits$treesDeZero)  # extract y
# treeYdata  <- gjamTrimY(y,10)$y             # at least 10 plots
# dim(treeYdata)
# treeYdata[1:5,1:6]

## ----fit trees, eval = F------------------------------------------------------
# ml   <- list( ng = 2500, burnin = 500, typeNames = 'DA',
#               reductList = list(r = 8, N = 20) )
# form <- as.formula( ~ temp*deficit + I(temp^2) + I(deficit^2) )
# out  <- gjam(form, xdata = xdata, ydata = treeYdata, modelList = ml)
# specNames <- colnames(treeYdata)
# specColor <- rep('black',ncol(treeYdata))
# specColor[ c(grep('quer',specNames),grep('cary',specNames)) ] <- '#d95f02'
# specColor[ c(grep('acer',specNames),grep('frax',specNames)) ] <- '#1b9e77'
# specColor[ c(grep('abie',specNames),grep('pice',specNames)) ] <- '#377eb8'
# 
# gjamPlot( output = out, plotPars = list(GRIDPLOTS=T, specColor = specColor) )

## ----gsens, eval = F----------------------------------------------------------
# cols  <- c( '#1f78b4', '#33a02c' )
# types <- c('DA','DA','OC','OC','OC','OC','CC','CC','CC','CC','CC','CA','CA','PA','PA')
# f     <- gjamSimData(S = length(types), typeNames = types)
# ml    <- list(ng = 500, burnin = 50, typeNames = f$typeNames)
# out   <- gjam(f$formula, f$xdata, f$ydata, modelList = ml)
# 
# ynames <- colnames(f$y)
# group  <- ynames[types == 'CC']
# 
# full <- gjamSensitivity(out)
# cc   <- gjamSensitivity(out, group)
# ylim <- range( c(full, cc) )
# 
# nt <- ncol(full)
# 
# boxplot( full, boxwex = 0.2,  at = 1:nt - .15, col = cols[1], log='y',
#          ylim = ylim, xaxt = 'n', xlab = 'Predictors', ylab='Sensitivity')
# boxplot( cc, boxwex = 0.2, at = 1:nt + .15, col = cols[2], add=T,
#          xaxt = 'n')
# axis( 1, at=1:nt,labels=colnames(full) )
# legend( 'bottomright',c('full response','CC data'),
#         text.col = cols, bty='n' )

## ----sens2, eval=F------------------------------------------------------------
# group  <- ynames[types == 'CA']
# ca   <- gjamSensitivity(out, group)
# 
# ylim <- range( rbind(cc,ca) )
# 
# nt <- ncol(full)
# 
# boxplot( ca, boxwex = 0.2,  at = 1:nt - .15, col = cols[1], log='y',
#          xaxt = 'n', ylim = ylim, xlab = 'Predictors', ylab='Sensitivity')
# boxplot( cc, boxwex = 0.2, at = 1:nt + .15, col = cols[2], add=T,
#          xaxt = 'n')
# axis(1,at=1:nt,labels=colnames(full))
# legend('bottomright',c('CA data','CC data'),
#        text.col = cols, bty = 'n' )

## ----plot save, eval = F------------------------------------------------------
# plotPars <- list( GRIDPLOTS=T, SAVEPLOTS = T, outfolder = 'plots' )

## ----effort simulation, eval = F----------------------------------------------
# S  <- 5
# n  <- 50
# ef <- list( columns = 1:S, values = round(runif(n,.5,5),1) )
# f  <- gjamSimData(n, S, typeNames = 'DA', effort = ef)
# ef

## ----w vs y, bty = 'n', fig.width = 6.5, eval = F-----------------------------
# plot( f$w, f$y, cex = .2, xlab = 'Latent w scale', ylab = 'Observed y' )

## ----including effort, bty = 'n', fig.width = 6.5, eval = F-------------------
# plot(f$w*ef$values, f$y, cex = .2, xlab = 'Latent w time effort', ylab = 'Observed y')

## ----fitting, eval = F--------------------------------------------------------
# S   <- 10
# n   <- 1500
# ef  <- list( columns = 1:S, values = round(runif(n,.5,5),1) )
# f   <- gjamSimData(n, S, typeNames = 'DA', effort = ef)
# ml  <- list(ng = 1000, burnin = 250, typeNames = f$typeNames, effort = ef)
# out <- gjam(f$formula, f$xdata, f$ydata, modelList = ml)
# pl  <- list( trueValues = f$trueValues )
# gjamPlot(output = out, plotPars = pl)

## ----censUp, eval=F-----------------------------------------------------------
# # assumes there is a data matrix ydata
# upper <- 100
# intv  <- matrix(c(upper,Inf),2)
# rownames(intv) <- c('lower','upper')
# tmp <- gjamCensorY(values = upper, intervals = intv, y = f$ydata, type='DA')

## ----lowerLim, eval=F---------------------------------------------------------
# miny   <- apply(f$ydata, 2, min, na.rm=T)     #minimum value by column
# censor <-  numeric(0)
# p      <- matrix(0, 3, dimnames=list(c("values","lower","upper"), NULL))
# 
# for(j in 1:ncol(f$ydata)){
#   p[1:3] <- c(miny[j], -Inf, miny[j])
#   jlist  <- list("columns" = j, "partition" = p)
#   censor <- c(censor, list(jlist))
#   names(censor)[j] <- 'CA'
# }

## ----betaPrior, eval = F------------------------------------------------------
# xdata   <- forestTraits$xdata
# formula <- as.formula(~ temp*deficit)
# snames  <- colnames(treeYdata)
# 
# # warm winter
# hot <- c("liquStyr","liriTuli","pinuEchi","pinuElli","pinuPalu","pinuTaed",
#          "querImbr","querLaur","querLyra","querMich","querMueh","querNigr",
#          "querPhel","querVirg")  # arbitrary spp, positive winter temp
# nh <- length(hot)
# lo  <- vector('list', nh)
# names(lo) <- paste('temp',hot,sep='_')
# for(j in 1:nh)lo[[j]] <- 0
# 
# # humid climate (negative deficit)
# humid <- c("abieBals", "betuAlle", "querNigr", "querPhel")  #again, arbitrary
# nh <- length(humid)
# hi  <- vector('list', nh)
# names(hi) <- paste('deficit',humid,sep='_')
# for(j in 1:nh)hi[[j]] <- 0
# 
# bp <- gjamPriorTemplate(formula, xdata, ydata = treeYdata, lo = lo, hi=hi)
# rl <- list(N = 10, r = 5)
# ml <- list(ng=1000, burnin=200, typeNames = 'CA', betaPrior = bp, reductList=rl)
# out <- gjam(formula, xdata, ydata = treeYdata, modelList = ml)
# 
# sc  <- rep('grey',ncol(treeYdata))
# sc[snames %in% hot] <- '#ff7f00'      # highlight informative priors
# sc[snames %in% humid] <- '#1f78b4'
# pl  <- list( GRIDPLOTS = T, specColor = sc)
# gjamPlot(output = out, plotPars = pl)

## ----eval=F-------------------------------------------------------------------
# formula <- as.formula(~ temp + soil)
# 
# # find species-soil type combinations that occur less than 5 times
# y0 <- treeYdata
# y0[ y0 > 0 ] <- 1
# soil <- rep( as.character(xdata$soil), ncol(y0) )
# soil <- paste('soil', soil, sep='') # soil is a factor, so full name begins with variable name
# spec <- rep( colnames(y0), each = nrow(y0) )
# specBySoil <- tapply( as.vector(y0), list(spec, soil), sum )
# wna  <- which( specBySoil < 5, arr.ind = T )                   # only if at least 5 observations
# 
# # assign NA to excluded coefficients using species-variable names
# nh <- nrow(wna)
# lo  <- vector('list', nh)
# names(lo) <- paste( rownames(specBySoil)[wna[,1]], colnames(specBySoil)[wna[,2]], sep='_' )
# hi <- lo
# for(j in 1:nh)lo[[j]] <- NA
# hi <- lo
# 
# # setup prior and fit model
# bp  <- gjamPriorTemplate(formula, xdata, ydata = treeYdata, lo = lo, hi=hi)
# rl  <- list(N = 10, r = 5)
# ml  <- list(ng=1000, burnin=200, typeNames = 'CA', betaPrior = bp, reductList=rl)
# out <- gjam(formula, xdata, ydata = treeYdata, modelList = ml)

## ----compData, eval = FALSE---------------------------------------------------
# f   <- gjamSimData( S = 8, typeNames = 'CC' )
# ml  <- list( ng = 2000, burnin = 500, typeNames = f$typeNames )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# gjamPlot( output = out, plotPars = list( trueValues = f$trueValues) )

## ----compFC, eval = FALSE-----------------------------------------------------
# f   <- gjamSimData( S = 10, typeNames = 'FC' )
# ml  <- list( ng = 2000, burnin = 500, typeNames = f$typeNames )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# gjamPlot( output = out, plotPars = list( trueValues = f$trueValues ) )

## ----ordinal, eval = FALSE----------------------------------------------------
# f   <- gjamSimData( typeNames = 'OC' )
# ml  <- list( ng = 2000, burnin = 500, typeNames = f$typeNames )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )

## ----ordinal partition, eval = FALSE------------------------------------------
# par( mfrow=c(1,1), bty = 'n' )
# keep <- strsplit(colnames(out$parameters$cutMu),'C-') #only saved columns
# keep <- matrix(as.numeric(unlist(keep)), ncol = 2, byrow = T)[,2]
# plot( f$trueValues$cuts[,keep], out$parameters$cutMu, xlab = 'True partition',
#       ylab = 'Estimated partition')

## ----ordPlots, eval = FALSE---------------------------------------------------
# pl  <- list(trueValues = f$trueValues)
# gjamPlot(output = out, plotPars = pl)

## ----cat, eval = T, echo = F--------------------------------------------------
leaf  <- c('bd', 'nd', 'be', 'other')
xylem <- c('dp', 'rp', 'other') 
np    <- 7
xx <- data.frame( leaf = factor(sample(leaf,np,replace=T)),
            xylem = factor(sample(xylem,np,replace=T) ))
xx

## ----catY, eval = T, echo = F-------------------------------------------------
wl <- match(xx$leaf,leaf)
wx <- match(xx$xylem,xylem)
ml <- matrix(0,np,4)
ml[cbind(1:np,wl)] <- 1
colnames(ml) <- paste('leaf',leaf,sep='_')
mx <- matrix(0,np,3)
mx[cbind(1:np,wx)] <- 1
colnames(mx) <- paste('xylem',xylem,sep='_')
cbind(ml,mx)

## ----cat2, eval = FALSE-------------------------------------------------------
# types <- c( 'CAT', 'CAT' )
# f     <- gjamSimData( n = 3000, S = length(types), typeNames = types )
# ml    <- list( ng = 1500, burnin = 500, typeNames = f$typeNames, PREDICTX = F )
# out   <- gjam( f$formula, xdata = f$xdata, ydata = f$ydata, modelList = ml )
# gjamPlot( out, plotPars = list(trueValues = f$trueValues, PLOTALLY = T) )

## ----many types, eval = FALSE-------------------------------------------------
# types <- c('OC','OC','OC','OC','CC','CC','CC','CC','CC','CA','CA','PA','PA' )
# f     <- gjamSimData( S = length(types), Q = 5, typeNames = types )
# ml    <- list( ng = 2000, burnin = 500, typeNames = f$typeNames )
# out   <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# tmp   <- data.frame( f$typeNames, out$inputs$classBySpec[,1:10] )
# print( tmp )

## ----mixed analysis, eval = FALSE---------------------------------------------
# gjamPlot( output = out, plotPars = list(trueValues = f$trueValues, GRIDPLOTS = T) )

## ----random group, eval = FALSE-----------------------------------------------
# modelList$random <- 'columnNameInXdata'

## ----eval=F, echo=F-----------------------------------------------------------
# # lane's data
# 
# load("output-SE.rdata", verbose=T)
# 
#  icols <- 1:25
#  xdata <- out$inputs$xdata
#  ydata <- out$inputs$y[,icols]
#  form  <- as.formula( ~ minWinTemp + springPrcp + timeCat )
#  ml    <- out$modelList
#  ml$ng <- 2000
#  ml$burnin <- 500
#  ml$typeNames <- 'CA'
#  ml$effort <- NULL
#  ml$random <- 'observer'
# 
# 
#  ml$reductList <- NULL
#  out    <- .gjam(formula = form, xdata = xdata, ydata = ydata, modelList = ml)
# 
#  ml$reductList <- list(N = 25, r = 10)
#  outputDR <- .gjam(formula = form, xdata = xdata, ydata = ydata, modelList = ml)
# 
#  output <- outputDR
# 
# 
#  out <- gjamPredict(output, newdata = list( xdata = xdata ) )
# 
# 
# par(mfrow=c(5,5), mar=c(3,3,1,1))
# 
# for(j in 1:25){
#  # plot( jitter( ydata[,j], 60), output$prediction$ypredMu[,j], cex=.1)
#   plot( jitter( ydata[,j], 60), out$sdList$yMu[,j], cex=.1)
#   title(colnames(ydata)[j])
#   abline(0,1)
#   abline(h = mean(ydata[,j]))
# }
# 
# 
# 
#  ng     <- output$modelList$ng
#  burnin <- output$modelList$burnin
# 
# randByGroupMu  <- output$parameters$randByGroupMu  # S by G random groups, mean
# randByGroupSe  <- output$parameters$randByGroupSe  # SE
# groupIndex     <- output$parameters$groupIndex     # group index
# rndEffMu       <- output$parameters$rndEffMu       # n by S from dimension reduction
# rndEffSe       <- output$parameters$rndEffSe       # SE
# ngroup         <- ncol(randByGroupMu)
# x              <- output$inputs$xStand
# Q <- ncol(x)
# n <- nrow(x)
# S <- ncol(ydata)
# 
# N <- r <- NULL
# REDUCT <- F
# RANDOM <- F
# MARGIN <- T
# 
# N <- output$modelList$reductList$N
# r <- output$modelList$reductList$r
# if( !is.null(N) | N != 0 )REDUCT <- F
# if( !is.null(randByGroupMu) )RANDOM <- T
# 
# ### only if marginalizing random groups ###############
# avm <- output$parameters$randGroupVarMu
# avs <- output$parameters$randGroupVarSe
# lt  <- lower.tri(avm, diag = T)
# wt  <- which(lt, arr.ind=T)
# avg <- matrix(0, S, S)
# #######################################################
# 
# ysum <- ydata*0
# 
# nsim <- 100
# 
# for(i in 1:nsim){
# 
#   g  <- sample(burnin:ng, 1)
#   bg <- matrix(output$chains$bgibbs[g,], Q, S)
# 
#   if(REDUCT){
# 
#     sigmaerror <- output$chains$sigErrGibbs[g]
# 
#     if(!MARGIN){                                  # use fitted random effects for dim reduct
#       rndEff <- rndEffMu + matrix( rnorm( n*S, 0, rndEffSe ), n, S )
#       sg     <- diag(sigmaerror, S)
#     }else{                                        # marginalize random effects
#       rndEff <- 0
#       Z  <- matrix(output$chains$sgibbs[g,],N,r)
#       K  <- output$chains$kgibbs[g,]
#       sg <- .expandSigma(sigmaerror, S, Z = Z, K, REDUCT = T)
#     }
# 
#   } else {
#     sg <- .expandSigma(output$chains$sgibbs[g,], S = S, REDUCT = F)
#   }
# 
#   mu <- x%*%bg
# 
#   groupRandEff <- 0
# 
#   if(RANDOM){
#     if(MARGIN){
#       avg[ wt ] <- rnorm(nrow(wt), avm[wt], avs[wt])
#       avg[ wt[,c(2,1)] ] <- avg[ wt ]
#       groupRandEff <- .rMVN(ngroup, 0, avg)[groupIndex,]  # observer
#     }else{
#       randByGroup  <- rnorm( length(randByGroupMu), randByGroupMu, randByGroupSe )
#       groupRandEff <- t( matrix( randByGroup, S, ngroup ) )[groupIndex,]
#     }
#   }
# 
#   yp <- mu + .rMVN(n,0, sg) + groupRandEff + rndEff
#   yp[ yp < 0 ] <- 0
# 
#   ysum <- ysum + yp
# }
# 
# yp <- ysum/nsim
# 
# par(mfrow=c(1,2), mar=c(4,4,1,1))
# plot( sqrt(ydata), sqrt(yp), cex=.1)
# abline(0,1)
# plot( sqrt(ydata), sqrt(output$prediction$ypredMu), cex=.1)
# abline(0,1)
# 
# par(mfrow=c(4,4), mar=c(3,3,1,1))
# 
# for(j in 1:15){
#  # plot( jitter( output$prediction$ypredMu[,j], 60), sqrt(yp[,j]), cex=.1)
#   plot( jitter( ydata[,j], 60), sqrt(yp[,j]), cex=.1)
#   title(colnames(ydata)[j])
#   abline(0,1)
#   abline(h = mean(ydata[,j]))
# }
# 

## ----simulate missing data, eval = FALSE--------------------------------------
# f <- gjamSimData(typeNames = 'OC', nmiss = 20)
# which(is.na(f$xdata), arr.ind = T)

## ----holdouts, eval = FALSE---------------------------------------------------
# f   <- gjamSimData( typeNames = 'CA', nmiss = 20 )
# ml  <- list( ng = 2000, burnin = 500, typeNames = f$typeNames, holdoutN = 50 )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# 
# par( mfrow=c(1,3), bty = 'n' )
# xMu  <- out$prediction$xpredMu        # inverse prediction of x
# xSd  <- out$prediction$xpredSd
# yMu  <- out$prediction$ypredMu        # predicted y
# hold <- out$modelList$holdoutIndex    # holdout observations (rows)
# 
# plot(out$inputs$xUnstand[hold,-1],xMu[hold,-1], cex=.2, xlab='True', ylab='Predictive mean')
# title('holdouts in x')
# abline( 0, 1, lty=2 )
# plot(out$inputs$y[hold,], yMu[hold,], cex=.2, xlab='True', ylab='')
# title('holdouts in y')
# abline( 0, 1, lty=2 )
# 
# xmiss   <- out$missing$xmiss                              # locations of missing x
# xmissMu <- out$missing$xmissMu
# xmissSe <- out$missing$xmissSe
# xmean   <- apply(f$xdata,2,mean,na.rm=T)[xmiss[,2]] # column means for missing values
# plot(xmean, xmissMu, xlab='Variable mean', ylab='Missing estimate') #posterior estimates
# segments(xmean, xmissMu - 1.96*xmissSe, xmean, xmissMu + 1.96*xmissSe)          #approx 95% CI
# title('missing x')

## ----effortPredict, eval = FALSE----------------------------------------------
# sc   <- 3                                            # no. CA responses
# sd   <- 10                                           # no. DA responses
# tn   <- c( rep('CA',sc),rep('DA',sd) )               # combine CA and DA obs
# S    <- length(tn)
# n    <- 500
# emat <- matrix( runif(n,.5,5), n, sd )                # simulated DA effort
# eff  <- list( columns = c((sc+1):S), values = emat )
# f    <- gjamSimData( n = n, typeNames = tn, effort = eff )
# ml   <- list( ng = 2000, burnin = 500, typeNames = f$typeNames, effort = f$effort )
# out  <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# gjamPredict( out, y2plot = colnames(f$ydata)[tn == 'DA'] ) # predict DA data

## ----effortPredictNew, eval = FALSE-------------------------------------------
# cols <- c( '#1b9e77','#d95f02' )
# new  <- list( xdata = f$xdata, effort=eff, nsim = 500 ) # effort unchanged
# p1   <- gjamPredict( output = out, newdata = new )
# 
# plot(f$y[,tn == 'DA'], p1$sdList$yMu[,tn == 'DA'],
#      xlab = 'Observed', ylab = 'Predicted', cex=.2, col = cols[1] )
# abline( 0, 1, lty = 2 )
# 
# new$effort$values <- eff$values*0 + 1                 # predict for effort = 1
# p2 <- gjamPredict( output = out, newdata = new )
# 
# points( f$y[,tn == 'DA'], p2$sdList$yMu[,tn == 'DA'], col= cols[2], cex=.2 )
# legend( 'topleft', c('Actual effort', 'Effort = 1'), text.col = cols, bty='n' )

## ----effortPredictCond, eval = FALSE------------------------------------------
# cols <- c( '#1b9e77','#d95f02','#e7298a' )
# 
# condCols <- 1:3
# pCols    <- c(1:S)[-condCols]
# 
# new <- list(ydataCond = f$y[,condCols], nsim=200)         # cond on observed CA data
# p1  <- gjamPredict(output = out, newdata = new)$sdList$yMu[,pCols]
# 
# new    <- list(ydataCond = f$y[,condCols]*0, nsim=200)    # spp 1, 2 absent
# p2     <- gjamPredict(output = out, newdata = new)$sdList$yMu[,pCols]
# 
# obs <- f$y[,pCols]
# plot( jitter(obs), p2, xlab='Observed', ylab = 'Predicted', cex=.1,
#       ylim=range(c(p1,p2)), col = cols[1] )
# points( jitter(obs), out$prediction$ypredMu[,pCols], col=cols[2], cex=.1)
# points( jitter(obs), p1, col=cols[3], cex=.1)
# abline( 0, 1, lty=2 )
# legend( 'topleft',
#         c('Conditioned on absent spp 1:3', 'Unconditional', 'Conditioned on observed spp 1:3'),
#         text.col = cols, bty='n')

## ----input, eval = F----------------------------------------------------------
# library(repmis)
# d <- "https://github.com/jimclarkatduke/gjam/blob/master/forestTraits.RData?raw=True"
# source_data(d)
# 
# xdata <- forestTraits$xdata                            # n X Q
# ydata <- gjamReZero( forestTraits$treesDeZero )        # n X S
# ydata <- ydata[,colnames(ydata) != 'other']
# ydata <- gjamTrimY( ydata, 200, OTHER=F )$y

## ----gjamCA, eval=F-----------------------------------------------------------
# ml      <- list( ng = 2500, burnin = 1000, typeNames = 'DA',
#                  PREDICTX = F )
# formula <- as.formula(~ temp + deficit*moisture)
# output  <- gjam( formula, xdata, ydata, modelList = ml )
# 
# # prediction
# newdata <- list( nsim=100 )
# tmp     <- gjamPredict( output, newdata=newdata )
# full    <- tmp$sdList$yMu

## ----cond, eval=F-------------------------------------------------------------
# cols    <- c( '#1b9e77','#d95f02' )
# cnames  <- sample( colnames(ydata), S/2)
# wc      <- match(cnames, colnames(ydata))
# newdata <- list(ydataCond = ydata[,cnames], nsim=100)
# tmp     <- gjamPredict(output, newdata = newdata)
# condy   <- tmp$sdList$yMu
# 
# plot(ydata[,-wc], full[,-wc], ylim = c(range(ydata)),
#      xlab = 'Observed', ylab = 'Predicted', col = cols[1], cex = .3)
# abline( 0, 1, lty=2 )
# points( ydata[,-wc], condy[,-wc], col = cols[2], cex = .3)
# legend( 'topleft', c('Unconditional', 'Conditional on half of species'),
#         text.col = cols[1:2], bty='n' )

## ----eval = F-----------------------------------------------------------------
# gjamConditionalParameters( output, conditionOn = c('acerSacc','betuAlle','faguGran','querRubr') )

## ----eval=FALSE---------------------------------------------------------------
# f   <- gjamSimData( n = 100, S = 200, typeNames='CA' )
# ml  <- list( ng = 2000, burnin = 500, typeNames = f$typeNames,
#             reductList = list(r = 15, N = 25), PREDICTX = F )
# out <- gjam( f$formula, f$xdata, f$ydata, modelList = ml )
# gjamPlot( output = out, plotPars = list(trueValues = f$trueValues) )

## ----fungal data summary, bty='n', fig.width=6.5, eval=F----------------------
# library(repmis)
# d <- "https://github.com/jimclarkatduke/gjam/blob/master/fungEnd.RData?raw=True"
# source_data(d)
# 
# xdata  <- fungEnd$xdata
# otu    <- gjamReZero(fungEnd$yDeZero)
# status <- fungEnd$status
# 
# par( mfrow=c(1,3), bty='n' )
# hist( status, main='Host condition (morbid = 0)', ylab = 'Host obs' )
# hist( otu, nclass=100, ylab = "Reads", main = "OTU's" )
# nobs <- gjamTrimY( otu, minObs = 1, OTHER = F )$nobs
# hist( nobs/ncol(otu), nclass=100, xlab = 'Fraction of observations',
#       ylab = 'Frequency', main='Incidence' )

## ----get y, eval = F----------------------------------------------------------
# tmp <- gjamTrimY(otu, minObs = 100)
# y   <- tmp$y
# dim(fungEnd$y)               # all OTUs
# dim(y)                       # trimmed data
# tail(colnames(y))            # 'other' class added

## ----trim y, eval=FALSE-------------------------------------------------------
# ydata <- cbind(status, y)     # host status is also a response
# S     <- ncol(ydata)
# typeNames    <- rep('CC',S)   # composition count data
# typeNames[1] <- 'PA'          # binary host status

## ----factors, eval = F--------------------------------------------------------
# xdata$host <- relevel(xdata$host,'acerRubr')

## ----model fit, eval=F, eval=FALSE--------------------------------------------
# ml <- list( ng = 2000, burnin = 500, typeNames = typeNames,
#             reductList = list(r = 8, N = 15) )
# output <- gjam(~ host*poly, xdata, ydata, modelList = ml)

## ----plots, eval=F------------------------------------------------------------
# S <- ncol(ydata)
# specColor     <- rep('blue',S)
# specColor[1]  <- '#b2182b'                 # highlight host status
# fit <- gjamPlot( output, plotPars = list(specColor = specColor, GRIDPLOTS = T,
#                                          SIGONLY = T) )
# fit$eComs[1:5,]

## ----bstatus, eval=F----------------------------------------------------------
# beta <- output$parameters$betaTable
# ws   <- grep( 'status_',rownames(beta) )  # find coefficients for status
# beta[ws,]

## ----bsig, eval=F-------------------------------------------------------------
# ws <- which(beta$sig95 == '*')
# beta[ws,]

## ----int, eval=F--------------------------------------------------------------
# x <- output$inputs$xUnstand
# xvector <- x[1,]*0
# names(xvector)  <- colnames(x)
# 
# xvector['hostfraxAmer'] <- 1
# xvector['polypoly'] <- 1
# fit1 <- gjamIIE(output, xvector, omitY = 'other')
# 
# par(mfrow=c(1,3), bty='n', oma = c(1,2,0,0),
#     mar = c(3,3,3,1), tcl = -0.5, mgp = c(3,1,0))
# gjamIIEplot(fit1, response = 'status', effectMu = 'direct',
#             effectSd = 'direct', legLoc = 'bottomright', ylim=c(-10,10))
# title('Direct effect by host')
# 
# gjamIIEplot(fit1, response = 'status', effectMu = 'int', effectSd = 'int',
#             legLoc = 'topright', ylim=c(-10,10))
# title('Interactions with polyculture')
# 
# gjamIIEplot(fit1, response = 'status', effectMu = 'ind', effectSd = 'ind',
#             legLoc = 'topright', ylim=c(-5,5))
# title('Indirect effect of microbiome')

## ----traitBox1, fig.width = 7, fig.height = 3.5, echo = FALSE-----------------

.getBox <- function(x0,y0,tall,wide,col='white'){
  x1 <- x0 + wide
  y1 <- y0 + tall
  rect(x0, y0, x1, y1, col = col)
  mx <- mean(c(x0,x1))
  my <- mean(c(y0,y1))
  invisible(list( vec = c(x0,x1,y0,y1), mu = c(mx,my)) )
}

par(bty='n', mar=c(1,1,1,1), oma = c(0,0,0,0), 
    mar = c(3,2,2,1), tcl = -0.5, mgp = c(3,1,0), family='serif')

n <- 100
S <- 70
M <- 16
P <- 6
Q <- 14

xbox <- c(M,Q,M)
ybox <- c(n,n,Q)
xb <- c('M','Q','M')
yb <- c('n','n','Q')
xgap <- c(8,5,5)

ymax <- n + 6
xmax <- sum(xbox) + sum(xgap) + 5

plot(0,0,xlim=c(0,xmax),ylim=c(0,ymax),xaxt='n',yaxt='n',xlab='',ylab='',
     cex=.01)
xs <- 2

ti <- c('=','x','x')
ci <- c('U','X','beta')

for(j in 1:length(xbox)){
  
  ylo <- ymax - ybox[j]
  tmp <- .getBox(xs,ylo,ybox[j],xbox[j])
  xs  <- xgap[j] + tmp$vec[2]
  
  text(tmp$mu[1],ylo - 6,paste(yb[j],' x ',xb[j]))

  if(j < length(xbox))text(tmp$vec[2] + xgap[j]/2,ymax - ybox[j+1]/2, ti[j])
  if(j == 1)text(tmp$mu[1],tmp$mu[2],
                 expression(paste(italic(E),'[',bold(W),']')))
  if(j == 2)text(tmp$mu[1],tmp$mu[2],expression(bold(X)))
  if(j == 3)text(tmp$mu[1],tmp$mu[2],expression(A))
}

## ----input6, eval = F---------------------------------------------------------
# library(repmis)
# d <- "https://github.com/jimclarkatduke/gjam/blob/master/forestTraits.RData?raw=True"
# source_data(d)
# 
# xdata <- forestTraits$xdata                          # n X Q
# types <- forestTraits$traitTypes                     # 12 trait types
# sbyt  <- forestTraits$specByTrait                    # S X 12
# pbys  <- gjamReZero(forestTraits$treesDeZero)        # n X S
# pbys  <- gjamTrimY(pbys,5)$y                         # at least 5 plots
# sbyt  <- sbyt[match(colnames(pbys),rownames(sbyt)),] # trait matrix matches ydata
# identical(rownames(sbyt),colnames(pbys))

## ----input7, eval = F---------------------------------------------------------
# table(sbyt$leaf)      # four levels
# 
# table(sbyt$xylem)     # diffuse/tracheid vs ring-porous
# 
# table(sbyt$repro)     # two levels

## ----input8, eval = F---------------------------------------------------------
# tmp         <- gjamSpec2Trait(pbys, sbyt, types)
# tTypes      <- tmp$traitTypes                  # M = 15 values
# u           <- tmp$plotByCWM                   # n X M
# censor      <- tmp$censor                      # (0, 1) censoring, two-level CAT's
# specByTrait <- tmp$specByTrait                 # S X M
# M           <- ncol(u)
# n           <- nrow(u)
# cbind(colnames(u),tTypes)                      # M trait names and types

## ----setup2, eval = F---------------------------------------------------------
# censorList    <- gjamCensorY(values = c(0,1), intervals = cbind( c(-Inf,0),c(1,Inf) ),
#                              y = u, whichcol = c(13:14))$censor

## ----xdata, eval = F, echo = FALSE--------------------------------------------
# head(xdata)
# table(xdata$soil)

## ----soil, eval = F-----------------------------------------------------------
# xdata$soil <- relevel(xdata$soil,'reference')

## ----fit, eval = F------------------------------------------------------------
# ml  <- list(ng = 3000, burnin = 500, typeNames = tTypes, holdoutN = 20,
#             censor=censor, notStandard = c('u1','u2','u3'))
# out <- gjam(~ temp + stdage + moisture*deficit + deficit*soil,
#                  xdata = xdata, ydata = u, modelList = ml)
# tnames <- colnames(u)
# sc <- rep('black', M)                                  # highlight types
# wo <- which(tnames %in% c("leafN","leafP","SLA") )     # foliar traits
# wf <- grep("leaf",tnames)                              # leaf habit
# wc <- which(tnames %in% c("woodSG","diffuse","ring") ) # wood anatomy
# 
# sc[wc] <- 'brown'
# sc[wf] <- 'darkblue'
# sc[wo] <- 'darkgreen'
# 
# pl  <- list(GRIDPLOTS = TRUE, PLOTALLY = T, specColor = sc)
# gjamPlot(output = out, plotPars = pl)
# summary(out)

## ----fit pars, eval = F-------------------------------------------------------
# out$parameters$betaMu         # Q by M coefficient matrix alpha
# out$parameters$betaStandXmu   # Q by M standardized for X
# out$parameters$betaStandXWmu  # (Q-F) by M standardized for W/X, centered factors
# out$parameters$betaTable      # QM by stats posterior summary
# out$parameters$sigMu          # M by M covariance matrix omega
# out$parameters$sigSe          # M by M covariance std errors

## ----IIEx, eval = F-----------------------------------------------------------
# xdrydry <- xwetdry  <- out$inputs$xUnstand[1,]
# xdrydry['moisture'] <- xdrydry['deficit'] <- -1
# xwetdry['moisture'] <- 1
# xwetdry['deficit']  <- -1

## ----IIE1, eval = F-----------------------------------------------------------
# par(mfrow=c(2,2), bty='n', mar=c(1,3,1,1), oma = c(0,0,0,0),
#     mar = c(3,2,2,1), tcl = -0.5, mgp = c(3,1,0), family='')
# 
# fit1 <- gjamIIE(output = out, xvector = xdrydry)
# fit2 <- gjamIIE(output = out, xvector = xwetdry)
# 
# gjamIIEplot(fit1, response = 'leafbroaddeciduous',
#             effectMu = c('main','int'), effectSd = c('main','int'),
#             legLoc = 'bottomleft', ylim=c(-.31,.3))
# title('deciduous')
# gjamIIEplot(fit1, response = 'leafneedleevergreen',
#             effectMu = c('main','int'), effectSd = c('main','int'),
#             legLoc = 'bottomleft', ylim=c(-.3,.3))
# title('evergreen')
# 
# gjamIIEplot(fit2, response = 'leafbroaddeciduous',
#             effectMu = c('main','int'), effectSd = c('main','int'),
#             legLoc = 'bottomleft', ylim=c(-.3,.3))
# gjamIIEplot(fit2, response = 'leafneedleevergreen',
#             effectMu = c('main','int'), effectSd = c('main','int'),
#             legLoc = 'bottomleft', ylim=c(-.3,.3))

## ----IIE4, eval = F-----------------------------------------------------------
# xvector <- out$inputs$xUnstand[1,]
# par(mfrow=c(2,1), bty='n', mar=c(1,1,1,1), oma = c(0,0,0,0),
#     mar = c(3,2,2,1), tcl = -0.5, mgp = c(3,1,0), family='')
# 
# omitY <- colnames(u)[colnames(u) != 'leafbroaddeciduous'] # omit all but deciduous
# 
# fit <- gjamIIE(out, xvector)
# gjamIIEplot(fit, response = 'leafP', effectMu = c('main','ind'),
#             effectSd = c('main','ind'), legLoc = 'topright', ylim=c(-.6,.6))
# title('foliar P')
# gjamIIEplot(fit, response = 'leafN', effectMu = c('main','ind'),
#             effectSd = c('main','ind'), legLoc = 'bottomright', ylim=c(-.6,.6))
# title('foliar N')

## ----traitBox2, fig.width = 6.7, fig.height = 4, echo = FALSE-----------------
par(bty='n', bty='n', mar=c(1,1,1,1), oma = c(0,0,0,0), 
    mar = c(3,2,2,1), tcl = -0.5, mgp = c(3,1,0), family='serif')

n <- 100
S <- 70
M <- 16
P <- 6
Q <- 14

xbox <- c(S,M,Q,S,M)
ybox <- c(n,S,n,Q,S)
xb   <- c('S','M','Q','S','M')
yb   <- c('n','S','n','Q','S')
xgap <- c(15,28,15,15,15,15)

ymax <- n + 5
xmax <- sum(xbox) + sum(xgap) + 5

plot(0,0,xlim=c(0,xmax),ylim=c(0,ymax),xaxt='n',yaxt='n',xlab='',ylab='',
     cex=.01)
xs <- xgap[1]

ti <- c('x','=','x','x')
ci <- c('W','T','X','beta','T')
col <- rep('white',length(xbox))
col[c(2,5)] <- 'wheat'

for(j in 1:length(xbox)){
  
  ylo <- ymax - ybox[j]
  tmp <- .getBox(xs,ylo,ybox[j],xbox[j], col[j])
  xs  <- xgap[j] + tmp$vec[2]
  
  text(tmp$mu[1], ylo - 6, paste(yb[j],' x ',xb[j]) )

  if(j < length(xbox))text(tmp$vec[2] + xgap[j]/2,ymax - ybox[j+1]/2, ti[j])
  if(j == 1)text(tmp$mu[1],tmp$mu[2],
                 expression(paste(italic(E),'[',bold(W),']')))
  if(j == 2)text(tmp$mu[1],tmp$mu[2],expression(bold(T)))
  if(j == 3)text(tmp$mu[1],tmp$mu[2],expression(bold(X)))
  if(j == 4)text(tmp$mu[1],tmp$mu[2],expression(hat(Beta)))
  if(j == 5)text(tmp$mu[1],tmp$mu[2],expression(bold(T)))
}

## ----PTM, eval = F------------------------------------------------------------
# tl  <- list(plotByTrait = u, traitTypes = tTypes, specByTrait = specByTrait)
# rl  <- list(r = 8, N = 25)
# ml  <- list(ng = 2000, burnin = 500, typeNames = 'CC', holdoutN = 20,
#                   traitList = tl, reductList = rl)
# out <- gjam(~ temp + stdage + deficit*soil, xdata = xdata,
#                      ydata = pbys, modelList = ml)
# 
# S  <- nrow(specByTrait)
# sc <- rep('black', S)
# wr <- which(specByTrait[,'ring'] == 1)                  # ring porous
# wb <- which(specByTrait[,'leafneedleevergreen'] == 1)   # needle-leaf evergreen
# wd <- which(specByTrait[,'leafneedledeciduous'] == 1)   # needle-leaf deciduous
# ws <- which(specByTrait[,'shade'] >= 4)                 # shade tolerant
# sc[wr] <- '#8c510a'
# sc[ws] <- '#3288bd'
# sc[wb] <- '#003c30'
# sc[wd] <- '#80cdc1'
# 
# M  <- ncol(specByTrait)
# tc <- rep('black', M)
# names(tc) <- colnames(specByTrait)
# tc[ 'ring' ] <- '#8c510a'
# tc[ 'leafneedleevergreen' ] <- '#3288bd'
# tc[ 'leafneedledeciduous' ] <- '#003c30'
# tc[ 'shade' ] <- '#80cdc1'
# 
# par(family = '')
# pl  <- list(GRIDPLOTS=T,
#                   specColor = sc, traitColor = tc, ncluster = 5)
# fit <- gjamPlot(output = out, pl)

## ----trait pars, eval = F-----------------------------------------------------
# out$parameters$betaTraitXMu     # Q by M coefficient matrix alpha, standardized for X
# out$parameters$betaTraitXWmu    # Q by M standardized for X/W
# out$parameters$betaTraitXTable  # QM by stats posterior summary
# out$parameters$betaTraitXWTable # QM by stats summary for X/W
# out$parameters$varTraitMu       # M by M trait residual covariance, standardized for X
# out$parameters$varTraitTable    # M^2 by stats summary, standardized for X/W

## ----trait pred, eval = F-----------------------------------------------------
# out$prediction$tMu[1:5,]     # n by M predictive means
# out$prediction$tSe[1:5,]     # n by M predictive std errors

## ----checkRank, eval=F--------------------------------------------------------
# f <- gjamSimData(S = 5, typeNames = 'CA')
# x <- model.matrix(f$formula, f$xdata)
# qr(x)$rank

## ----cont1, echo=F------------------------------------------------------------
D <- rbind( c(1, -1, -1), c(0,1,0), c(0, 0, 1))
colnames(D) <- c('intercept','b','c')
rownames(D) <- c('a','b','c')
C <- D
C[,1] <- 1
C

## ----cont2, echo=F------------------------------------------------------------
t(D)