distWorkshop: Distributional analyses of linguistic data in the GAMM framework

Documented in plotQGAMs

#' Plot quantiles
#'
#' Plot the results of a series of QGAM models (Fasiolo et al., 2017)
#' @param
#' models A list of QGAM models as generated by the mqgam() function in the qgam package.
#' @param
#' predictor The predictor to be plotted. This predictor needs to be present in the fitted 
#' models, as well as in data.
#' @param
#' data The data the QGAM models were fit to. Needs to include the response variable, 
#' as well as all predictors in these models.
#' @param
#' cols A vector of colors. The lines corresponding to the quantiles will be plotted in 
#' these colors.
#' @param
#' se The number of standard errors for the confidence intervals. Default: 2 (i.e., 95\% confidence intervals)
#' 
#' @examples
#' # Remove outliers from the ld data set, which contains lexical 
#' # decision latencies from the British Lexicon Project (Keuleers 
#' # et al, 2012)
#' predictors = c("RT", "logFrequency", "Length", "logOLD20", "SND20")
#' ld = removeOutliers(ld, predictors)
#' ld = na.omit(ld)
#' 
#' # Tune learning rate for median
#' tune = tuneLearnFast(RT ~ s(logFrequency) + s(Length) + 
#'                           s(logOLD20) + s(SND20),
#'                      data = ld, qu = 0.5)
#' sigpar = tune$lsig
#' 
#' # Define quantiles
#' quants = c(0.10,0.25,0.50,0.75,0.90)
#' 
#' # Run qgam models
#' qgams = mqgam(RT ~ s(logFrequency) + s(Length) + s(logOLD20) + 
#'                    s(SND20),
#'                    data = ld, qu = quants, lsig = sigpar)
#' 
#' # Plot effect of frequency at quantiles
#' plotQGAMs(qgams, "logFrequency", ld)
#' 
#' @references 
#' Fasiolo M., Goude Y., Nedellec R., & Wood S. N. (2017). Fast
#' calibrated additive quantile regression. URL: 
#' https://arxiv.org/abs/1707.03307.
#' 
#' Keuleers, E., Lacey, P., Rastle, K., & Brysbaert, M. (2012). 
#' The British Lexicon Project: Lexical decision data for 28,730 
#' monosyllabic and disyllabic English words. Behavior Research 
#' Methods, 44(1), 287-304.
#' 
#' @export

# Plot quantiles
plotQGAMs = function(models, predictor, data,
                     cols = c("#000080","#1A1A9A","#3333B3","#4D4DCD","#6666E6"), 
                     se = 2, xlab = NA, ylab = NA, main = NA, ylim = NA, ...) {

  # Retrieve predictors from the model
  preds = colnames(models$model)
  
  # Subset data
  data = data[,which(colnames(data)%in%preds),]
  data = na.omit(data)
  
  # Make data frame for model predictions
  dfr = data.frame(predictor = seq(min(data[,predictor]), max(data[,predictor]),
          length.out = 100))
  colnames(dfr) = predictor
  for(i in 1:length(preds)) {
    if(!(preds[i]%in%colnames(dfr))) {
      if(is.numeric(data[,preds[i]])) {
        dfr[,preds[i]] = mean(data[,preds[i]], na.rm = TRUE)
      } else {
        diff = abs(mean(pn[,names(models$model[1])], na.rm = TRUE) - 
                   tapply(pn[,names(models$model[1])], pn[,preds[i]], 
                          FUN = function(x){mean(x,na.rm = T)}))
        dfr[,preds[i]] = names(diff[which(diff==min(diff))])
      }
    }
  }
  
  # Retrieve quantiles from the model
  quants = as.numeric(names(models$fit))
  
  # Get predicted value for each quantile
  preds = list()
  for(q in 1:length(quants)){
    preds[[q]] <- qdo(models, quants[q], predict, newdata = dfr, se.fit = TRUE)
  }

  # Define ylimit for the plot
  if(is.na(ylim)) {
    ylimit = range(unlist(lapply(preds, FUN = function(x){c(range(x$fit + se*x$se.fit),
      range(x$fit - se*x$se.fit))})))
    ylimit[1] = ylimit[1] * 0.95; ylimit[2] = ylimit[2] * 1.05
  } else {
    ylimit = ylim
  }

  # Define plot labels
  mainlab = ifelse(is.na(main), predictor, main)
  ylabel = ifelse(is.na(ylab), names(models$model[1]), ylab)
  xlabel = ifelse(is.na(xlab), predictor, xlab)

  # Set up plot
  plot(dfr[,predictor], preds[[i]]$fit, ylim = ylimit, col = cols[i], type = "n",
       ylab = ylabel, xlab = xlabel, main = mainlab, ...)
  
  # Plot quantiles
  for(q in 1:length(models$fit)) {
    lines(dfr[,predictor], preds[[q]]$fit, col = cols[i])
    polygon(c(dfr[,predictor], rev(dfr[,predictor])),
            c(preds[[q]]$fit + se * preds[[q]]$se.fit, 
              rev(preds[[q]]$fit - se * preds[[q]]$se.fit)),
            col = paste(cols[q], "66", sep = ""), border = NA)
  }
  
  # Add rug
  suppressWarnings(rug(as.numeric(quantile(data[,predictor], seq(0, 1, length.out = 200))),
    side = 1))
  
}

PeterHendrix13/distWorkshop documentation built on Nov. 5, 2019, 2:51 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

PeterHendrix13/distWorkshop
Distributional analyses of linguistic data in the GAMM framework

R/plotQGAMs.R
In PeterHendrix13/distWorkshop: Distributional analyses of linguistic data in the GAMM framework

Defines functions plotQGAMs

Documented in plotQGAMs

R Package Documentation

Browse R Packages

We want your feedback!

PeterHendrix13/distWorkshop Distributional analyses of linguistic data in the GAMM framework

R/plotQGAMs.R In PeterHendrix13/distWorkshop: Distributional analyses of linguistic data in the GAMM framework

Defines functions plotQGAMs

Documented in plotQGAMs

R Package Documentation

Browse R Packages

We want your feedback!

PeterHendrix13/distWorkshop
Distributional analyses of linguistic data in the GAMM framework

R/plotQGAMs.R
In PeterHendrix13/distWorkshop: Distributional analyses of linguistic data in the GAMM framework