R/residuals.R

Defines functions residuals.index rstandard.index rstudent.index .rvals

Documented in residuals.index rstandard.index rstudent.index

#' Extract Model Residuals
#'
#' For the index of dissimilarity (ID), the residuals are the differences
#' between the share of the Y population and the share of the X population in
#' each neighbourhood. For the multilevel index, the residuals are estimated
#' at, and partitioned between, each level of the model.
#'
#' @param object an object of class \code{index}
#' @param ... other arguments (currently unused)
#' @return a numeric vector (single-level index) or a matrix with one column
#' per level (multilevel index) containing the residuals
#' @examples
#' data(aggdata)
#' index <- id(aggdata, vars = c("Bangladeshi", "WhiteBrit"))
#' # The ID can be derived from the residuals
#' 0.5 * sum(abs(residuals(index)))
#' # which is the same as
#' index[1]
#'
#' # Extract the standardized residuals and look for regions where the share
#' # of the Bangladeshi population is unusually high with respect to the
#' # White British
#' # resids <- rstandard(index)
#' # table(aggdata$RGN[resids > 2.58])
#'
#' # Residuals for a multilevel index
#' index <- id(aggdata, vars = c("Bangladeshi", "WhiteBrit"),
#'             levels = c("MSOA","LAD","RGN"))
#' resids <- residuals(index)
#' head(resids)
#' # Again, the ID can be derived from the residuals
#' 0.5 * sum(abs(rowSums(resids)))
#'
#' # Looking at the residuals, the London effect is different from other regions
#' sort(tapply(resids[,4], aggdata$RGN, mean))
#'
#' # At the local authority scale it is Tower Hamlets and Newham
#' # (both in London) that have the highest share of the Bangladeshi population
#' # with respect to the White British:
#' tail(sort(tapply(resids[,3], aggdata$LAD, mean)),5)
#' @seealso \code{\link{rstandard.index}} \code{\link{rstudent.index}}

residuals.index <- function(object, ...) {

  multilevel.fit <- attr(object, "mlm")

  # No "mlm" attribute: fall back to the model held in the "ols" attribute
  if (is.null(multilevel.fit)) {
    return(residuals(attr(object, "ols")))
  }

  # Multilevel case: the residuals matrix is assembled by .rvals()
  .rvals(multilevel.fit)

}

#' Standardised residuals for the single-level Index of Dissimilarity
#'
#' Calculates the standardised residuals for the single-level index
#'
#' The residuals are the differences between the share of the Y population
#' and the share of the X population per neighbourhood. A positive residual
#' occurs where the share of the Y population exceeds the share of the X
#' population, and a negative residual where the opposite is true. The
#' standardised residuals can help to identify 'extreme' differences.
#'
#' @param model an object of class \code{index} generated by the function
#' \code{\link{id}}
#' @param ... other arguments (currently unused)
#' @return the result of \code{\link[stats]{rstandard}} applied to the model
#' stored in the \code{"ols"} attribute of \code{model}
#' @seealso \code{\link{residuals.index}} \code{\link{rstudent.index}}

rstandard.index <- function(model, ...) {

  # Delegate to the rstandard() method of the model kept in the "ols" attribute
  rstandard(attr(model, "ols"))

}

#' Studentised residuals for the single-level Index of Dissimilarity
#'
#' Calculates the studentised residuals for the single-level index
#'
#' The residuals are the differences between the share of the Y population
#' and the share of the X population per neighbourhood. A positive residual
#' occurs where the share of the Y population exceeds the share of the X
#' population, and a negative residual where the opposite is true. The
#' studentised residuals can help to identify 'extreme' differences.
#'
#' @param model an object of class \code{index} generated by the function
#' \code{\link{id}}
#' @param ... other arguments (currently unused)
#' @return the result of \code{\link[stats]{rstudent}} applied to the model
#' stored in the \code{"ols"} attribute of \code{model}
#' @seealso \code{\link{residuals.index}} \code{\link{rstandard.index}}

rstudent.index <- function(model, ...) {

  # Delegate to the rstudent() method of the model kept in the "ols" attribute
  rstudent(attr(model, "ols"))

}


# Internal helper: assemble the residuals matrix for a multilevel index.
#
# mlm: a fitted multilevel model for which residuals(), lme4::ranef() and the
#      "frame" slot are available.
#
# Returns a matrix with one row per residual. The first column, "Base", holds
# residuals(mlm). Each further column is named after one element of
# lme4::ranef(mlm) and holds the first column of that element's random
# effects, matched to the rows of the model frame by grouping value.
.rvals <- function(mlm) {

  resids <- residuals(mlm)
  rf <- lme4::ranef(mlm)
  mod.data <- slot(mlm, "frame")

  results <- matrix(nrow = length(resids), ncol = length(rf) + 1)
  rownames(results) <- names(resids)
  colnames(results) <- c("Base", names(rf))
  results[, 1] <- resids

  # seq_along() is safe when rf is empty; 1:length(rf) would run over c(1, 0)
  for (i in seq_along(rf)) {

    grp <- names(rf)[i]
    if (!grp %in% names(mod.data)) {
      stop("grouping factor '", grp, "' not found in the model frame",
           call. = FALSE)
    }

    # Look up each observation's group among the row names of the random
    # effects table, then copy across the first random-effect column
    mch <- match(mod.data[[grp]], rownames(rf[[i]]))
    results[, i + 1] <- rf[[i]][mch, 1]

  }

  results

}
profrichharris/MLID documentation built on May 26, 2019, 8:34 a.m.