R/lm_functions.R

#' extract all test vectors from \code{lm} object
#' 
#' @param limo \code{lm} object
#' @return \code{list} of test vectors
#' @export
#' 
#' @examples
#' set.seed(42)
#' y <- rnorm(10)
#' x <- runif(10)
#' mod <- lm(y ~ x)
#' 
#' # get test vectors
#' vT <- extract_testvec(mod)
#' # calculate coefficient estimates
#' sapply(vT, function(vt) vt%*%y)
#' coef(mod)
#' 
extract_testvec <- function(limo)
{
  
  X <- model.matrix(limo)
  if(ncol(X)==0) return(list())
  ass <- limo$assign
  if(is.null(ass)){
    vecs <- lapply(1:ncol(X), function(j) (solve(crossprod(X))%*%t(X))[j,,drop=F])
  }else{
    rlass <- rle(ass)
    rll <- rlass$lengths
    rlv <- rlass$values + 1
    singles <- rlv[rll==1]
    singlesCol <- (1:ncol(X))[sapply(ass+1, function(i) i %in% singles)]
    facs <- rlv[rll>1]
    facsCol <- lapply(facs, function(id) which(ass+1 == id))
    vT <- lapply(singlesCol, function(j) (solve(crossprod(X))%*%t(X))[j,,drop=F])
    Xgtilde <- lapply(facsCol, function(j){
      
      Xj <- X[, j]
      Xminusj <- svd(X[, -j, drop=FALSE])$u
      Xj - tcrossprod(Xminusj) %*% Xj
      
    })
    vecs <- c(vT, Xgtilde) 
    vecs <- vecs[order(c(singles,facs))]
  }
  nam <- attr(limo$terms, "term.labels")
  if("(Intercept)"%in%colnames(X)) nam <- c("(Intercept)", nam)
  names(vecs) <- nam
  return(vecs)
  
}

#' hat matrix calculation of linear model
#' 
#' @param limo \code{lm} object
#' @param X \code{matrix} respresenting the design matrix of a linear model
#' @return hat matrix
#' 
#' @examples
#' set.seed(42)
#' y <- rnorm(10)
#' x <- runif(10)
#' mod <- lm(y ~ x)
#' 
#' str(getHatMat(mod),1)
#' str(getHatMat(X = as.matrix(cbind(interc=rep(1,10),x))),1)
#' 
#' @export
getHatMat <- function(limo = NULL, X = NULL)
{
  
  if(is.null(X)){
    
    if(is.null(limo)) stop("Either limo or X argument must be specified.")
    X <- model.matrix(limo)
    
  } 
  
  if(ncol(X)==0){
    
    # null model
    return(matrix(0, nrow=nrow(X), ncol=nrow(X)))
    
  }else{
    
    tX <- t(X)
    return(X%*%solve(tX%*%X)%*%tX)
    
  }
  
  

}

#' extract components from list of \code{lm} objects
#' 
#' @param listOfModels \code{list} of \code{lm} objects
#' @param response response vector; must be supplied if not stored in the \code{lm} object.
#' @param what \code{character} describing the method, on the basis of which variable selection was performed
#' @param REML \code{logical}; whether scale estimates should be calculated via \code{REML} or not. Default is
#' \code{FALSE} since the log-likelihood is calculated with \code{REML=FALSE} by the \code{logLik} function.
#' @param alpha significance level for selection via likelihood ratio test or significance hunting.
#' @param df positive integer; defines the degree of freedom when using the likelihood ratio test for model selection.
#' Defaults to \code{1} (testing one parameter at a time).
#' @return \code{list} of components for each model. \code{A} and \code{c}: matrices and scalar of 
#' affine inequality; \code{y}: response vector; \code{bestInd} indicator for best model in \code{listOfModels}.
#' 
#' @examples 
#' set.seed(42)
#' y <- rnorm(10)
#' x <- runif(10)
#' mod1 <- lm(y ~ x)
#' 
#' x2 <- runif(10)
#' mod2 <- lm(y ~ x2)
#' 
#' # AIC comparison
#' AIC(mod1,mod2)
#' 
#' lom <- list(mod1, mod2)
#' 
#' # get components
#' comps <- extract_components(lom, response = y)
#' str(comps,1)
#' t(comps$y)%*%comps$A[[1]]%*%comps$y
#' 
#' 
#' @export
#' 
extract_components <- function(listOfModels,
                               response = NULL,
                               what = c("aic", "bic", "llonly", 
                                        "lrt", "Ftest", "sigHunt"),
                               REML = FALSE, alpha = NULL, 
                               df = 1)
{
  
  
  # get response from model
  y <- listOfModels[[1]]$y
  if(is.null(y)){
    if(is.null(response)) stop("response must be supplied") else
      y <- response  
  } 
  # get number of observations
  n <- length(y)
  
  # check alpha and df
  if(!is.null(alpha)) stopifnot(alpha > 0 & alpha <= 0.5)
  if(!is.null(df) && 
     (all.equal(round(df),df)!=TRUE | df < 0)) stop("df must be a positive integer.") 
  
  # check what model selection procedure was conducted
  what <- match.arg(what)
  
  #####################################
  # define components
  
  # get hat matrix of each model
  modfits <- lapply(listOfModels, getHatMat)
  
  # define number of columns per model
  ps <- sapply(listOfModels, function(x) ncol(model.matrix(x)))
  ps <- ps*REML # if ML was used, this sets all ps to 0
  
  # define the A calculation function
  lb_A <- function(n, p1, p2, pen1, pen2, Px1, Px2){
    
    diag(Px1) <- diag(Px1)-1
    diag(Px2) <- diag(Px2)-1
    
    Px1 <- -1*Px1
    Px2 <- -1*Px2
    
    return(
      (n-p1) * exp( -(p2 - p1 + pen1 - pen2)/n ) * Px2 - (n-p2) * Px1
    )
  }
  
  # prepare components
  if(what == "aic"){ 
    
    critvals <- do.call("AIC", listOfModels)
    bestInd <- which.min(critvals$AIC)
    pens <- 2*critvals$df
    
  }else if(what == "bic"){
    
    critvals <- do.call("BIC", listOfModels)
    bestInd <- which.min(critvals$BIC)
    pens <- log(length(y))*critvals$df
    
  }else if(what == "llonly"){
    
    critvals <- sapply(listOfModels, logLik)
    bestInd <- which.min(critvals)
    
  }else if(what == "lrt"){
    
    if(length(listOfModels) != 2) stop("Likelihood ratio comparisons only allowed for two competing models.")

    p1 <- ncol(model.matrix(listOfModels[[1]]))
    p2 <- ncol(model.matrix(listOfModels[[2]]))
    
    if(p1 > p2) listOfModels <- rev(listOfModels)
        
    critvals <- sapply(listOfModels, logLik)
    pens <- c( pchisq(q = 1 - alpha, df = df), 0)
    bestInd <- as.numeric(-2 * diff(critvals) >= pchisq(q = 1 - alpha, df = df)) + 1
    
    if(bestInd == 2){
      
      # switch components
      pens <- rev(pens)
      ps <- rev(pens)
      modfits <- rev(modfits)
      
    }
     
  }else if(what == "Ftest"){
    
    if(length(listOfModels) != 2) stop("F-test comparisons only allowed for two competing models.")
    
    p1 <- ncol(model.matrix(listOfModels[[1]]))
    p2 <- ncol(model.matrix(listOfModels[[2]]))
    
    testres <- anova(listOfModels[[1]], listOfModels[[2]])
    critval <- qf(0.95, 1, n-p2)
    kappa <- critval * (p2-p1)/(n-p2)
    # check
    bestInd <- sum(resid(listOfModels[[1]])^2) -
      (1+kappa)*sum(resid(listOfModels[[2]])^2) <= 0
    stopifnot((testres$p.value <= 0.05) == !bestInd)
    
    # if models are given in the order H_0, H_1
    # take the negative LR
    notBestInd <- which((1:2)!=bestInd)
    
    A <- list(modfits[[bestInd]] + kappa * (diag(n) - modfits[[notBestInd]]) - modfits[[notBestInd]])
    
    if((p1 > p2 & bestInd == 1) | (p1 < p2 & bestInd == 2)) A[[1]] <- -1*A[[1]]
    
    
  }else if(what == "sigHunt"){
    
    
    
  }

  if(what != "Ftest"){
  
    # calculate the A's
    A <- lapply((1:length(listOfModels))[-bestInd], function(i)
      lb_A(n = n, p1 = ps[bestInd], p2 = ps[i], pen1 = pens[bestInd],
           pen2 = pens[i], Px1 = modfits[[bestInd]], Px2 = modfits[[i]])
    )
  
  }
  
  # return object components of inequalities
  return(list(A = A, c = 0, y = y, bestInd = bestInd))

}




#' Calculate the space restriction limits of a parameter
#'
#' @param comps result of \code{calculate_components} function
#' @param vTs named list of testvectors
#' @return named \code{list} of class \code{limitObject}. Each element corresponds to one 
#' test vector and does contain \code{vT}, the test vector, and \code{limits}, 
#' an \code{Intervals} object for the calculated limits
#'
#' @importFrom intervals interval_intersection interval_union interval_complement
#' @export
#'
calculate_limits <- function(comps, vTs)
{
  
 
  if(!is.list(vTs)) vTs <- list(vTs)
  
  Pv <- lapply(vTs, function(vt){
    
    if(nrow(vt)!=1){ 
      
      attr(vt, "type") <- "group"
      return(vt) 
      
    }else{ 
      
      r <- t(vt)%*%vt / as.numeric(vt%*%t(vt))
      attr(r, "type") <- "metric"
      return(r)
    }
  
  })
  
  nrMods <- length(comps$A)
  
  res <- lapply(1:length(vTs), function(j){
    
    # for all test vectors (all parameters)
    
    # calculate limits
    vlims <- lapply(1:nrMods, function(k) getBoundsPen(A = comps$A[[k]],
                                                       pv = Pv[[j]], 
                                                       y = comps$y, 
                                                       vt=vTs[[j]])
    )
    
    if(attr(Pv[[j]], "type") == "metric"){
    
      # intersect limits
      limits <- do.call("interval_intersection", lapply(vlims, "[[", "lim"))
    
    }else{ ## (attr(Pv[[j]], "type") == "group")
      
      # vlims <- vlims[!sapply(vlims, function(li) nrow(li$lim)==1 &
      #                         li$lim[[1]]==-Inf &
      #                         li$lim[[2]]==Inf)]
      # limits <- do.call("interval_union", lapply(vlims, "[[", "lim"))
      # limits <- interval_union(limits, Intervals(c(-Inf,0)))
      # limits <- interval_complement(limits, check_valid = FALSE)
      
      lims <- lapply(vlims,"[[","lim")
      limits <- do.call("interval_intersection", 
                        lapply(lims, function(ii) interval_intersection(ii, Intervals(c(0, Inf)))))
      
    }
    
    return(
      list(vT = vTs[[j]], 
           limits = limits)
    )
    
  })
  
  names(res) <- names(vTs)
  class(res) <- "limitObject"
  return(res)
  
  
}

combine_limitObjects <- function(listOfLimitObjects, y){
  
  names <- unique(c(sapply(listOfLimitObjects, names)))
  res <- lapply(names, function(nam){
    
    limits <- lapply(listOfLimitObjects, function(x) x[[nam]])
    
    vT <- limits[[1]]$vT
    
    limits <- do.call("interval_intersection", lapply(limits, "[[", "limits"))
    
    if(nrow(vT)==1 && !any(sapply(1:nrow(limits),function(i)
      xinInt(x = as.numeric(vT%*%y), int = limits[i,]))))
      stop("Wrong limits. No interval does include the actual value of interest.")
    
    return(list(vT = vT, limits = limits))
    
  })
  names(res) <- names
  
  return(res)
  
}
davidruegamer/coinflibs documentation built on May 14, 2019, 10:33 a.m.