R/sum_funs.R

#' Summary function for robust SE
#' 
#' An adaptation of the \code{summary.lm} function from the \code{stats} package. Documentation for all arguments can be found there, except for the \code{robust}, \code{cluster}, and \code{type} options.
#' 
#' @param x an object of class "lm", usually, a result of a call to lm.
#' @param robust logical; if TRUE, a heteroskedasticity-robust covariance matrix is used in the summary.
#' @param cluster logical; if TRUE, a cluster-robust covaraiance matrix is sued in the summary.
#' @param type character string that indicates the type of robust covariance matrix to use; defaults to HC1.
#' @return returns a \code{summary.lm} object, in which the variance-covariance matrix is substituted (optionally) with a heteroskedasticity or cluster robust variance-covariance matrix estimator.
#' @details The different types of robust estimators differ in their degrees-of-freedom corrections for finite sample bias. See vignette.
#' @export
.summary_robust = function (x, 
                           robust = FALSE, 
                           cluster = NULL, 
                           type = "HC1") 
{
    if (!is.null(x$weights))
        warning("weigths of x will be ignored")
    
    if (!attr(x$terms, "intercept"))
        stop("no intercept in regression")
    
    # call and terms (create res object to store results)
    res = x[c("call", "terms")]
    
    # model matrix
    m_mat = model.matrix(x)
    
    # basic dimensions
    n = nrow(m_mat)
    k = x$rank

    # fitted values and residuals
    yhat = x$fitted.values
    e = x$residuals

    # warning due to missingness in cluster var
    res$cluster_warn = FALSE
    
    if (robust & is.null(cluster)) {
        
        rdf = n - k
        vcov = .robustSE(m_mat, e, type)
        
    } else if (!robust & !is.null(cluster)) {
        
        if (is.null(x$na.action))
            cl = cluster
        else 
            cl = cluster[-x$na.action]
        
        if (any(is.na(cl))) {
            
            warning(
                paste0("Cluster variable has missing values!",
                       "\nDropping missing clusters ...",
                       "\n(Recode missing clusters into valid integers",
                       " in the case you want them to be included)"
                )
            )
            
            res$cluster_warn = TRUE
            
            nas = which(is.na(cl))
            cl = cl[-nas]
            m_mat = m_mat[-nas, ]
            n = nrow(m_mat)
            
        }
            
        m = length(unique(cl))
        rdf = m - 1L
        
        if (n != length(cl))
            stop("length(cluster) is not equal nrow(model.matrix(x))")
        
        vcov = .clusterSE(m_mat, e, cl, type)
        
    } else {
        
        stop("specify either robust OR cluster but not both")

    }
    
    # n.obs and n.predictors
    res$n = n
    res$k = k
    
    # type of se
    res$cov_type = if (robust) {

        c("robust", type)
        
    } else {
        
        c("cluster", type)
        
    }
    
    # number of clusters if any
    res$m = if (is.null(cluster)) NA else m
    
    # coefficients and regression tables
    beta = x$coefficients
    se = sqrt(diag(vcov))
    t_val = beta / se

    res$coefficients = cbind(
        Estimate = beta, 
        `Std. Error` = se, 
        `t value` = t_val, 
        `Pr(>|t|)` = 2 * pt(abs(t_val), rdf, lower.tail = FALSE)
    )
    
    # mse and r-squared
    rss = sum(e^2)
    mss = sum((yhat - mean(yhat))^2)
    res$rmse = sqrt(rss / rdf)
    res$r.squared <- mss/(mss + rss)
    
    # covariance matrix
    res$cov = vcov
    dimnames(res$cov) = dimnames(res$coefficients)[c(1, 1)]
    
    if (!is.null(x$na.action)) 
        res$na.action = x$na.action
    
    # assign class summary.robustSE
    class(res) = "robustSE"
    
    return(res)
    
}
baruuum/jars documentation built on Nov. 3, 2019, 2:06 p.m.