R/lm1.R

Defines functions lm1

Documented in lm1

#'lm1
#'
#'Fit linear models.
#'
#'@param formula an object of class "formula": a symbolic description of the model to be fitted.
#'
#'@param data an optional data frame
#'
#'@param intercept an indicator to indicate whether the model contains an intercept or not. If TRUE, then the model contains an intercept; if FALSE then the model does not contain an intercept. The default value is TRUE.
#'
#'@param interaction a two-column matrix including interaction terms of covariates. The default value is NULL, which means no interaction in the model.
#'
#'@param na.action a function which indicates what should happen when the data contain NAs. Options include 'na.omit' (default, to remove rows with NA), 'na.fail'(to stop regression), and 'na.impute' (replace with mean).
#'
#'@return a invisible list containing the following components: "call", "data", "y","x", "coefficients", "fitted.values","summary", "residuals", "rsquare", "adjusted_rsquare","rank", "df.residual","fstat", "fpvalue","SSE", "MSE", "leverage" and "cov.matrix".
#'
#'@examples
#'
#'lm1(mpg ~ disp + wt, data = mtcars)
#'
#'## Set na.action to `na.impute` to replace na with column mean
#'# Add na values
#'mtcars.na <- mtcars[,c(1,3,6)]
#'mtcars.na[1,2] <- NA
#'# Conduct linear regression
#'lm1(mpg ~ disp + wt, data = mtcars.na, na.action = "na.impute")
#'
#'## Linear regression without intercept
#'lm1(mpg ~ disp + wt, data = mtcars, intercept = FALSE)
#'
#'## Linear regression with interactions
#'lm1(mpg ~ disp + wt, data = mtcars, interaction = matrix(c("disp", "wt"),1,2))
#'
#'@importFrom stats na.pass na.omit var pt pf quantile qqnorm
#'@importFrom graphics par abline
#'@import bench
#'
#'@export
#'
lm1 <- function(formula, data = NULL, intercept = TRUE, interaction = NULL,na.action = "na.omit"){
  # extract data needed
  data <- stats::model.frame(formula, data, na.action = na.pass)
  # processing NA values
  if (anyNA(data) == TRUE){
    if (na.action == "na.omit"){
      data <- na.omit(data)
    } else if (na.action == "na.impute"){
      for(i in 1:ncol(data)){
        data[is.na(data[,i]), i] <- mean(data[,i], na.rm = TRUE)
      }
    } else if (na.action == "na.fail"){
      stop("Data contains NAs.")
    }
  }

  # add interaction terms
  if(!is.null(interaction)){
    for(i in 1:nrow(interaction)){
      data = cbind(data,data[,interaction[i,]][1]*data[,interaction[i,]][2])
      names(data) = c(names(data)[-length(names(data))],paste(interaction[i,], collapse = "*"))
    }
  }
  # extract y and x
  y <- as.vector(data[,1])
  n <- length(y)
  x <- as.matrix(data[,-1])
  if(nrow(x) != n){stop("length of y must be same as num of rows of covariate")} #
  if(intercept){x <- as.matrix(cbind(rep(1,nrow(x)),x))} # add intercept
  p <- ncol(x)

  # coefficients and y.hat(fitted values)
  beta <- solve(t(x) %*% x) %*% t(x) %*% y
  y.hat <- as.vector(as.vector(x %*% beta))
  names(y.hat) <- row.names(data)

  coefficients <- as.data.frame(t(beta))
  if(intercept){
    names(coefficients) = c("(intercept)",names(data)[-1])#,row.names = c("coefficients")
  }else{names(coefficients) = names(data)[-1]}


  # residuals, SSE, SSR, SSY, R^2, adjust R^2, leverage
  res <- as.vector(y - x %*% beta)
  names(res) <- row.names(data)

  SSE <- sum(res^2)
  MSE <- SSE/(n-p)

  SSY <- sum((y-mean(y))^2)
  SSR.sum <- SSY-SSE
  rsquare <- SSR.sum/SSY
  rsquare.adj <- 1 - (SSE/(n-p))/(SSY/(n-1))

  hat.values <- diag(x %*% solve(t(x) %*% x) %*% t(x))
  cov.matrix <- var(x)
  # se, T-values, F-values, and P-values
  se <- sqrt(diag(solve(t(x) %*% x)))*sqrt(MSE)
  tvalue <- beta / se
  pvalue <- 2*pt(abs(tvalue),length(y) - ncol(x),lower.tail=FALSE)

  fvalue <- SSR.sum/(p-1)/MSE
  fpvalue <- pf(fvalue, p-1, n-p-1, lower.tail = FALSE)

  coef <- data.frame("Estimate" = beta, "Std. Error" = se,
                      "t value" = tvalue, "Pr(>|t|)" = pvalue, check.names = FALSE,
                     row.names = names(coefficients))

  call <- noquote(paste(c('lm(', 'formula = ',formula, ')'), collapse = ''))

  # return invisible outputs to provide variables needed in other functions
  invisible <- list(call, data, y, x, coefficients, y.hat,
                    coef, res, rsquare, rsquare.adj, p, n-p, fvalue, fpvalue,
                    SSE, MSE, hat.values, cov.matrix)
  names(invisible) <- c("call", "data", "y","x", "coefficients", "fitted.values",
                        "summary", "residuals", "rsquare", "adjusted_rsquare",
                        "rank", "df.residual","fstat", "fpvalue",
                        "SSE", "MSE", "leverage", "cov.matrix")

  return(invisible(invisible))
}
mengqi00/linear1 documentation built on Dec. 21, 2021, 4:57 p.m.