R/linreg.R

Defines functions summary.linreg coef.linreg pred.linreg pred resid.linreg resid plot.linreg is_outlier smooth_function print.linreg linreg

Documented in coef.linreg is_outlier linreg plot.linreg pred pred.linreg print.linreg resid resid.linreg smooth_function summary.linreg

#' Linear regression model, creating a new class
#'
#' Uses QR decomposition to compute a linear regression model from the specified
#' formula and included data. It returns the values calculated in a new S3 class
#' called "linreg"
#'
#' @param formula The formula for the linear regression model, in the class "formula"
#' @param data A data set including the variables that the model contains
#'
#' @return Returns the class "linreg" and all the values in a list.
#'
#' All the variables included in the list:
#' \itemize{
#' \item{argnames}      - {The name of the data set and formula used}
#' \item{coefficients}  - {Coefficients of the variables used in the model}
#' \item{fitted_values} - {Predicted values from the model}
#' \item{residuals}     - {Residuals of from the model}
#' \item{df}            - {Degrees of freedom of the model}
#' \item{sigma_sq}      - {Variance}
#' \item{variance_coef} - {Variance of the coefficients}
#' \item{t_values}      - {t-values for each coefficient}
#' \item{p_values}      - {P-values for each coefficient}
#' }
#' 
#' @importFrom stats model.matrix pt
#'
#' @export
#'
#' @examples
#' data("iris")
#' linreg(formula = Petal.Length ~ Species, data = iris)
#'

linreg <- function(formula, data){
  argnames <- sys.call()
  stopifnot(class(formula) == "formula" & exists(as.character(argnames[[3]])))
  
  X <- model.matrix(formula, data)
  y <- data[, all.vars(formula)[1]]
  
  QR <- qr(X)
  beta_hat <- qr.coef(qr(X), y) 
  y_hat <- qr.fitted(QR, y) 
  names(y_hat) <- rownames(data)
  residual_hat <- qr.resid(QR, y)
  names(residual_hat) <- rownames(data)
  df <- length(y) - QR$rank
  sigma_sq_hat <- sum((y-y_hat)^2)/df
  var_beta_hat <- sigma_sq_hat * chol2inv(qr.R(QR))
  t_beta <- beta_hat / sqrt(diag(var_beta_hat))
  p_value <- pt(abs(t_beta), df, lower.tail = FALSE) * 2
  
  lin_mod <- list(argnames = argnames, 
                  coefficients = beta_hat, 
                  fitted_values = y_hat, 
                  residuals = residual_hat, 
                  df = df, 
                  sigma_sq = sigma_sq_hat, 
                  variance_coef = var_beta_hat, 
                  t_values = t_beta, 
                  p_values = p_value)
  
  class(lin_mod) <- "linreg"
  return(lin_mod)
}

#' A generic print function for the class linreg
#'
#' A function that prints the formula, coefficients when linreg class is called
#'
#' @param x A object of the class linreg
#' @param ... Other arguments
#' @return Returns a printed statement
#' @export
#'
#'

print.linreg <- function(x, ...){
  argnames <- as.character(x$argnames)
  cat("Call: \n", argnames[1], "(formula = ", argnames[2], ", data = ", 
      argnames[3], ")", sep = "")
  cat("\n")
  cat(format(c("Coefficients: \n", names(x$coefficients), "\n", 
               sprintf("%.3f", x$coefficients)), justify = "right"))
}

#' Lowess smoothing function
#'
#' A function that uses the smoothing function LOWESS from the stats package.
#' Which uses locally weighted polynomial regression for the smoothing function
#'
#' @param x A vector of values corresponding coordinates in the plot
#' @param y A vector of values corresponding to coordinates in the plot
#'
#' @importFrom stats lowess
#'
#' @return Returns a data frame of the calculated values
#' @export
#'

smooth_function <- function(x, y) {
  as.data.frame(lowess(x, y))
}

#' Find outliers (if any exist)
#'
#' Calculates if the observed residual is an outlier given the 1.645 standard 
#' deviations away from the mean.
#'
#' @param x Takes a vector of residuals
#'
#' @importFrom stats sd
#'
#' @return Returns a boolean index for those values that are outliers
#' @export
#'

is_outlier <- function(x) {
  return(x < mean(x) - 1.645 * sd(x) | x >  mean(x) + 1.645 * sd(x) )
}

#' A generic plot function for the class linreg
#'
#' Uses the generic plot function to create specific plots using the package ggplot2.
#' The function outputs two plots similar to those from plot(class(lm)). Uses
#' both functions "is_outlier" and "smooth_function"
#'
#' @param x A object of class "linreg"
#' @param ... Other arguments.
#'
#' @import ggplot2
#' @importFrom gridExtra grid.arrange
#' @return Returns two plots, residuals vs fitted values with a smoothing line, 
#' and standardized residuals vs fittedvalues
#' @export
#'

plot.linreg <- function(x, ...){
  Fitted_values <- Residuals <- Outlier <- y <- NULL 

  argnames <- as.character(x$argnames)
  
  df <- data.frame(Residuals = x$residuals, 
                   Fitted_values = x$fitted_values,
                   Outlier = ifelse(is_outlier(x$residuals), 
                                    names(which(x$residuals ==  x$residuals)), 
                                    NA_real_))
  lowess_curve <- smooth_function(df$Fitted_values, 
                                  df$Residuals)
  
  fit_res <- ggplot(df, aes(Fitted_values, Residuals)) + 
    geom_point(color = "black", fill = "white", shape = 21) + 
    geom_line(data = lowess_curve, aes(x, y), color = "red") +
    geom_text(data = df, aes(label = Outlier), na.rm = TRUE, 
              hjust = -0.1, size = 4, alpha = 0.9, color = "black") +
    theme_bw() + theme(plot.title = element_text(hjust = 0.5), 
                       panel.grid = element_blank(), 
                       axis.text = element_text(color = "black")) + 
    xlab(paste0("Fitted values \n", argnames[1], "(", argnames[2], ")")) + 
    ylab("Residuals") + 
    ggtitle("Residuals vs Fitted")
  
  
  df_scale <- data.frame(Residuals = sqrt(abs(scale(x$residuals))), 
                         Fitted_values = x$fitted_values)
  lowess_curve_scaled <- smooth_function(df_scale$Fitted_values, 
                                         df_scale$Residuals)
  
  scale_loc <- ggplot(df_scale, aes(Fitted_values, Residuals)) + 
    geom_point(color = "black", fill = "white", shape = 21) + 
    geom_line(data = lowess_curve_scaled, aes(x, y), color = "red") + 
    theme_bw() + theme(plot.title = element_text(hjust = 0.5), 
                       panel.grid = element_blank(), 
                       axis.text = element_text(color = "black")) + 
    xlab(paste0("Fitted values \n", argnames[1], "(", argnames[2], ")")) + 
    ylab(expression(sqrt("Standardized residuals"))) + 
    ggtitle("Scale-location")
  
  grid.arrange(fit_res, scale_loc, 
               layout_matrix = rbind(c(1), c(2)))
}

#' Creates a generic function to retrieve residuals 
#'
#' Creaters a generic funtion "resid()"
#'
#' @param x An object
#' @param ... Other arugments.
#'
#'
#' @export
#'

resid <- function(x,...) UseMethod("resid")


#' Residuals
#' 
#' Connects resid() to the class linreg and returns the residuals from 
#' linear regression
#' 
#' @param x A object of the class linreg
#' @param ... Other arguments
#' 
#' @export
#' 

resid.linreg <- function(x, ...){
  return(x$residuals)
}

#' Generic function for retrieving fitted values
#' 
#' Creates a generic function "pred()"
#' 
#' @param x An object.
#' @param ... Other arguments.
#'
#' @export
#'
#'

pred <- function(x, ...) {
  UseMethod("pred")
}

#' Fitted values
#'
#' Gives "pred()" function a specific output for class "linreg"
#'
#' @param x A object of class "linreg"
#' @param ... Other arguments.
#'
#' @return Returns a vector of fitted values from the linear regression model
#' @export
#'

pred.linreg <- function(x, ...){
  return(x$fitted_values)
}

#' Linear regression coefficients
#'
#' Uses the generic function coef() and gives a special output from the linreg class
#' It returns a vector of coefficents from the linear regression model
#'
#' @param object A object of class "linreg"
#' @param ... Other arguments.
#'
#' @return A vector of coefficients from the linear regression model
#' @export
#'

coef.linreg <- function(object, ...){
  return(object$coefficients)
}

#' Summary of the linreg class
#' 
#' Using the generic function summary it gives  a summary of the "linreg" class
#' mimicking the ouput given by using the  function summary() on a object
#' of the class "lm".
#'  
#' @param object A object of the class "linreg"
#' @param ... Other arguments.
#'  
#' @return Prints the output of the class "linreg" it prints a structured and
#' summarized view of the:
#' Formula
#' Coefficients of the variables
#' standard deviation of the coefficients
#' t-values
#' p-values
#' degrees of freedom
#'  
#' @export
#'  
#'  

summary.linreg <- function(object, ...){
  
  star <- ifelse(object$p_values < 0.001, "***", 
                 ifelse(object$p_values < 0.01, "**", 
                        ifelse(object$p_values < 0.05, "*",
                               ifelse(object$p_values < 0.1, ".", " ")))) 
  
  argnames <- as.character(object$argnames)
  cat("Call:", "\n",  
      argnames[1], "(formula = ", argnames[2], ", data=", argnames[3], ")\n\n", 
      sep = "")
  
  cat("Coefficients:")
  cat("\n", sep = "")
  cat(sprintf("%12s %9s %10s %8s %9s", 
                       "", "Estimate", "Std. Error", "t value", "Pr(>|t|)"), 
      "\n", sep = "" )
  cat(format(paste0(sprintf("%10s %9s %10s %8s %9s", 
               format(names(object$coefficients), justify = "left"), 
               sprintf("%.5f", object$coefficients), 
               sprintf("%.5f", sqrt(diag(object$variance_coef))), 
               sprintf("%.3f", object$t_values), 
               sprintf("%10s", format.pval(object$p_value))),
               " ", 
               sprintf("%3s", star), "\n"), 
             justify = "right"), sep = "")
  cat("---\n", "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1", 
      "\n", sep = "")
  cat("Residual standard error:", sprintf("%.4f", sqrt(object$sigma_sq))
      , "on", object$df, "degrees of freedom")
}
CharlesTGurra/lab4RT documentation built on Sept. 25, 2020, 8:49 a.m.