#' Linear regression model, creating a new class
#'
#' Uses QR decomposition to compute a linear regression model from the specified
#' formula and included data. It returns the values calculated in a new S3 class
#' called "linreg"
#'
#' @param formula The formula for the linear regression model, in the class "formula"
#' @param data A data set including the variables that the model contains
#'
#' @return Returns the class "linreg" and all the values in a list.
#'
#' All the variables included in the list:
#' \itemize{
#' \item{argnames} - {The name of the data set and formula used}
#' \item{coefficients} - {Coefficients of the variables used in the model}
#' \item{fitted_values} - {Predicted values from the model}
#' \item{residuals} - {Residuals of from the model}
#' \item{df} - {Degrees of freedom of the model}
#' \item{sigma_sq} - {Variance}
#' \item{variance_coef} - {Variance of the coefficients}
#' \item{t_values} - {t-values for each coefficient}
#' \item{p_values} - {P-values for each coefficient}
#' }
#'
#' @importFrom stats model.matrix pt
#'
#' @export
#'
#' @examples
#' data("iris")
#' linreg(formula = Petal.Length ~ Species, data = iris)
#'
linreg <- function(formula, data){
argnames <- sys.call()
stopifnot(class(formula) == "formula" & exists(as.character(argnames[[3]])))
X <- model.matrix(formula, data)
y <- data[, all.vars(formula)[1]]
QR <- qr(X)
beta_hat <- qr.coef(qr(X), y)
y_hat <- qr.fitted(QR, y)
names(y_hat) <- rownames(data)
residual_hat <- qr.resid(QR, y)
names(residual_hat) <- rownames(data)
df <- length(y) - QR$rank
sigma_sq_hat <- sum((y-y_hat)^2)/df
var_beta_hat <- sigma_sq_hat * chol2inv(qr.R(QR))
t_beta <- beta_hat / sqrt(diag(var_beta_hat))
p_value <- pt(abs(t_beta), df, lower.tail = FALSE) * 2
lin_mod <- list(argnames = argnames,
coefficients = beta_hat,
fitted_values = y_hat,
residuals = residual_hat,
df = df,
sigma_sq = sigma_sq_hat,
variance_coef = var_beta_hat,
t_values = t_beta,
p_values = p_value)
class(lin_mod) <- "linreg"
return(lin_mod)
}
#' A generic print function for the class linreg
#'
#' A function that prints the formula, coefficients when linreg class is called
#'
#' @param x A object of the class linreg
#' @param ... Other arguments
#' @return Returns a printed statement
#' @export
#'
#'
print.linreg <- function(x, ...){
argnames <- as.character(x$argnames)
cat("Call: \n", argnames[1], "(formula = ", argnames[2], ", data = ",
argnames[3], ")", sep = "")
cat("\n")
cat(format(c("Coefficients: \n", names(x$coefficients), "\n",
sprintf("%.3f", x$coefficients)), justify = "right"))
}
#' Lowess smoothing function
#'
#' A function that uses the smoothing function LOWESS from the stats package.
#' Which uses locally weighted polynomial regression for the smoothing function
#'
#' @param x A vector of values corresponding coordinates in the plot
#' @param y A vector of values corresponding to coordinates in the plot
#'
#' @importFrom stats lowess
#'
#' @return Returns a data frame of the calculated values
#' @export
#'
smooth_function <- function(x, y) {
as.data.frame(lowess(x, y))
}
#' Find outliers (if any exist)
#'
#' Calculates if the observed residual is an outlier given the 1.645 standard
#' deviations away from the mean.
#'
#' @param x Takes a vector of residuals
#'
#' @importFrom stats sd
#'
#' @return Returns a boolean index for those values that are outliers
#' @export
#'
is_outlier <- function(x) {
return(x < mean(x) - 1.645 * sd(x) | x > mean(x) + 1.645 * sd(x) )
}
#' A generic plot function for the class linreg
#'
#' Uses the generic plot function to create specific plots using the package ggplot2.
#' The function outputs two plots similar to those from plot(class(lm)). Uses
#' both functions "is_outlier" and "smooth_function"
#'
#' @param x A object of class "linreg"
#' @param ... Other arguments.
#'
#' @import ggplot2
#' @importFrom gridExtra grid.arrange
#' @return Returns two plots, residuals vs fitted values with a smoothing line,
#' and standardized residuals vs fittedvalues
#' @export
#'
plot.linreg <- function(x, ...){
Fitted_values <- Residuals <- Outlier <- y <- NULL
argnames <- as.character(x$argnames)
df <- data.frame(Residuals = x$residuals,
Fitted_values = x$fitted_values,
Outlier = ifelse(is_outlier(x$residuals),
names(which(x$residuals == x$residuals)),
NA_real_))
lowess_curve <- smooth_function(df$Fitted_values,
df$Residuals)
fit_res <- ggplot(df, aes(Fitted_values, Residuals)) +
geom_point(color = "black", fill = "white", shape = 21) +
geom_line(data = lowess_curve, aes(x, y), color = "red") +
geom_text(data = df, aes(label = Outlier), na.rm = TRUE,
hjust = -0.1, size = 4, alpha = 0.9, color = "black") +
theme_bw() + theme(plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
axis.text = element_text(color = "black")) +
xlab(paste0("Fitted values \n", argnames[1], "(", argnames[2], ")")) +
ylab("Residuals") +
ggtitle("Residuals vs Fitted")
df_scale <- data.frame(Residuals = sqrt(abs(scale(x$residuals))),
Fitted_values = x$fitted_values)
lowess_curve_scaled <- smooth_function(df_scale$Fitted_values,
df_scale$Residuals)
scale_loc <- ggplot(df_scale, aes(Fitted_values, Residuals)) +
geom_point(color = "black", fill = "white", shape = 21) +
geom_line(data = lowess_curve_scaled, aes(x, y), color = "red") +
theme_bw() + theme(plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
axis.text = element_text(color = "black")) +
xlab(paste0("Fitted values \n", argnames[1], "(", argnames[2], ")")) +
ylab(expression(sqrt("Standardized residuals"))) +
ggtitle("Scale-location")
grid.arrange(fit_res, scale_loc,
layout_matrix = rbind(c(1), c(2)))
}
#' Creates a generic function to retrieve residuals
#'
#' Creaters a generic funtion "resid()"
#'
#' @param x An object
#' @param ... Other arugments.
#'
#'
#' @export
#'
resid <- function(x,...) UseMethod("resid")
#' Residuals
#'
#' Connects resid() to the class linreg and returns the residuals from
#' linear regression
#'
#' @param x A object of the class linreg
#' @param ... Other arguments
#'
#' @export
#'
resid.linreg <- function(x, ...){
return(x$residuals)
}
#' Generic function for retrieving fitted values
#'
#' Creates a generic function "pred()"
#'
#' @param x An object.
#' @param ... Other arguments.
#'
#' @export
#'
#'
pred <- function(x, ...) {
UseMethod("pred")
}
#' Fitted values
#'
#' Gives "pred()" function a specific output for class "linreg"
#'
#' @param x A object of class "linreg"
#' @param ... Other arguments.
#'
#' @return Returns a vector of fitted values from the linear regression model
#' @export
#'
pred.linreg <- function(x, ...){
return(x$fitted_values)
}
#' Linear regression coefficients
#'
#' Uses the generic function coef() and gives a special output from the linreg class
#' It returns a vector of coefficents from the linear regression model
#'
#' @param object A object of class "linreg"
#' @param ... Other arguments.
#'
#' @return A vector of coefficients from the linear regression model
#' @export
#'
coef.linreg <- function(object, ...){
return(object$coefficients)
}
#' Summary of the linreg class
#'
#' Using the generic function summary it gives a summary of the "linreg" class
#' mimicking the ouput given by using the function summary() on a object
#' of the class "lm".
#'
#' @param object A object of the class "linreg"
#' @param ... Other arguments.
#'
#' @return Prints the output of the class "linreg" it prints a structured and
#' summarized view of the:
#' Formula
#' Coefficients of the variables
#' standard deviation of the coefficients
#' t-values
#' p-values
#' degrees of freedom
#'
#' @export
#'
#'
summary.linreg <- function(object, ...){
star <- ifelse(object$p_values < 0.001, "***",
ifelse(object$p_values < 0.01, "**",
ifelse(object$p_values < 0.05, "*",
ifelse(object$p_values < 0.1, ".", " "))))
argnames <- as.character(object$argnames)
cat("Call:", "\n",
argnames[1], "(formula = ", argnames[2], ", data=", argnames[3], ")\n\n",
sep = "")
cat("Coefficients:")
cat("\n", sep = "")
cat(sprintf("%12s %9s %10s %8s %9s",
"", "Estimate", "Std. Error", "t value", "Pr(>|t|)"),
"\n", sep = "" )
cat(format(paste0(sprintf("%10s %9s %10s %8s %9s",
format(names(object$coefficients), justify = "left"),
sprintf("%.5f", object$coefficients),
sprintf("%.5f", sqrt(diag(object$variance_coef))),
sprintf("%.3f", object$t_values),
sprintf("%10s", format.pval(object$p_value))),
" ",
sprintf("%3s", star), "\n"),
justify = "right"), sep = "")
cat("---\n", "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1",
"\n", sep = "")
cat("Residual standard error:", sprintf("%.4f", sqrt(object$sigma_sq))
, "on", object$df, "degrees of freedom")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.