#' myLinearRegression
#'
#'
#' This function builds a linear regression model of y on every column of x
#' and returns the coefficients and their p-values. It can regress on only a
#' chosen set of rows within a dataset, which makes it useful for passing
#' test/train splits or cross-validation splits into the model. It also prints
#' a scatterplot matrix if there are fewer than five independent variables.
#' Created by Alex Moore.
#'
#' @param y The dependent variable as a vector.
#' @param x The independent variables as a matrix or data frame, one column
#'   per variable. A bare vector is treated as a single variable.
#' @param sub The index of rows/observations used for building the model.
#'   Defaults to every observation in y.
#' @return A data frame with one row per model term and two columns: the
#'   coefficient estimate ("Estimate") and its p-value ("Pr(>|t|)"). As a side
#'   effect, a scatterplot matrix is printed when there are fewer than five
#'   independent variables (this needs the GGally and ggplot2 packages);
#'   otherwise a message says that there are too many variables to plot.
#' @export
#' @examples myLinearRegression(mtcars$mpg, mtcars[ , 2:4], 1:20)
myLinearRegression <- function(y, x, sub = seq_along(y)) {
  # lm() only accepts a data frame as `data`, so coerce matrix (or vector)
  # input up front instead of failing later inside model.frame().
  predictors <- as.data.frame(x)

  # Keep only the requested observations. drop = FALSE stops a single
  # predictor column from collapsing into a bare vector.
  predictors <- predictors[sub, , drop = FALSE]
  response <- y[sub]

  # Bind the response and the predictors into one data frame; the response
  # column is named "y" so that the formula below can refer to it.
  model_data <- cbind(y = response, predictors)

  if (ncol(predictors) < 5) {
    # The plotting packages are only needed on this branch, and are called
    # through their namespaces so the caller's search path is left untouched.
    plot_pkgs <- c("GGally", "ggplot2")
    installed <- vapply(plot_pkgs, requireNamespace, logical(1),
                        quietly = TRUE)
    if (!all(installed)) {
      stop("Package(s) needed for the scatterplot matrix are not installed: ",
           paste(plot_pkgs[!installed], collapse = ", "),
           call. = FALSE)
    }
    print(GGally::ggpairs(model_data, title = "Scatterplot Matrix") +
            ggplot2::theme_bw())
  } else {
    message("Too many variables to plot")
  }

  # Build the model on the selected observations.
  model_fit <- lm(y ~ ., data = model_data)

  # Keep only the columns of interest from the coefficient table. A data
  # frame is returned rather than a list because it holds everything needed
  # in a directly usable shape.
  coef_table <- as.data.frame(summary(model_fit)$coefficients)
  coef_table[, c("Estimate", "Pr(>|t|)")]
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.