#'linear_model
#'
#'Used to fit linear models.
#'
#'@param formula a symbolic description of the model to be fitted
#'@param data a data frame containing the variables in the model
#'
#'
#'@return a list containing following components:
#'@return \code{formula}: the symbolic description of the model
#'@return \code{Coefficients}: the estimated coefficients for each predictor
#'@return \code{s.e}: the standard error of coefficient estimates
#'@return \code{Cov_beta}: the variance-covariance matrix of estimated coefficients
#'@return \code{t_value}: the t statistics of each coefficient
#'@return \code{p_value}: the p value of corresponding coefficients' t test
#'@return \code{sigma_hat}: the square root of MSE
#'@return \code{R2}: R square
#'@return \code{F_statistics}: the F statistics for the full model
#'@return \code{fitted_values}: the fitted mean values
#'@return \code{residuals}: the residuals, observed values mins fitted
#'@return \code{X}: the model matrix used
#'@return \code{Y}: the response used
#'
#'@examples
#'ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
#'trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
#'group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
#'weight <- c(ctl, trt)
#'data <- data.frame(cbind(weight, group))
#'lm.D9 <- linear_model(weight ~ group, data)
#'
#'@import dplyr
#'@import tidyselect
#'
#'@export
#'
linear_model <- function(formula, data) {
# get x and y matrix
X <- data.matrix(modelr::model_matrix(data, formula))
ystring <- toString(formula[[2]])
val_names <- all.vars(formula)
y <- data.matrix(dplyr::select(stats::na.omit(data[,val_names]),tidyselect::all_of(ystring)))
# get n and p values
n <- nrow(X)
p <- ncol(X)
# check singularity
fullrank_check <- tryCatch(solve(t(X)%*%X), error = function(e) e)
if(!any(class(fullrank_check) == "error")) {
# fitted values
beta_hat <- solve(t(X) %*% X) %*% t(X) %*% y
hat_matrix <- X %*% solve(t(X) %*% X) %*% t(X)
y_hat <- hat_matrix %*% y
colnames(beta_hat) <- "Estimate"
# s.e for betas
residuals <- y - y_hat
colnames(residuals) <- "residuals"
SSE <- sum((residuals)^2)
MSE <- SSE / (n - p)
sigma_hat <- c("sigma_hat" = sqrt(MSE), "df" = n - p)
var_cov <- MSE * solve(t(X) %*% X)
se.beta <- sqrt(diag(var_cov))
# t statistics
t_stat <- beta_hat / se.beta
colnames(t_stat) <- "t value"
# p value
p_values <- unlist(sapply(t_stat, function(x) 2*(1-stats::pt(abs(x),n-p))))
# multiple R square and adjusted R square
SSR <- sum((y_hat - mean(y))^2)
SSY <- sum((y - mean(y))^2)
R_sq <- SSR/SSY
R_sq_adj <- 1 - (SSE/(n - p))/(SSY/(n - 1))
RSquare <- c("Multiple R-squared" = R_sq, "Adjusted R-squared" = R_sq_adj)
# F statistics
F_stat <- (SSR/(p - 1))/MSE
df1 <- p - 1
df2 <- n - p
P_f <- 1 - stats::pf(F_stat, df1, df2)
Fdata <- c("F statistics" = F_stat, "df1" = df1, "df2" = df2, "pvale" = P_f)
# fill in results
results <- list()
results$formula <- formula
results$Coefficients <- beta_hat
results$s.e <- se.beta
results$Cov_beta <- var_cov
results$t_value <- t_stat
results$p_value <- p_values
results$sigma_hat <- sigma_hat
results$R2 <- RSquare
results$F_statistics <- Fdata
results$fitted_values <- y_hat
results$residuals <- residuals
results$X <- X
results$Y <- y
invisible(results)
print(results[1:9])
return(results)
} else print("Please check your predictors")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.