# Select Build, Build and reload to build and lode into the R-session.
get.signif.code <- function(pvals)
{
# Use "ifelse" instead of traditional if-else statements since these
# will parallelize. In the event of many p-values, this will come in handy.
signif.code <- ifelse(pvals < 0.001, "***",
ifelse(pvals < 0.01, "**",
ifelse(pvals < 0.05, "*",
ifelse(pvals < 0.1, ".", " "))))
return(signif.code)
}
mylm <- function(formula, data = list(), contrasts = NULL, ...){
# Extract model matrix & responses
mf <- model.frame(formula = formula, data = data)
X <- model.matrix(attr(mf, "terms"), data = mf, contrasts.arg = contrasts)
X.T <- t(X)
y <- model.response(mf)
y.T <- t(y)
terms <- attr(mf, "terms")
# Important dimensions
n <- nrow(X)
p <- ncol(X)
k <- ifelse("(Intercept)" %in% colnames(X), p - 1, p)
# Add code here to calculate coefficients, residuals, fitted values, etc...
# and store the results in the list est
xtxinv <- solve(X.T %*% X)
betahat <- xtxinv %*% X.T %*% y
sse <- t(y - (X %*% betahat)) %*% (y - (X %*% betahat))
df <- n - p
sigmasqhat <- sse / df # unbiased sigmasq-est
stderror <- sqrt(sigmasqhat)
betahatstderrors <- stderror[1,1] * sqrt(diag(xtxinv))
betahatcov <- sigmasqhat[1,1] * xtxinv
# z-test of each estimated coefficient
zhat <- (betahat - 0) / sqrt(diag(betahatcov))
betahatpvals <- 2 * pnorm(abs(zhat), lower.tail = FALSE)
yhat <- X %*% betahat
epsilon <- y - yhat
iminush <- diag(n) - (X %*% xtxinv %*% t(X)) # I-H
stdepsilon <- epsilon / (stderror[1,1] * sqrt(diag(iminush)))
studentepsilon <- stdepsilon * sqrt((n-p-1) / (n-p-stdepsilon^2))
# anova
# rsq
sst <- y.T %*% (diag(n) - 1/n * matrix(1, n, n)) %*% y
ssr <- sst - sse
rsq <- ssr / sst
rsqadj<- 1 - ((n - 1) / (n - p)) * (1 - rsq)
msqr<-ssr/p
msqe<-sse/df
#Chisquare approximation
chisqhat<-ssr/(sse/(n-p))
p.val<- pchisq(chisqhat,k, lower.tail = FALSE)
est <- list(terms = terms,
model = mf,
X = X,
betahat = betahat,
sse = sse,
df = df,
n=n,
p=p,
k=k,
sigmasqhat = sigmasqhat,
betahatcov = betahatcov,
stderror = stderror,
betahatstderrors = betahatstderrors,
zhat = zhat,
betahatpvals = betahatpvals,
yhat = yhat,
epsilon = epsilon,
studentepsilon = studentepsilon,
sst=sst,
ssr=ssr,
rsq=rsq,
rsqadj=rsqadj,
msqr=msqr,
msqe=msqe,
chisqhat=chisqhat,
p.val=p.val,
y=y)
# Store call and formula used
est$call <- match.call()
est$formula <- formula
# Set class name. This is very important!
class(est) <- 'mylm'
# Return the object with all results
return(est)
}
print.mylm <- function(object, ...){
cat('Call\n')
print(object$call)
cat("\n")
cat('Coefficients:\n')
output.betahat <- as.vector(object$betahat)
names(output.betahat) <- rownames(object$betahat)
print(output.betahat)
}
summary.mylm <- function(object, ...){
cat("Call:\n")
print(object$call)
cat("\n")
residuals.output.vec <- quantile(object$epsilon)
names(residuals.output.vec) <- c("Min", "1Q", "Median", "3Q", "Max")
cat("Residuals:\n")
print(round(residuals.output.vec, digits = 2))
cat("\n")
cat('Coefficients:\n')
coeff.df <- data.frame("Estimate" = object$betahat)
coeff.df$`Std. Error` <- object$betahatstderrors
coeff.df$`z-value` <- object$zhat
coeff.df$`Pr(>|z|)` <- object$betahatpvals
coeff.df$` ` <- get.signif.code(object$betahatpvals)
print(format(coeff.df, digits = 3))
cat("---\n")
cat("Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n\n")
cat(c("Residual standard error: ", format(object$stderror, digits = 4),
" on ", object$df, " degrees of freedom", "\n"))
cat(c("Multiple R-squared: ", format(object$rsq, digits = 4),
", \t", "Adjusted R-squared: ", format(object$rsqadj, digits = 4)), "\n")
cat(c("Chi Square Statistic: ", format(object$chisqhat, digits = 4),
"on ", object$k, " degrees of freedom",",\t",c("Pr(>|X|):",format(object$p.val,digits=4)), "\n"))
}
plot.mylm <- function(object, ...){
library(ggplot2)
# ggplot requires that the data is in a data.frame, this must be done here
df <- data.frame(yhat = object$yhat, epsilon = object$epsilon)
ggplot(df, aes(x=yhat, y=epsilon)) +
geom_point() + geom_smooth(method = "loess") +
labs(x = "Fitted values", y = "Residuals") +
ggtitle("Residual vs Fitted Values")
}
anova.mylm <- function(object, ...){
# Code here is used when anova(object) is used on objects of class "mylm"
# Name of response
response <- deparse(object$terms[[2]])
# Components to test
comp <- attr(object$terms, "term.labels")
# Add Intercept to the list of covariates IF it is in fact used in the model.
if ("(Intercept)" %in% colnames(object$X))
{
covariates <- c("(Intercept)", comp)
txtFormula <- paste(response, "~", 1, sep = "")
}
else
{
covariates <- comp
txtFormula <- paste(response, "~", covariates[1])
}
if (length(covariates) == 1)
{
stop("ANOVA can not be calculated because only one covariate was used.")
}
formula <- formula(txtFormula)
# Growing lists in loops is slow in R. As such, we initialize our lists with a length.
model <- vector("list", length(covariates))
model[[1]] <- mylm(formula = formula, data = object$model)
# Again, initialize list with pre-set length so it's slightly faster.
output.df.list <- vector("list", length(covariates) - 1)
# Fit the sequence of models
# Start iterating from 2 because we already fit the first model above.
for(numCovar in 2:length(covariates)){
txtFormula <- paste(txtFormula, covariates[numCovar], sep = "+")
formula <- formula(txtFormula)
model[[numCovar]] <- mylm(formula = formula, data = object$model)
model1 <- model[[numCovar - 1]]
model2 <- model[[numCovar]]
df <- model1$df - model2$df
output.df <- data.frame("Df" = df)
output.df$`Sum Sq` <- as.numeric(model1$sse - model2$sse)
output.df$`Mean Sq` <- as.numeric(output.df$`Sum Sq` / output.df$Df)
output.df$`Chisq Value` <- as.numeric(output.df$`Sum Sq` / (object$sse / object$df))
output.df$`Pr(>X)` <- as.numeric(pchisq(output.df$`Chisq Value`, df = output.df$Df, lower.tail = FALSE))
output.df.list[[numCovar - 1]] <- output.df
}
output.df <- do.call(rbind, output.df.list)
# Prepare output.df for printing
output.df$` ` <- get.signif.code(output.df$`Pr(>X)`)
output.df <- format(output.df, digits = 3)
# Add line for Residuals:
residuals.output <- c(object$df, object$sse, object$sse / object$df, NA, NA, NA)
output.df <- rbind(output.df, round(residuals.output))
output.df[is.na(output.df)] <- ""
# Name rows
row.names(output.df) <- c(comp, "Residuals")
# Print Analysis of Variance Table
cat('Analysis of Variance Table\n\n')
cat(c('Response: ', response, '\n'), sep = '')
print(format(output.df, digits = 3))
cat('---\n')
cat("Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.