Nothing
#' @title Linear Regression
#' @description Regression analysis (one numerical predictor variable) with simplified output.
#' Wrapper function for \code{lm} in package \code{stats}.
#'
#' @rdname lmGC
#' @usage lmGC(form,data=parent.frame(),graph=FALSE,check=FALSE)
#' @param form formula of form y~x, both variables numeric
#' @param data dataframe supplying y and x above. If one or more of the variables is not in data, then
#' they will be searched for in the parent environment.
#' @param graph Produce scatterplot with fitted polynomial, together with prediction standard error bands
#' @param check Asks to produce a lowess or gam curve with approximate 95%-confidence band. If the
#' fitted line wanders outside the band, then perhaps a linear fit is not appropriate.
#' @return A list of class "GClm". Elements that may be queried include "slope", "intercept",
#' "s" (residual standard error), "R^2" (unadjusted).
#' @export
#' @author Homer White \email{hwhite0@@georgetowncollege.edu}
#' @examples
#' #To study the relationship between two numerical variables:
#' lmGC(fastest~GPA,data=m111survey,graph=TRUE)
lmGC <-function(form,data=parent.frame(),graph=FALSE,check=FALSE) {
prsd <- ParseFormula(form)
respname <- as.character(prsd$lhs)
expname <- as.character(prsd$rhs)
if (length(expname)>1) stop("Only one predictor variable permitted")
resp <- simpleFind(varName=respname,data=data)
exp <- simpleFind(varName=expname,data=data)
if (!is(resp,"numeric")) stop("Response variable must be numerical")
if (!is(exp,"numeric")) stop("Predictor variable must be numerical")
#get the numbers from stats::lm
df <- data.frame(exp,resp)
names(df) <- c(expname,respname)
form <- as.formula(paste0(respname,"~",expname))
resultslm <- lm(form, data=df)
results <- summary(resultslm)
residse <- results$sigma
#Collect what we need for our print function:
results2 <- list(expname=expname,
respname=respname,
exp=exp,
resp=resp,
coefficients=results$coefficients,
r.squared=results$r.squared,
resid.sterr=residse,
graph=graph,
check=check,
mod=resultslm)
class(results2) <- "GClm"
return(results2)
}#end GClm
#' @title Diagnostic Plots for GC Linear Regression
#' @description Used by generic plot function
#'
#' @rdname plot.GClm
#' @method plot GClm
#' @usage
#' \S3method{plot}{GClm}(x,...)
#' @param x An object of class GClm
#' @param \ldots ignored
#' @return two diagnostic plots
#' @export
#' @author Homer White \email{hwhite0@@georgetowncollege.edu}
#' @examples
#' SpeedModel <- lmGC(fastest~GPA,data=m111survey)
#' plot(SpeedModel)
plot.GClm <-function(x,...) {
GClm <- x
mod <- GClm$mod
residuals <- mod$residuals
fitted.values <- mod$fitted.values
p1 <- lattice::densityplot(~residuals,xlab="residuals",main="Residuals")
p2 <- lattice::xyplot(residuals~fitted.values,xlab="predicted y values",
ylab="residuals",main="Residuals vs. Fits",pch=19,
panel=function(...){
lattice::panel.xyplot(...)
lattice::panel.abline(h=0)
})
print(p1,split=c(1,1,1,2), more=TRUE)
print(p2,split=c(1,2,1,2))
}#end plot.GClm
#' @title Prediction Function for GC Linear Regression
#' @description Used by generic predict function
#'
#' @rdname predict.GClm
#' @method predict GClm
#' @usage
#' \S3method{predict}{GClm}(object,x,level=NULL,...)
#' @param object An object of class GClm
#' @param x value of the predictor variable
#' @param level desired level of prediction interval
#' @param \ldots ignored
#' @return numeric prediction
#' @export
#' @author Homer White \email{hwhite0@@georgetowncollege.edu}
#' @examples
#' #predict fastest speed driven, for person with GPA=3.0:
#' SpeedModel <- lmGC(fastest~GPA,data=m111survey)
#' predict(SpeedModel,x=3.0)
#' #include prediction interval:
#' predict(SpeedModel,x=3.0,level=0.95)
predict.GClm <-function(object,x,level=NULL,...) {
expname <- object$expname
respname <- object$respname
exp <- object$exp
model <- object$mod
if (!is.null(level)) {
if (level <=0 || level >= 1) {
stop("Level must be a number between 0 and 1")
}
}
residse <- object$resid.sterr
newdf <- data.frame(x)
names(newdf) <- expname
prediction1 <- predict(model,newdata=newdf,se.fit=TRUE)
predVal <- prediction1$fit
sepred <- sqrt(residse^2+(prediction1$se.fit)^2)
cat(paste0("Predict ",respname," is about ",signif(predVal,4),
",\ngive or take ",signif(sepred,4)," or so for chance variation.\n\n"))
if (!is.null(level)) {
prediction2 <- suppressWarnings(
predict(model,newdata=newdf,
interval="prediction",
level=level)
)
lower <- prediction2[2]
upper <- prediction2[3]
cat(paste0(100*level,"%-prediction interval:\n"))
int <- c(lower,upper)
cat(sprintf("%-10s%-20s%-20s","","lower.bound","upper.bound"),"\n")
cat(sprintf("%-10s%-20f%-20f","",int[1],int[2]),"\n\n")
}
}#end predict.lmGC
#' @title Print Function for GC Linear Regression
#' @description Utility print function
#' @keywords internal
#'
#' @rdname print.GClm
#' @method print GClm
#' @usage
#' \S3method{print}{GClm}(x,...)
#' @param x an object of class GClm
#' @param \ldots ignored
#' @return graphical output and output to console
#' @export
#' @author Homer White \email{hwhite0@@georgetowncollege.edu}
print.GClm <-function(x,...) {
GClm <- x
coefs <- GClm$coefficients
exp <- GClm$exp
resp <- GClm$resp
respname <- GClm$respname
expname <- GClm$expname
cat("\n")
cat("\tLinear Regression\n\n")
cat("Correlation coefficient r = ",signif(cor(GClm$exp,GClm$resp,use="na.or.complete"),4),"\n\n")
cat("Equation of Regression Line:\n\n")
cat("\t",respname,"=",round(coefs[1],4),"+",round(coefs[2],4),"*",
expname,"\n")
cat("\n")
cat("Residual Standard Error:\ts =",round(GClm$resid.sterr,4),"\n")
cat("R^2 (unadjusted):\t\tR^2 =",round(GClm$r.squared,4),"\n")
#make data frame with complete cases to suppress warnings in ggplot RE missing data
df <- data.frame(GClm$exp,GClm$resp)
names(df) <- c(expname,respname)
df <- df[complete.cases(df),]
if (GClm$graph && !GClm$check) {
title <- paste0("Scatterplot with linear fit")
p1 <- ggplot2::ggplot(df,ggplot2::aes_string(x=expname,y=respname))+
ggplot2::ggtitle(title)+
ggplot2::geom_point()+
ggplot2::stat_smooth(method = "lm",size = 1,se=FALSE)+
ggplot2::xlab(expname)+ggplot2::ylab(respname)
suppressWarnings(print(p1)) #suppress just in case Hadley has more friendly advice
}
if (GClm$check) {
if (length(GClm$exp) < 1000) method <- "loess" else method <- "gam"
title <- paste0("Checking the Model Fit\n(Model is blue; ",method,
" curve is red;\n95%-confidence band for curve included)")
p1 <- ggplot2::ggplot(df,
ggplot2::aes(x=as.symbol(expname),y=as.symbol(respname)))+
ggplot2::ggtitle(title)+
ggplot2::geom_point()+
ggplot2::stat_smooth(method = "lm", size = 1,se=FALSE)+
ggplot2::xlab(expname)+ggplot2::ylab(respname) +
ggplot2::stat_smooth(method=method,color="red",size=1,se=TRUE)
suppressWarnings(print(p1))
}
}#end print.GClm
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.