In this lab we have created a package to handle linear regression models. We will use linear algebra class to create the most basic functionality in the R package. We have also implement an object oriented system
to handle special functions such as
linreg()
It take 2 parameter as input
'linreg(formula, data)'
formula = Petal.Length ~ Species
)
left is dependent variable stored in variable in Y
and right is independent Variable which is stored as matrix in X
data = iris or faithful
)Regression coefficients (betaCoff
) : $\hat{\beta}= (X^TX)^{-1}X^Ty$
The fitted values (fittedValues
) : $\hat{y}= X\hat{\beta}$
The residuals (residualsVal
) : $\hat{e}= y-X\hat{\beta}$
The degree of freedom (degree_f
) : $df=n-p$ , where $n$ is the number of observations and $p$ is the number of parameters in the model
The residual variance (residualsVariance
) : $\hat{\sigma}^2=\frac{e^Te}{df}$
The variance of the regression coefficients (regression_coff
) : $\hat{Var}(\hat{\beta})=\hat{\sigma}^2(X^TX)^{-1}) $
The t-values for each coefficient (tValues
) :$t_\beta=\frac{\hat{\beta}}{\sqrt{Var(\hat{\beta})}}$
The p-values for each coefficient (pValue
) : $P(|>t|)$ the probability of each coefficient to be equal to 0
data("iris")
or
data("faithful")
require(ggplot2) require(magick) require(grid) linreg <- setRefClass("linreg", fields = list(formula= "formula", data = "data.frame", betaCoff="numeric", fittedValues="numeric", residualsVal="numeric",residualsVariance="matrix", dataSetName="character",tValues="numeric",pValue="numeric",regression_coff="numeric", degree_f="numeric" ), methods = list( initialize = function(formula, data) { #This function acts as constructor formula <<- formula data <<- data X <- model.matrix(formula,data) Y <- data[[(all.vars(formula)[1])]] QR <- qr(X) betaCoff <<- qr.coef(QR,Y) #This variable stores the calculated value of beta coefficient fittedValues <<- qr.fitted(QR,Y) #This variable stores the calculated value of fitted value residualsVal <<- qr.resid(QR,Y) #This variable store the calculated calculated value residual value degree_f <<- nrow(X) - ncol(X) #This variable stores the calculated value of degree of freedom residualsVariance <<- ( t(residualsVal) %*% residualsVal) / (degree_f) #This variable stores the calculated value of residual variance regression_coff <<- as.numeric(residualsVariance) * diag(solve((t(X) %*% X))) #This variable stores the calculated value of variance of the regression coefficients #tvalues tValues <<- diag(betaCoff / sqrt(diag(regression_coff))) #Calculates and stores and t-value #p values pValue <<- 2*pt(-abs(tValues),df=nrow(X)-ncol(X)) #Calculates and stores P-Value dataSetName <<- deparse(substitute(data)) #Stores the name of dataset being used }, print = function() { "This function prints the formula and dataset name as well as the calculated coefficients" r_name <- rownames(as.data.frame(betaCoff)) cat("Call:") cat("\n") formula_print<- paste0("linreg(","formula = ",formula[2]," ",formula[1]," ",formula[3],", ","data = ",dataSetName,")",sep="") cat(formula_print) cat("\n") cat("\n") cat("Coefficients:") cat("\n") cat(" ") cat(r_name) cat(" ") cat("\n") cat(betaCoff) }, plot = function() { logo <- image_read('http://www.ida.liu.se/mall11/images/logo-sv.png') img<- rasterGrob(logo,interpolate=TRUE) #liu-theme theme_liu <- function() { theme_bw() + theme(plot.background = element_rect(size = 1, color = "#ccf0fa", fill = "#0cc7d3"), text=element_text(size = 15, family = "serif", color = "white",face = "bold"), plot.title = element_text(size=20,margin = margin(t=5,b=10),hjust = 0.5), axis.text.y = element_text(colour = "white"), axis.text.x = element_text(colour = "white"), panel.background = element_rect(fill = "#ccf5f0") ) } "This function plots the graphs for fitted values" phras<- paste("lm(",format(formula),")") stand_res <- sqrt(abs((residualsVal-mean(residualsVal))/sqrt(var(residualsVal)))) plot1<-ggplot(data.frame(fittedValues,residualsVal),aes(y=residualsVal,x=fittedValues))+geom_point(shape=1,size=3)+xlab(paste("Fitted values", phras, sep = "\n"))+ ylab("Residuals")+ggtitle("Residuals vs Fitted") +geom_hline(yintercept=0, linetype="dashed")+geom_smooth(span = 1.5,colour="red",method="loess",se=FALSE)+annotation_custom(img, xmin = 1.4, xmax = 2.4, ymin = 0.8, ymax = 1.4)+theme_liu() plot2<-ggplot(data.frame(fittedValues,stand_res),aes(y=stand_res,x=fittedValues))+geom_point(shape=1)+xlab(paste("Fitted values",phras, sep = "\n"))+ ylab(expression(sqrt(abs("Standardized residuals")))) + ggtitle("Scale-Location")+geom_smooth(span = 1.5,colour="red",method="loess",se=FALSE)+annotation_custom(img, xmin = 1.4, xmax = 2.4, ymin = 1.4, ymax = 1.9)+theme_liu() return(list("plot1" = plot1,"plot2" = plot2)) }, resid = function() { "This function returns the vector of calculted residual values" return(residualsVal) }, pred = function() { "This function returns the vector of calculated fitted values" return(fittedValues) }, coef = function() { "This function returns the vector of beta coefficients " return (betaCoff) }, summary = function() { "This function returns the summary of linear regression" r_name <- rownames(as.data.frame(betaCoff)) cat("Call:") cat("\n") formula_print<- paste0("linreg(","formula = ",formula[2]," ",formula[1]," ",formula[3],", ","data = ",dataSetName,")",sep="") cat(formula_print) cat("\n") cat("\n") cat("Coefficients:") cat("\n") for(j in 1:length(r_name)) { cat(paste( r_name[j] ,as.numeric(round(betaCoff[j],4) ) ,round(sqrt(as.numeric(regression_coff[j])),3), round(tValues[j],3) , round(pValue[j],3), "***" ) ) cat("\n") } cat("\n") cat(paste("Residual standard error: ",round(sqrt(residualsVariance),5) ," on " ,degree_f, " degrees of freedom",sep="")) } ) ) data(iris) l <- linreg(formula = Petal.Length ~ Species, data = iris)
data("iris")
or
data("faithful")
formula <- Petal.Length ~ Species data <- iris l <- linreg(formula , data) l # l is an object
resid()
l$resid() #return the vector of residuals e.
l$pred() #return the predicted values ^y
coef()
l$coef() #return the coefficients as a named vector.
summary()
return a similar printout as printed for 'summary(lm(Petal.Length~Species, data = iris))' objects
l$summary()
print()
It should print out the coefficients and coefficient names
, similar as done by the print(lm(Petal.Length~Species, data = iris))
class.
l$print()
plot()
plot the graph using ggplot2
l$plot()
Repo link
"Here you can find a private repo link which will be public soon" (Rcourse-Lab4)
rabnsh696@student.liu.se
or samza595@student.liu.se
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.