knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

This package contains one function of a class RC and the prime function for this is to calculate a linear regression.

The function, class and methods

The function and class, linreg and the function can take two arguments, formula and data. The function has serveral methods connected to it and if you refer to them they will do different tasks. Such as:

These methods need be "fed" with som information on what to do. The function new(...) can insert what you want in to the class. How to refer to this method for a RC class you need to use a $ sign in the context of linreg$new(formula, data). To refer to a methods that you have created, and to use the information you have put in to the new(...) function, you have to put in, for example linreg$new(formula, data)$print(). This will print out the information about the linear regression according to the print() method.

Plots

With the linreg$new(formula, data)$plot() method you will get the two plots shown here:

data(iris)

########### Kod ########## 
linreg<-setRefClass("linreg", fields = list(formula="formula", 
                                            data="data.frame",
                                            Call="character",
                                            Coefficients="numeric",
                                            Fits="numeric",
                                            Residuals="numeric",
                                            df="numeric",
                                            Var_residuals="numeric",
                                            Std_betas="numeric",
                                            tBetas="numeric",
                                            Pvalues="numeric"),
                    methods = list(

                      initialize = function(formula,data){

                        Call[1] <<-deparse(substitute(data))
                        Call[2] <<-Reduce(paste,deparse(formula))

                        formula<<-formula
                        data<<-data

                        ########### Beräkningar ########### 
                        x<-model.matrix(formula,data)
                        y<-all.vars(formula)[1]
                        y<-as.matrix(data[,names(data)==y])

                        b_hat<-solve(t(x)%*%x)%*%t(x)%*%y
                        y_fits<-x%*%b_hat
                        e<-y-y_fits
                        df1<-length(y)-ncol(x)
                        var_e<-(t(e)%*%e)/df1

                        var_b_hat<-as.numeric(var_e)*diag(solve((t(x)%*%x)))
                        std_b_hat<-sqrt(var_b_hat)
                        t_b_hat<-as.numeric(b_hat)/std_b_hat
                        p_b_hat<-(1-(pt(abs(t_b_hat),df = df1)))*2

                        b_hat_numeric<-as.numeric(b_hat)
                        names(b_hat_numeric)<-rownames(b_hat)



                        input_var<-as.character(match.call(expand.dots = FALSE))
                        ########### Spara beräkningar ###########
                        Coefficients<<-b_hat_numeric
                        Fits<<-as.numeric(y_fits)
                        Residuals<<-as.numeric(e)
                        df<<-df1
                        Var_residuals<<-as.numeric(var_e)
                        Std_betas<<-std_b_hat
                        tBetas<<-t_b_hat
                        Pvalues<<-p_b_hat
                        ###########




                      },

                      pred = function(){
                        "Give you the predicted values as a numeric vector"

                        svar<-Fits
                        names(svar)<-1:length(svar)
                        return(svar)
                      },

                      print = function() {
                        "Give you a nice view of the calculation"


                        Dataset<-Call[1]
                        Formula<-Call[2]
                        Formula<-paste("linreg(formula = ",Formula,", data = ",Dataset,")",sep="")
                        beta<-Coefficients
                        namn<-names(beta)
                        names(beta)<-NULL
                        beta<-round(beta,4)


                        for(i in 2:length(beta)){
                          beta[i]<-format(beta[i], width=max(nchar(beta[i]),nchar(namn[i])),justify = c("right"))
                          namn[i]<-format(namn[i], width=max(nchar(beta[i]),nchar(namn[i])),justify = c("right"))
                        }

                        beta[1]<-format(beta[1], width=max(nchar(beta[1]),nchar(namn[1]),nchar("Coefficients")),justify = c("right"))
                        namn[1]<-format(namn[1], width=max(nchar(beta[1]),nchar(namn[1]),nchar("Coefficients")),justify = c("right"))

                        beta[1]<-paste(beta[1]," ",sep="")
                        namn[1]<-paste(namn[1]," ",sep="")

                        cat("Call:",sep="\n")
                        cat(Formula, sep="\n")
                        cat(sep="\n")
                        cat("Coefficients:",sep="\n")
                        cat(paste(namn,collapse = "  "),sep="",collapse="\n")
                        cat(paste(beta,collapse = "  "),sep="",collapse="\n")
                      },

                      summary = function(){
                        "Give you a nice summary of the calculation"

                        ########## Samma som print #######
                        Dataset<-Call[1]
                        Formula<-Call[2]
                        Formula<-paste("linreg(formula = ",Formula,", data = ",Dataset,")",sep="")
                        beta<-Coefficients
                        namn<-names(beta)
                        names(beta)<-NULL
                        beta<-round(beta,4)


                        for(i in 1:length(beta)){
                          beta[i]<-format(beta[i], width=max(nchar(beta[i]),nchar(namn[i])),justify = c("right"))
                          namn[i]<-format(namn[i], width=max(nchar(beta[i]),nchar(namn[i])),justify = c("right"))
                        }
                        ##########

                        Variable<-as.character(names(Coefficients))
                        Estimate<-round(Coefficients,3)
                        Std_Error<-round(Std_betas,3)
                        t_value<-round(tBetas,3)
                        Pr_t<-round(Pvalues,5)

                        svar<-data.frame(Variable,Estimate,Std_Error,t_value,Pr_t)
                        row.names(svar)<-NULL
                        svar$Variable<-as.character(svar$Variable)


                        svar<-rbind(c(" ","Estimate","Std. Error","t value","Pr(>|t|)"),svar)

                        for(i in 1:length(svar)){
                          for(j in 1:nrow(svar)){
                            svar[j,i]<-format(svar[j,i], width=max(nchar(svar[,i])),justify = c("right"))
                          }
                        }

                        svar$p_stjarna<-""
                        svar$p_stjarna[svar$Pr_t<0.001]<-"***"
                        svar$p_stjarna[svar$Pr_t>0.001]<-"**"
                        svar$p_stjarna[svar$Pr_t>0.01]<-"*"
                        svar$p_stjarna[svar$Pr_t>0.05]<-"."
                        svar$p_stjarna[svar$Pr_t>0.1]<-" "
                        svar$p_stjarna<-format(svar$p_stjarna, width=max(nchar(svar$p_stjarna)),justify = c("right"))


                        cat("Call:",sep="\n")
                        cat(Formula, sep="\n")
                        cat(sep="\n")
                        cat("Coefficients:",sep="\n")
                        for(i in 1:nrow(svar)){
                          cat(paste(svar[i,],collapse = "   "),sep="",collapse="\n")
                        }
                        cat("",sep="\n")
                        cat(paste("Residual standard error: ",round(sqrt(Var_residuals),5) ," on " ,df, " degrees of freedom",sep=""))


                      },

                      resid = function(){
                        "Give you the residuals as a numeric vector"

                        svar<-Residuals
                        names(svar)<-1:length(svar)
                        return(svar)
                      },

                      coef = function(){
                        "Give you the coef values as a numeric vector"
                        svar<-Coefficients
                        return(svar)

                      },

                      plot = function() {

                        "Printing out two graps of residuals and standardized residuals vs. fitted values."
                        dataint <- data.frame(residual = Residuals, fits = Fits, std_residual = sqrt(abs(scale(Residuals))))

                        class(dataint$std_residual)

                        require(ggplot2)
                        test<-ksmooth(dataint$fits,dataint$residual,kernel = "normal",bandwidth=1)$y
                        #Residuals vs Fitted
                        pl_1 <- 
                          ggplot(data = dataint, aes(x = fits, y = residual) ) +
                          geom_point() +
                          labs(x = "Fitted values", y = "Residuals") +
                          geom_hline(yintercept = 0) + theme_bw() + ggtitle("Residuals vs Fitted") +
                          theme(plot.title = element_text(hjust = 0.5))

                        if(!all(sapply(data[,colnames(data) %in% all.vars(formula)[-1]],is.factor))){
                          pl_1 <- pl_1 + geom_smooth(method="loess", se=FALSE, color="red")
                        } else{
                          pl_1 <- pl_1 + stat_summary(fun.y=mean, colour="red", geom="line")
                        }




                        #Standardized residuals vs Fitted
                        pl_2 <- ggplot(data = dataint, aes(x = fits, y = std_residual) ) +
                          geom_point() + labs(x = "Fitted values", y = expression(sqrt("|Standardized residuals|"))) +
                          geom_hline(yintercept = 0) +
                          theme_bw() +
                          ggtitle("Standardized residuals vs Fitted") +
                          theme(plot.title = element_text(hjust = 0.5))

                        if(!all(sapply(data[,colnames(data) %in% all.vars(formula)[-1]],is.factor))){
                          pl_2 <- pl_2 + geom_smooth(method="loess", se=FALSE, color="red")
                        } else{
                          pl_2 <- pl_2 + stat_summary(fun.y=mean, colour="red", geom="line")
                        }

                        list(pl_1,pl_2)

                        # cat ("Press [enter] to continue")
                        # line <- readline()
                        # pl_1
                        # Sys.sleep(0.01)
                        # cat ("Press [enter] to continue")
                        # line <- readline()
                        # pl_2
                      }
                    )
)
plotting <- linreg$new(Petal.Length~Sepal.Width+Sepal.Length, data=iris)$plot()

plotting[[1]]; plotting[[2]]

Both plots was produced with the package ggplot2.



herwineric/Lab4_Albin_Eric documentation built on May 9, 2019, 5:24 p.m.