R/linreg.R

#' linreg
#' Implements multiple linear regression.
#' @param formula a formula for linear regression.
#' @param data data set to be used.
#' @return different statistics using \code{formula} and \code{data} .
#' @examples
#' lr <- linreg$new(Petal.Length~Species, data = iris)
#' lr$print()
#' lr$plot()
#' lr$resid()
#' lr$coef()
#' lr$summary()
#' @export linreg
linreg <- setRefClass("linreg", fields =  list(dfname="character",formulaNames = "vector",formula="formula",data="data.frame",
  regCoefficients = "numeric",
  fittedVals = "numeric",
  residualVals = "numeric",
  df = "integer",
  residualVariance = "numeric",
  varRegCoefficeints = "numeric",
  tvals = "numeric",
  pvals = "numeric"
),

methods = list(
  
  initialize = function(formula,data){
    
    # https://economictheoryblog.com/2016/02/20/rebuild-ols-estimator-manually-in-r/
    
    formula <<- formula
    data <<- data
    
    '%ni%' <- Negate("%in%")
    X <- model.matrix(formula,data) # Creating design matrix
    y <- as.matrix(data[all.vars(formula)[1]]) # dependent variable
    dfname <<- as.character(substitute(data))
    # Calculating statistics
    
    ## Regression coefficients ((X'X)^(-1))X'y
    betacoeff = solve( t(X) %*% X) %*% t(X) %*% y
    beta <- as.vector(betacoeff)
    #betacoeff = t(betacoeff)
    regCoefficients <<- beta
    formulaNames <<- as.vector(colnames(t(betacoeff)))
    
    ## Fitted values
    fit <- as.vector(X %*% beta)
    fittedVals <<- fit
    
    ## Residuals
    resid <- as.vector(y - fit)
    residualVals <<- resid
    
    ## Degree of freedom (n-p)
    n <- nrow(X)
    p <- ncol(X)
    dfree <- n- p
    df <<- dfree
    
    ## Residual Variance
    resvar <- as.vector((t(resid) %*% resid)/dfree)
    residualVariance <<- resvar
    
    ## Variance of the regression coefficients
    varregcoeff <- resvar * as.vector(diag(solve( t(X) %*% X )))
    varRegCoefficeints <<- varregcoeff
    
    ## t-values for each coefficient
    #tval <- diag(beta / (var(beta))
    tval <- diag(beta / sqrt(diag(varRegCoefficeints)))
    tvals <<- tval
    
    
    ## Calculating p-values
    pval <- 2*pt(-abs(tval),df=dfree)
    pvals <<- pval
  },
  
  print = function(){
    cat("Call:\n")
    cat("linreg(formula = ",as.character(formula[2])," ~ ",as.character(formula[3])," ,data = ",dfname,")")
    prdf <- as.data.frame(t(regCoefficients))
    colnames(prdf) <-formulaNames
    cat("\n\nCoefficients:\n")
    print.data.frame(as.data.frame(prdf),row.names = FALSE)
  },
  
  plot = function(){
    
    require(ggplot2)
    require(gridExtra)
    require(png)
    require(grid)
    require(RCurl)
    
    # bulding theme
    logo <- readPNG(getURLContent('http://framtidensforskning.se/wp-content/uploads/2018/06/liu_primary_black-530x157.png'))
    logo <- rasterGrob(logo)
  
    
    theme_liu <- function () { 
      theme_bw(base_size=9) %+replace% 
        theme(
          axis.text = element_text(colour = "white"),
          axis.ticks = element_line(color="white",size = 2),
          panel.background  = element_blank(),
          plot.title = element_text(color="white",size = rel(2)),
          axis.title.y = element_text(color="white",size = rel(1.5), angle = 90),
          axis.title.x = element_text(color="white",size = rel(1.5)),
          plot.background = element_rect(fill="lightblue", colour="lightblue"),
          legend.background = element_rect(fill="transparent", colour=NA),
          legend.key = element_rect(fill="transparent", colour=NA),
          panel.border = element_rect(linetype="dashed" ,fill = NA, colour = "white"),
          panel.grid.major = element_line(size = 0.50, linetype = 'solid',
                                          colour = "white"), 
          panel.grid.minor = element_line(size = 0.50, linetype = 'solid',
                                          colour = "white")
          
        )
    }
    
    
    fitres <- data.frame(fittedVals,residualVals)
    # Plot fit vs residuals
    p1 <- ggplot(data = fitres,aes(x=fittedVals,y=residualVals))+geom_point()+geom_smooth(method = "loess", formula = y ~ x,se=FALSE) + 
      xlab("Fitted Values") + ylab("Residuals") + ggtitle("Residuals vs. Fitted") +theme_liu()
    # Plot fit vs standardize residuals
    p2 <- ggplot(data=fitres,aes(x=fittedVals,y=sqrt(abs(residualVals))))+geom_point()+
      geom_smooth(method = "loess", formula = y ~ x,se=FALSE) + 
      xlab("Fitted Values") + ylab(expression(paste(sqrt("Standardized residuals")))) + ggtitle("Scale−Location") +theme_liu()
    
    grid.arrange(logo,p1, p2)
  },
  
  resid = function(){
    return(residualVals)
  },
  
  pred = function(){
    return(fittedVals)
  },
  
  coef = function(){
    return(regCoefficients)
  },
  
  summary = function(){
    cat("Call:\n")
    cat("linreg(formula",deparse(formula), "data=",dfname,")")
    cat("\nCoefficients:\n")
    std.error <- sqrt(varRegCoefficeints)
    summary.data <- cbind(Estimate =regCoefficients  ,Std.Error = std.error , t.value = tvals, p.value = pvals)
    rownames(summary.data) <- formulaNames
    cols <- colnames(summary.data)[1:3]
    summary.data[,cols] = round(summary.data[,cols],4)
    estim <- "***"
    summary.data <- cbind(summary.data,estim)
    print.data.frame(as.data.frame(summary.data))
    cat("\nResidual standard error: ")
    sd.res <- sqrt(residualVariance)
    cat(round(sd.res,4), "on", df, "degrees of freedom\n")
  }
  
)
)
a <-linreg$new(Petal.Length~Species, data = iris)
a$plot()
obiii/Lab4 documentation built on May 15, 2019, 1:18 p.m.