R/bonus.R

#' ridge regression
#'
#' @field regco: regression coefficient (beta with a hat)
#' @field fitted_value: fitted value (y with a hat)
#' @field residuals : residuals (e with a hat)
#' @field deg_of_fredom : degree of fredom
#' @field resi_var: variance of residuals
#' @field regco_var: variance of regression coefficient
#' @field t_regco: t value of regression coefficient
#' @field object: input formula
#' @field data : input dataframe(the data reference of formula)
#' @field dataname: name of dataframe
#' @field fname : string of formula(without bracket)
#' @field mat1: created by model.matrix()
#' @field y: true value in dataframe not estimated value
#'
#' @description Function "visualize_airport_delays" is implemented in this class.
#'              To call this function,first need to build an object of this class
#'              r1 <- ridgereg$new(Petal.Length~Species, data = iris, lambda = 1)
#'              r1$visualize_airport_delays().
#' @import methods
#' @import dplyr
#' @importFrom ggplot2 ggplot
#' @importFrom plyr is.formula
#' @import nycflights13
#'
#' @export ridgereg
#' @exportClass ridgereg

ridgereg<-setRefClass("ridgereg",
                      fields = list(regco = "matrix",fitted_value="matrix",residuals = "matrix",
                                    deg_of_fredom = "numeric",resi_var = "numeric",regco_var = "matrix",
                                    t_regco = "matrix", object = "formula", data = "data.frame",
                                    dataname = "character",fname = "character",mat1 = "matrix",y = "numeric"
                      ),
                      method = list(
                        initialize = function(object,data,lambda)
                        {
                          if(!plyr::is.formula(object)|!is.data.frame(data)||!is.numeric(lambda)||!length(lambda)==1)
                            stop()
                          fname <<- Reduce(paste, deparse(object))
                          dataname <<- deparse(substitute(data))
                          object <<- object
                          data <<- data
                          # modify data by applying (x - mean(x))/variance(x)
                          mat1 <<- model.matrix(object,data)
                          iden <- diag(length(mat1[1,]))
                          namey <- all.vars(object)[1]
                          y <<- data[,namey]
                          regco <<- solve(t(mat1)%*%mat1+lambda*iden)%*%t(mat1)%*%y
                          fitted_value <<- mat1%*%regco
                          residuals <<- y - fitted_value
                          deg_of_fredom <<- length(y) - length(all.vars(object))
                          resi_var <<- (t(residuals)%*%residuals)[1,1]/deg_of_fredom
                          regco_var <<- resi_var*solve(t(mat1)%*%mat1)
                          t_regco <<- regco/sqrt(diag(regco_var))
                        },

                        modify = function(mat)
                        {
                          for(i in 1:length(mat[1,]))
                          {
                            if(!is.numeric(mat[1,i]))
                              next()
                            mean_value <- sum(mat[,i])/length(mat[,i])
                            mat[,i] <-mat[,i] - mean_value
                            sum2 <- 0
                            sum2 <- sum2 + (t(mat[,i])%*%mat[,i])[1,1]
                            sum2 <- sum2/length(mat[,i])
                            mat[,i] <- mat[,i]/sqrt(sum2)
                          }
                          return(mat)
                        },

                        print = function()
                        {
                          should_print <- paste0('ridgereg(formula = ',fname)
                          should_print <- paste0(should_print,', data = ')
                          should_print <- paste0(should_print,dataname)
                          should_print <- paste0(should_print,')')
                          base::print(should_print)
                          base::print(regco[,1])
                        },

                        QR = function()
                        {
                          matQ <- qr.Q(qr(mat1))
                          matR <- qr.R(qr(mat1))
                          regco <<- solve(matR)%*%t(matQ)%*%y
                          regco_var <<- resi_var*solve(matR)%*%t(solve(matR))
                        },

                        pred = function()
                        {
                          return(fitted_value)
                        },

                        coef = function()
                        {
                          vec <- as.vector(regco)
                          names(vec)<-row.names(regco)
                          return(vec)
                        },
                        visualize_airport_delays = function()
                        {
                          library(nycflights13)
                          name_of_airp <- unique(flights$origin)
                          df <-data.frame(faa = name_of_airp)
                          delay <-c()
                          for(i in 1:length(name_of_airp))
                          {
                            flight <- flights[which(!is.na(flights$dep_delay)),]
                            c1 <- which(flight$origin == name_of_airp[i])
                            delay_mean <- sum(flight$dep_delay[c1])/length(c1)
                            if(!is.infinite(delay_mean))
                              delay <- c(delay,delay_mean)
                            else
                              delay <- c(delay,NA)
                          }
                          df <- transform(df,delay = delay)
                          flight<-dplyr::left_join(airports,df,by="faa")
                          flight <- flight[which(!is.na(flight$delay)),]
                          flight <-data.frame(name = flight$name,lat = flight$lat,lon = flight$lon,delay = flight$delay)

                          p2<-ggplot2::ggplot(data = flight,ggplot2::aes(x = flight[,2],y = flight[,3]))+
                            ggplot2::geom_point(shape=21, size=3, color = 'red')+
                            ggplot2::labs(x ='lon', y ='lat')+
                            ggplot2::theme_classic(base_size = 16)+
                            ggrepel::geom_text_repel(ggplot2::aes(flight[,2],flight[,3],label = paste0("airport: ",name," delay:",round(delay,2))))
                          base::print(p2)
                        }

                      )
)
WuhaoStatistic/bonuslab documentation built on Dec. 18, 2021, 7:20 p.m.