# R/lm_pool.R

#' Pooled linear model
#'
#' Fits a single least-squares regression on every row of `data` whose date
#' lies in `[date.start, date.end]`, and returns the coefficients together
#' with their t statistics and two-sided p-values. The fit is done with
#' `stats::lm.wfit()` when `wols = TRUE` and with `RcppEigen::fastLmPure()`
#' otherwise.
#'
#' @param dep a character of dependent variable name.
#' @param ind a character vector containing independent variables.
#' @param date.start lower bound (inclusive) of the modeling window. It is
#'   compared to the date column with `>=`, so it must be of a type and
#'   format that orders correctly against that column. (Originally documented
#'   as a character in "MM-DD-YYYY"; NOTE(review): such strings do not sort
#'   chronologically across years -- confirm the column's actual format.)
#' @param date.end upper bound (inclusive) of the modeling window; same
#'   requirements as `date.start`.
#' @param date.var a character of date variable name.
#' @param actual a character of actual variable name. Not used by the fit;
#'   returned unchanged in the result.
#' @param w a character of weight variable name. The column is only read
#'   when `wols = TRUE`.
#' @param data a data.table class dataset for modeling. Columns are accessed
#'   by name, so a plain data.frame works as well.
#' @param group a character of cross section variable name. Not used by the
#'   fit; returned unchanged in the result.
#' @param wols a boolean for weighted OLS or not. Default is FALSE.
#' @param is.int a boolean for intercept. Default value is TRUE. When TRUE a
#'   column of ones named `int` is prepended to the design matrix.
#'
#' @return a list of modeling result. It contains the following components:
#'    coef: a matrix with one row per coefficient and the columns `pool`
#'      (estimate, with NA estimates replaced by 0), `t.score` and `p.value`.
#'    fit: the list returned by the fitting routine for the pooled
#'      regression; for `wols = TRUE` an `se` element is added to it.
#'    group: a character of cross section name.
#'    dep: a character of dependent variable name.
#'    actual: a character of actual variable name.
#'    w: a character of weight variable name.
#'    date.start: the modeling start date as supplied.
#'    date.end: the modeling end date as supplied.
#'    date.var: a character of date variable name.
#'
#' @export
lm_pool <- function(dep, ind, date.start, date.end, date.var, actual, w, data,
                    group, wols = FALSE, is.int = TRUE) {
  # Fail early, with a readable message, on misspelled column names.
  needed <- c(date.var, dep, ind, if (wols) w)
  absent <- setdiff(needed, names(data))
  if (length(absent) > 0L) {
    stop("column(s) not found in 'data': ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # which() drops rows whose date is NA, so the design matrix, the response
  # and the weights are always cut with exactly the same row set.
  dates <- data[[date.var]]
  rows <- which(dates >= date.start & dates <= date.end)
  if (length(rows) == 0L) {
    stop("no rows of 'data' fall between 'date.start' and 'date.end'",
         call. = FALSE)
  }

  # Extract the selected rows of the named columns as a matrix.
  take <- function(cols) {
    picked <- lapply(cols, function(v) data[[v]][rows])
    names(picked) <- cols
    as.matrix(as.data.frame(picked, optional = TRUE,
                            stringsAsFactors = FALSE))
  }

  X <- take(ind)
  if (is.int) {
    X <- cbind(int = rep(1, nrow(X)), X)
  }
  y <- take(dep)

  if (wols) {
    wts <- data[[w]][rows]
    fit <- stats::lm.wfit(X, y, w = wts)
  } else {
    if (!requireNamespace("RcppEigen", quietly = TRUE)) {
      stop("package 'RcppEigen' is required when 'wols' is FALSE",
           call. = FALSE)
    }
    fit <- RcppEigen::fastLmPure(X, y)
  }

  coef <- fit$coefficients
  # Label the coefficients by design column when the fitter did not, so both
  # fitting paths produce an identically labelled table.
  if (is.null(names(coef))) {
    names(coef) <- colnames(X)
  }

  if (wols) {
    # Standard errors from the QR factor already computed by lm.wfit (the
    # same construction summary.lm uses). This avoids building an n x n
    # diagonal weight matrix and, unlike solve(X'WX), still works when the
    # design is rank deficient: aliased columns simply get an NA error.
    sigma2 <- sum(wts * fit$residuals^2) / fit$df.residual
    se <- rep(NA_real_, length(coef))
    names(se) <- names(coef)
    kept <- seq_len(fit$rank)
    if (length(kept) > 0L) {
      unscaled <- chol2inv(fit$qr$qr[kept, kept, drop = FALSE])
      se[fit$qr$pivot[kept]] <- sqrt(diag(unscaled) * sigma2)
    }
    fit$se <- se
  }

  t.score <- coef / fit$se
  p.value <- 2 * stats::pt(abs(t.score), fit$df.residual, lower.tail = FALSE)

  # Coefficients that could not be estimated are reported as 0; their
  # t.score and p.value stay NA because they were computed beforehand.
  coef[is.na(coef)] <- 0

  coef.pool <- matrix(coef, ncol = 1, dimnames = list(names(coef), "pool"))
  p.value.table <- matrix(
    c(t.score, p.value),
    ncol = 2, nrow = length(p.value),
    dimnames = list(names(p.value), c("t.score", "p.value"))
  )
  coef.pool <- cbind(coef.pool, p.value.table)

  list(coef = coef.pool, fit = fit, date.start = date.start,
       date.end = date.end, actual = actual, date.var = date.var,
       dep = dep, w = w, group = group)
}
# xinzhou1023/shrinkest documentation built on May 4, 2019, 1:07 p.m.