R/lm_loop.R

#' Linear model loop function
#'
#' A function to run linear model by cross section
#'
#' @param data a data.table class dataset for modeling.
#' @param dep a character of dependent variable name.
#' @param actual a character of actual variable name.
#' @param w a character of weight variable name.
#' @param date.start a character of modeling start date in "MM-DD-YYYY".
#' @param date.end a character of modeling start date in "MM-DD-YYYY".
#' @param date.var a character of date variable name.
#' @param group a character of cross section variable name.
#' @param ind a character vector containing independent variables.
#' @param is.multithread TRUE turns on parallel computation. Suggest to use it when data is large.
#' @param spec a character vector specifying the cluster spec for parallel computation. For example, spec=rep("localhost",4). Need is.multithread to be TRUE.
#' @param wols a boolean for weighted OLS
#' @param is.int a boolean for intercept. Default value is True.
#'
#' @return a list of modeling result. It contains the following components:
#'    coef: a matrix with coefficients and statistics.
#'    fit: a list containing modeling result of each cross section.
#'    group: a character of cross section name.
#'
#' @export
lm_loop=function(dep,ind,date.start,date.end,date.var,actual,w,data,group,is.multithread=F,spec=NULL,wols=F,is.int=T){
  require(data.table);require(RcppEigen);require(doSNOW)
  ##################################################################################################
  # lm loop part; output coef.matrix (coef by cross section)
  # para
  #   dep="y" # character of dependent
  #   group="group" # character of cross section name in data dataset
  #   is.multithread=T # T if do multithread computation
  #   spec=rep("localhost",4) # default NULL
  #   data=fread("sample data.csv") # data format: data.table class; only group, y, independents
  ##################################################################################################
  comb <- function(x, ...) {
    lapply(seq_along(x),
           function(i) c(x[[i]], lapply(list(...), function(y) y[[i]])))
  }
  loop=unique(data[[group]],by=NULL)
  if (is.multithread){
    if (is.null(spec)) stop("Error: spec cannot be null if is.multithread is True.") else{
      cl=makeCluster(spec,type="SOCK",outfile="")
      registerDoSNOW(cl)
      temp.result=foreach(i=1:length(loop),.multicombine = T,.combine = "comb",.init = list(list(),list(),list()),
                          .packages=c("data.table","RcppEigen")) %dopar%
                          {
                            print(paste(i," Cross section: ",loop[i],sep=""))
                            index=(1:nrow(data))[data[[group]]==loop[i]]
                            temp=data[index]
                            fit=lm_pool(dep=dep,ind=ind,date.start=date.start,date.end=date.end,
                                        date.var=date.var,actual=actual,w=w,data=temp,group=group,wols = wols,is.int=is.int)
                            coef=fit$fit$coefficients
                            coef[is.na(coef)]=0
                            list(matrix(coef,nc=1,dimnames = list(names(coef),loop[i])),fit,group=loop[i])
                          }
      stopCluster(cl)
    }
  }else{
    temp.result=foreach(i=1:length(loop),.multicombine = T,.combine = "comb",.init = list(list(),list(),list()),
                        .packages=c("data.table","RcppEigen")) %do%
                        {
                          print(paste(i," Cross section: ",loop[i],sep=""))
                          index=(1:nrow(data))[data[[group]]==loop[i]]
                          temp=data[index]
                          fit=lm_pool(dep=dep,ind=ind,date.start=date.start,date.end=date.end,
                                      date.var=date.var,actual=actual,w=w,data=temp,group=group,wols = wols,is.int=is.int)
                          coef=fit$fit$coefficients
                          coef[is.na(coef)]=0
                          list(matrix(coef,nc=1,dimnames = list(names(coef),loop[i])),fit,group=loop[i])
                        }
  }
  coef.matrix=do.call("cbind",temp.result[[1]])
  fit=temp.result[[2]]
  return(list(coef=coef.matrix,fit=fit,group=temp.result[[3]]))
}
xinzhou1023/shrinkest documentation built on May 4, 2019, 1:07 p.m.