R/lm_remove.R

#' Method Remove function
#'
#' Ninah linear model selection method.
#'
#' @param data a data.table class dataset for modeling.
#' @param dep a character of dependent variable name.
#' @param actual a character of actual variable name.
#' @param w a character of weight variable name.
#' @param date.start a character of modeling start date in "MM-DD-YYYY".
#' @param date.end a character of modeling start date in "MM-DD-YYYY".
#' @param date.var a character of date variable name.
#' @param group a character of cross section variable name.
#' @param var.fix a character vector containing independent variables which don't need be tested.
#' @param var.test var.fix a character vector containing independent variables to be tested.
#' @param is.multithread TRUE turns on parallel computation. Suggest to use it when data is large.
#' @param spec a character vector specifying the cluster spec for parallel computation. For example, spec=rep("localhost",4). Need is.multithread to be TRUE.
#' @param wols a boolean vector for weighted OLS
#' @param is.int a boolean for intercept. Default value is True.
#'
#' @return a list of model seletion result. It contains the following components:
#'    t.test: a data.table with coefficients and statistics.
#'    fit: a list containing modeling result of each testing model.
#'
#' @export
lm_remove=function(dep,date.start,date.end,date.var,actual,w,data,group,var.fix,var.test,is.multithread=F,spec=NULL,wols=F,is.int=T){
  ##########################################################
  # output t.test table and all the fits
#   data=fread("sample data.csv")
#   dep="y" # character of dependent
#   # fixed variables
#   var.fix=c(
#     "Card_DM_465i",
#     "Checking_Promotion",
#     "csCHECKING_DDAxx50i",
#     "csciCHECKING_DDA_compxx50i",
#     "Existing_Home_Sales_USxxi",
#     "hol"
#   )
#   # variables to be tested
#   var.test=c(
#     "hmucsrttvxx_ixADShl30hrf30_i",
#     "hmucsrtinxx_ixADShl30hrf30_i",
#     "hmucsrtprxx_ixADShl30hrf30_i",
#     "hmucsrtotherxx_ixADShl30hrf15_i",
#     "hmucbGenttoxx_ixADShl3hrf5_i",
#     "hmucbSoluttvxx_ixADShl6hrf15_i",
#     "hmucbSolutdpxx_ixADShl10hrf30_i",
#     "hmucbSolutsrxx_ixADShl0hrf5_i",
#     "hmucbSolu_Other_ixADShl10hrf30_i",
#     "hmucbASavttoxx_ixADShl30hrf15_i",
#     "hmucbAmerttvxx_ixADShl0hrf5_i",
#     "hmucbAmertdpxx_ixADShl0hrf5_i",
#     "hmucbAmertsrxx_ixADShl0hrf5_i",
#     "hmucbAmer_Other_ixADShl3hrf5_i"
#   )
#   is.multithread=F # T if do multithread computation
#   spec=rep("localhost",4) # default NULL
  ##########################################################
  library(data.table)
  library(doSNOW)
  if (length(var.test)==0) stop("Error: var.test cannot be empty.") else {
    comb <- function(x, ...) {
      lapply(seq_along(x),
             function(i) c(x[[i]], lapply(list(...), function(y) y[[i]])))
    }
    if (is.multithread){
      if (is.null(spec)) stop("Error: spec cannot be null if is.multithread is True.") else {
        cl=makeCluster(spec,type="SOCK",outfile="")
        registerDoSNOW(cl)
        test.result=foreach(i=1:length(var.test),.multicombine = T,.combine = "comb",.init=list(list(), list()),
                            .packages=c("data.table","RcppEigen")) %dopar% {
                              var.temp=c(var.fix,var.test[i])
                              fit=lm_pool(dep=dep,ind=var.temp,date.start=date.start,date.end=date.end,
                                          date.var=date.var,actual=actual,w=w,data=data,group=group,wols = wols,is.int=is.int)
                              t.test=as.data.table(cbind(fit$coef,fit$t.test),keep.rownames = T)[rn==var.test[i]]
                              list(t.test,fit)
                            }
        stopCluster(cl)
      }
    }else {
      test.result=foreach(i=1:length(var.test),.multicombine = T,.combine = "comb",.init=list(list(), list()),
                          .packages=c("data.table","RcppEigen")) %do% {
                            var.temp=c(var.fix,var.test[i])
                            fit=lm_pool(dep=dep,ind=var.temp,date.start=date.start,date.end=date.end,
                                        date.var=date.var,actual=actual,w=w,data=data,group=group,wols = wols,is.int=is.int)
                            t.test=as.data.table(cbind(fit$coef,fit$t.test),keep.rownames = T)[rn==var.test[i]]
                            list(t.test,fit)
                          }
    }
    t.test=rbindlist(test.result[[1]])
    fit=test.result[[2]]
    return(list(t.test=t.test,fit=fit))
  }
}
xinzhou1023/shrinkest documentation built on May 4, 2019, 1:07 p.m.