R/fct01_frameBuildingFuncs.R

Defines functions getRebDates getTS getTSR getTSR_decay getTSF

Documented in getRebDates getTS getTSF getTSR getTSR_decay

# ===================== xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ==============
# --------------------  get RebDates,TS,TSR,TSF,TSFR object ------------
# ===================== xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ==============

#' getRebDates
#'
#' get the rebalance dates(a \bold{RebDates} object) between begT and endT, and then shift it foreward if necessary.
#' @param begT the begin date of class Date
#' @param endT the end date of class Date
#' @param rebFreq the rebalancing freq. an interval specification, one of "day", "week", "month", "quarter" and "year", optionally preceded by an integer and a space, or followed by "s". (See \code{\link{cut.Date}} for detail.) Or a character string of "allspan". "allspan" means no rebalance during the whole period between begT and endT, which could be realized also by a very large interval specification, eg. "100 year".
#' @param shiftby a integer,how many days the rebalancing date be shifted foreward
#' @param dates a vector of class Date. 
#' @return a \bold{RebDates} object giving the rebalancing dates list,a vector with class of \code{Date}. If dates is not null, then return the dates themselves.
#' @author Ruifei.Yin
#' @export
#' @family frame building functions
#' @examples 
#' RebDates <- getRebDates(as.Date('2010-01-01'),as.Date('2012-06-30'),'3 month', 10)
#' getRebDates(as.Date('2010-01-01'),as.Date('2012-06-30'),'allspan')
#' getRebDates(as.Date('2010-01-01'),as.Date('2012-06-30'),'100 year')
#' getRebDates(dates=as.Date(c("2003-04-09","2003-07-06","2003-09-12")))
getRebDates <- function(begT,endT,rebFreq="month",shiftby=0, dates=NULL){
  if(!is.null(dates)){
    re <- trday.nearest(dates)
  } else {
    begT <- trday.nearest(begT)
    endT <- trday.nearest(endT)
    trdays <- trday.get(begT,endT)
    # cut by rebalancing frequency
    if(rebFreq != "allspan"){
      rebdays <- as.Date(unique(cut.Date2(trdays,rebFreq))) 
    } else {
      rebdays <- c(begT,endT)
    }   
    # add begT as begin point
    if(rebdays[1] != begT){
      rebdays <- c(begT,rebdays) 
    } 
    # shift the rebalancing dates foreward
    re <- trday.nearby(rebdays,shiftby)
  }
  
  return(re)
}




#' getTS
#'
#' get the 'time*stock frame'(a \bold{TS} object) given the rebalancing dates and the universe. The universe could be a index, a sector or a plate.
#' @param RebDates a \bold{RebDates} object. a vector,with class of Date,usually the rebalancing dates list
#' @param indexID a character string. The ID of the index, sector or plate. Could be a single-ID-code(eg. "EI000300","ES09440000",...) or a more complicated express containing some set operations and ID-codes(eg. "setdiff(union(EI000300,EI399006),ES09440000)"). See detail in \code{\link{getComps}}.
#' @param stocks a vector of stockIDs
#' @param rm could be one or more of "suspend","priceLimit".default is NULL
#' @return a \bold{TS} object. a dataframe,with cols:
#'   \itemize{
#'   \item date: the rebalance dates, with \code{Date} class
#'   \item stockID: the stockID 
#'   }.IF stocks is not null then return \code{expand.grid} of stocks and RebDates, else return components of index on RebDates. 
#' @author Ruifei.Yin
#' @seealso \code{\link{getComps}}
#' @export 
#' @family frame building functions
#' @examples
#' RebDates <- getRebDates(as.Date('2011-03-17'),as.Date('2012-04-17'),'month')
#' TS <- getTS(RebDates,'EI000300')
#' TS2 <- getTS(RebDates,'ES09440000')  # financial servive sector
#' TS3 <- getTS(RebDates,'setdiff(EI000300,ES09440000)')  # CSI300 ex. financial servive sector
#' TS4 <- getTS(RebDates,stocks=c("EQ000002","EQ000023","EQ600000"))
getTS <- function(RebDates,indexID="EI000300",stocks=NULL,rm=NULL){  
  RebDates <- trday.nearest(RebDates)
  if(!is.null(stocks)){
    TS <- expand.grid(date=RebDates,stockID=stocks)
  } else {
    TS <- getComps(ID=indexID,endT=RebDates,drop=FALSE) 
  }
  if("suspend" %in% rm){
    TS <- rm_suspend(TS, nearby = 1L)
  }
  if("priceLimit" %in% rm){
    TS <- rm_priceLimit(TS, nearby = 1L, lim = c(-Inf,10))
  }
  return (TS)
}




#' getTSR
#'
#' Get the \bold{TSR}('time-stock-rtn') or \bold{TSFR}('time-stock-factor-rtn') object by adding the period rtn between the rebalance date and the date_end(\strong{(close of "date_end")/(prevclose of the day after "date")-1)}) to the \bold{TS} or \bold{TSF} object.
#' @param TS a \bold{TS} or \bold{TSF} object (See detail in \code{\link{getTS}} and \code{\link{getTSF}})
#' @param dure a period object from package \code{lubridate}. (ie. \code{months(1),weeks(2)}. See example in \code{\link{trday.offset}}.) If null, then get periodrtn between \code{date} and the next \code{date}, else get periodrtn of '\code{dure}' starting from \code{date}.
#' @param date_end_pad a Date value, padding the NAs of the last date_end. if missing, calculated automatically.
#' @return given a \bold{TS} object,return a \bold{TSR} object,a dataframe containing cols:
#'   \itemize{ 
#'   \item stockID: the stockID contained in the index or BK
#'   \item date: the rebalance dates, with \code{Date} class 
#'   \item date_end: the next rebalance dates, with \code{Date} class
#'   \item periodrtn:the period rtn between the rebalance date and the date_end
#'   }
#'   given a \bold{TSF} object,return a \bold{TSFR} object,a dataframe containing cols:
#'   \itemize{ 
#'   \item stockID: the stockID contained in the index or BK
#'   \item date: the rebalance dates, with \code{Date} class
#'   \item factorscore:the factor score of the stocks on the  rebalancing date  
#'   \item date_end: the next rebalance dates, with \code{Date} class
#'   \item periodrtn:the period rtn between the rebalance date and the date_end
#'   }  
#' @author Ruifei.Yin
#' @export
#' @family frame building functions
#' @examples
#' ## -- get a TSR object
#' RebDates <- getRebDates(as.Date('2011-03-17'),as.Date('2012-04-17'),'month')
#' TS <- getTS(RebDates,'EI000300') 
#' TSR <- getTSR(TS) # rebalance properly joined
#' require(lubridate)
#' TSR2 <- getTSR(TS,weeks(2)); TSR3 <- getTSR(TS,months(2))  # rebalance not properly joined
#' ## -- get a TSFR object
#' factorFun <- "gf_demo"
#' factorPar <- list(0,1)
#' TSF <- getTSF(TS,factorFun,factorPar)
#' TSFR <-  getTSR(TSF)
getTSR <- function(TS, dure=NULL, date_end_pad){ 
  cat(paste("Function getTSR: getting the periodrtn ....\n"))
  check.TS(TS)
  # ---- Add the end date (and the decayed dates)
  date <- unique(TS$date)
  if (is.null(dure)){
    if(missing(date_end_pad)){ # calculate the padding value automatically
      date_end_pad <- trday.offset(max(date),by = lubridate::days(round(periodicity_Ndays(date))), dir = 1L)
    }
    date_end <- c(date[-1],date_end_pad)
  } else {
    date_end <- trday.offset(date,dure)
  }  
  merging <- data.frame(date,date_end)
  TS <- merge.x(TS,merging,by="date")  
  periodrtn <- data.frame(periodrtn=getPeriodrtn(renameCol(TS,c("date","date_end"),c("begT","endT")),drop=TRUE))
  TSR <- cbind(TS,periodrtn)
  return(TSR)
}


#' getTSR_decay
#' 
#' @param prdLists
#' @return a \bold{TSR} including the decayed period rtns
#' @export
getTSR_decay <- function(TS, prd_lists = list(w1=lubridate::weeks(1),
                                              w2=lubridate::weeks(2),
                                              m1=months(1),
                                              m2=months(2),
                                              m3=months(3),
                                              m6=months(6))){
  check.TS(TS)
  date <- unique(TS$date)
  if(TRUE){ # --- add endTs
    prd_names <- names(prd_lists)
    if(all(is.na(prd_names))){
      warning("The names of prd_lists is missing, getting the names automatically.")
      prd_names <- sapply(prd_lists,substr,1,5)
    }
    date.decay <- data.frame()
    for(ii in 1:length(prd_lists)){
      endT_i <- trday.offset(date,prd_lists[[ii]])
      if(ii==1L){
        date.decay <- data.frame(endT_i)
      } else {
        date.decay <- cbind(date.decay,endT_i)
      }
    }
    names(date.decay) <- paste("endT_",prd_names,sep = "")
    merging <- data.frame(date,date.decay)
  } 
  TS <- merge.x(TS,merging,by="date")   
  if(TRUE){ # --- add prdrtns
    cat("Getting the decayed rtn ....\n")
    pb_ <- txtProgressBar(min=0,max=length(prd_lists),initial=0,style=3)
    for(ii in 1:length(prd_lists)){   
      SP <- TS[,c("stockID","date",paste("endT_",prd_names[ii],sep=""))]
      names(SP) <- c("stockID","begT","endT")
      rtnI <- data.frame(rtn=getPeriodrtn(SP,drop=TRUE))      
      rtnI <- renameCol(rtnI,"rtn",paste("prdrtn_",prd_names[ii],sep=""))
      TS <- cbind(TS,rtnI)
      setTxtProgressBar(pb_,ii)
    }
    close(pb_)
    TSR <- TS[,!names(TS) %in% paste("endT_",prd_names,sep="")]
  }
}





#' getTSF
#'
#' get the \bold{TSF} ('time-stock-factor') or \bold{TSFR}('time-stock-factor-rtn') object,by adding the factor score to a \bold{TS} or \bold{TSR} object.If necessary,cleaning and stardizing the factor score.
#' @param TS a \bold{TS} or \bold{TSR} objectobject(See detail in \code{\link{getTS}} and  \code{\link{getTSR}})
#' @param factorFun a character string naming the function to get the factor scores.
#' @param factorPar either a list or a character string, containing the parameters of the \code{factorFun}. If a character string, parameters are seprated by commas and the character parameters must quoted by  "\""s, see examples for details.
#' @param factorDir a integer,should be 1 or -1 (1 for the positive factor,-1 for the negative one).
#' @param factorRefine a list.
#' @param factorList
#' @param splitbin a integer or a charactor string of 'year','month',...
#' @return given a \bold{TS} object,return a \bold{TSF} object,a dataframe containing cols:
#'   \itemize{
#'   \item date: the rebalance dates, with \code{Date} class
#'   \item stockID: the stockID contained in the index or BK
#'   \item factorscore:the factor score of the stocks on the  rebalancing date
#'   \item sector(optional):the sector of a stock
#'   }
#'   given a \bold{TSR} object,return a \bold{TSFR} object,a dataframe containing cols:
#'   \itemize{ 
#'   \item stockID: the stockID contained in the index or BK
#'   \item date: the rebalance dates, with \code{Date} class
#'   \item factorscore:the factor score of the stocks on the  rebalancing date  
#'   \item date_end(optional): the next rebalance dates, with \code{Date} class
#'   \item periodrtn:the period rtn between the rebalance date and the date_end
#'   \item sector(optional):the sector of a stock
#'   }
#' @author Ruifei.Yin
#' @export
#' @family frame building functions
#' @examples
#' # -- get the TSF step by step --
#' 
#' # - get a TSF object
#' RebDates <- getRebDates(as.Date('2011-03-17'),as.Date('2012-04-17'),'month')
#' TS <- getTS(RebDates,'EI000300')
#' factorFun <- "gf_demo"
#' factorPar <- list(mean=0,sd=1)
#' TSF <- getTSF(TS,factorFun,factorPar)
#' # - get a TSFR object
#' TSR <- getTSR(TS)
#' TSFR <- getTSF(TSR)
#' 
#' # -- you can also get the TSF through the modelPar object directively --
#' modelPar <- modelPar.default()
#' modelPar <- modelPar.factor(modelPar,factorFun="gf_demo",factorPar=list(0,1))
#' TSF <- Model.TSF(modelPar)
#' 
#' # -- factorPar as character string
#' TSF2 <- getTSF(TS,"gf_demo","0,1")
#' TSF2 <- getTSF(TS,"gf_demo","mean=0,sd=1")
#' TSF2 <- getTSF(TS,"gf.foo","2,3")
#' TSF2 <- getTSF(TS,"gf.bar","20,\"IF00\"")
getTSF <- function(TS,factorFun,factorPar=list(),factorDir=1,
                   factorRefine=refinePar_default("none"),
                   FactorList,
                   splitbin=100000L){
  if(!missing(FactorList)){
    factorFun <- FactorList$factorFun
    factorPar <- FactorList$factorPar
    factorName <- FactorList$factorName
    factorDir <- FactorList$factorDir
    factorRefine <- FactorList$factorRefine
  }
  if(! factorDir %in% c(1L,-1L)) stop("unsupported \"factorDir\" argument!")
  cat(paste("Function getTSF: getting the factorscore ....\n"))
  check.TS(TS)
  
  subFun <- function(TS){
    # ---- get the raw factorscore
    TSF <- getRawFactor(TS[,c("date","stockID")],factorFun,factorPar)
    # ---- refine the factorscore
    TSF <- factor_refine(TSF, refinePar = factorRefine)
    # ---- adjust the direction
    TSF$factorscore <- TSF$factorscore*factorDir
    # ---- re-order
    TSF <- merge.x(TS,TSF,by=c("date","stockID"))
    return(TSF)
  }
  
  if(is.numeric(splitbin)){
    cutN <- nrow(TS) %/% splitbin +1
    if(cutN > 1){
      idx <- cut(1:nrow(TS), breaks= cutN ,labels=FALSE)
    } else {
      idx <- factor(rep(1, nrow(TS)))
    }
  } else if(is.character(splitbin)){
    idx <- cut(TS$date, splitbin) 
  } else {
    stop("unsupported \"splitbin\" argument!")
  }
  if(length(levels(idx))>1){
    cat(paste('Possibly dealing with a big data. The process is splited to ',length(levels(idx)),'groups.\n'))
  }
  TS_ <- data.frame(idx,TS)
  TS_ <- dplyr::group_by(TS_,idx)
  re <- dplyr::do(TS_, subFun(as.data.frame(.)))
  re <- as.data.frame(re)
  re <- dplyr::select(re,-idx)
  return(re)
}



#' @rdname getTSF
#' @export
#' @details \code{getRawFactor} return a \bold{TSF} object, with 'raw' factorscore, which has not been standardized and not been deal with missing values and outliers. Function \code{getRawFactor} is often called intermediately by function \code{getTSF}.
#' @return \code{getRawFactor} return a \bold{TSF} object, with 'raw' factorscore. 
#' @examples
#' # -- get 'raw' factorscore
#' TSF_raw <- getRawFactor(TS,"gf.pct_chg_per","20")
getRawFactor <- function (TS,factorFun,factorPar,FactorList) {
  if("factorscore" %in% names(TS)){
    stop("There has existed a 'factorscore' field in the TS object. Please remove or rename it first!")
  }
  if(!missing(FactorList)){
    factorFun <- FactorList$factorFun
    factorPar <- FactorList$factorPar
  }
  if(missing(factorPar)){
    TSF <- do.call(factorFun,c(list(TS)))
  } else if(is.list(factorPar)){
    TSF <- do.call(factorFun,c(list(TS),factorPar))    
  } else if(is.character(factorPar)){
    if(is.na(factorPar)||factorPar==""){
      funchar <- paste(factorFun,"(TS)")
    } else {
      funchar <- paste(factorFun,"(","TS,",factorPar,")")
    }    
    TSF <- eval(parse(text=funchar))
  } else {
    stop("The factorPar must be a list or a character string!")
  }
  if(!("factorscore" %in% names(TSF))){
    stop("There must be a 'factorscore' field in the TSF result!")
  }
  return(TSF)
}


#' get multiFactors
#' 
#' get multiple single-factor-scores and the combined-factor-score.#' 
#' @param TS
#' @param FactorLists a list, with elements of FactorList(See detail in \code{\link{buildFactorList}}).
#' @param wgts a numeric vector with the same length of FactorLists(if the sum of wgts not equals to 1, the wgts will be rescaled to sum 1). If 'eq', \code{wgts <- rep(1/length(FactorLists),length(FactorLists))} ;If missing, then only return all the single-factor-scores, without the combined-factor-score.
#' @return  \code{getMultiFactor} return a \bold{mTSF} object including cols of all the single-factor-scores, and (if param \code{wgts} not missing) the \bold{raw} combined-factor-score .
#' @note  Function \code{getMultiFactor} not only could be used to get the "raw" combined-factor-score, but also could be used as the \code{"factorFun"} parametre in function \code{getTSF} to get the "clean and standardized" combined-factor-score. See more detail in the examples.
#' @details Function \code{getMultiFactor} firstly get all the single-factor-scores and then calculating the weighted sum of them to get the combined-factor-score.
#' @export
#' @examples
#' # -- get multi-factor by FactorLists 
#' FactorLists <- list(
#'   buildFactorList(factorFun="gf.PE_ttm",
#'                   factorPar=list(),
#'                   factorDir=-1),
#'   buildFactorList(factorFun="gf.pct_chg_per",
#'                   factorPar=list(N=20),
#'                   factorDir=-1),
#'   buildFactorList(factorFun="gf.pct_chg_per",
#'                   factorPar=list(N=60),
#'                   factorDir=-1),       
#'   buildFactorList(factorFun="gf.pct_chg_per",
#'                   factorPar=list(N=120),
#'                   factorDir=-1))
#' wgts <- c(0.25,0.25,0.25,0.25)
#' 
#' # -- 0. get the all the single-factor-scores
#' TSF <- getMultiFactor(TS,FactorLists)
#' 
#' # -- 1. get the "raw" combined-factor-score
#' TSF.multi <- getMultiFactor(TS,FactorLists,wgts)
#' 
#' # -- 2. get the "clean and standardized" combined-factor-score
#' # --- 2.1 by \code{getTSF}
#' TSF.multi2 <- getTSF(TS,factorFun="getMultiFactor",factorPar=list(FactorLists,wgts),factorRefine=refinePar_default("scale"))
#' # --- 2.2 by \code{Model.TSF}
#' modelPar <- modelPar.factor(modelPar.default(),factorFun="getMultiFactor",factorPar=list(FactorLists,wgts),factorRefine=refinePar_default("scale"))
#' TSF.multi3 <- Model.TSF(modelPar)
#' TSF.multi4 <- Model.TSF_byTS(modelPar, TS)
getMultiFactor <- function(TS,FactorLists,wgts,silence=FALSE){
  # ---- get all the single-factor-scores
  for(i in 1:length(FactorLists)){
    factorFun <- FactorLists[[i]]$factorFun
    factorPar <- FactorLists[[i]]$factorPar
    factorName <- FactorLists[[i]]$factorName
    factorDir <- FactorLists[[i]]$factorDir
    factorRefine <- FactorLists[[i]]$factorRefine
    if(!silence){
      cat(paste("Function getMultiFactor: getting the score of factor",factorName,"....\n"))
    }
    # ---- get the raw factorscore
    TSF <- getRawFactor(TS,factorFun,factorPar) 
    # ---- adjust the direction
    TSF$factorscore <- TSF$factorscore*factorDir
    # ---- standardize the factorscore 
    TSF <- factor_refine(TSF,refinePar = factorRefine)
    # ---- merge
    TSF <- renameCol(TSF,"factorscore",factorName)
    if(i==1L){
      re <- merge.x(TS,TSF[,c("date","stockID",factorName)],by=c("date","stockID"))
    } else {
      re <- merge.x(re,TSF[,c("date","stockID",factorName)],by=c("date","stockID"))
    }
  }
  # ---- get the combi-factor-score
  factorNames <- sapply(FactorLists,"[[","factorName")
  if(!missing(wgts)){
    re <- MultiFactor2CombiFactor(mTSF=re,wgts=wgts,factorNames=factorNames,keep_single_factors="TRUE")
  }
  
  return(re)
}



#' @rdname getMultiFactor
#' @export
#' @param TSFs a \bold{TSFs} object. see /code{/link{Model.TSFs}}
#' @return \code{getMultiFactorbyTSFs} return a \bold{TSF} object including cols of all the single-factor-scores, and (if param \code{wgts} not missing) the \bold{raw} combi-factor-score, from a \bold{TSFs} list. (If param TSFs is a TSFRs object, then will return a TSFR object.)
#' @examples 
#' # -- get multi-factor by TSFs 
#' MPs <- getMPs_FactorLists(FactorLists, modelPar.default())
#' TSFs <- Model.TSFs(MPs)
#' TSF.multi5 <- getMultiFactorbyTSFs(TSFs,wgts)
getMultiFactorbyTSFs <- function(TSFs,wgts,
                                 factorNames = names(TSFs)){
  
  # ---- get all the single-factor-scores
  re <- TSFs2mTSF(TSFs=TSFs, factorNames = factorNames)
  
  # ---- get the combi-factor-score(weighted sum of all the single-factor-scores)
  if(!missing(wgts)){
    re <- MultiFactor2CombiFactor(mTSF=re,wgts=wgts,factorNames=factorNames,keep_single_factors="TRUE")
  }
  
  return(re)
}


#' @rdname getMultiFactor
#' @export
getRawMultiFactor <- function(TS,FactorLists,name_suffix=FALSE){
  for(i in 1:length(FactorLists)){
    factorFun <- FactorLists[[i]]$factorFun
    factorPar <- FactorLists[[i]]$factorPar
    factorName <- FactorLists[[i]]$factorName
    cat(paste("Function getMultiFactor: getting the score of factor",factorName,"....\n"))
    # ---- get the raw factorscore
    TSF <- getRawFactor(TS,factorFun,factorPar) 
    # ---- merge
    if(name_suffix){
      factorName <- paste(factorName,"R",sep="_")
    }
    TSF <- renameCol(TSF,"factorscore",factorName)
    if(i==1L){
      re <- merge.x(TS,TSF[,c("date","stockID",factorName)],by=c("date","stockID"))
    } else {
      re <- merge.x(re,TSF[,c("date","stockID",factorName)],by=c("date","stockID"))
    }
  }
  return(re)
}


#' @rdname getTSF
#' @export
#' @examples 
#' # - 3. get multifactor through lcfs
#' factorIDs <- c("F000001","F000002","F000005")
#' # -- 3.1 getMultiFactor_lcfs
#' mTSF <- getMultiFactor_lcfs(TS,factorIDs,factorRefine = refinePar_default("reg"))
#' # -- 3.2 getRawMultiFactor_lcfs
#' mTSF <- getMultiFactor_lcfs(TS,factorIDs,dir_adj=FALSE,factorRefine = NULL)
getMultiFactor_lcfs <- function(TS, FactorIDs, dir_adj=TRUE, factorRefine = NULL, wgts, fname_out=c("name","ID")){
  fname_out <- match.arg(fname_out)
  # ---- raw factor getting
  check.TS(TS)  
  tmpdat <- transform(TS[,c("stockID","date")], date = rdate2int(date))
  tmpdat$PK_ <- 1:NROW(tmpdat)
  con <- db.local("fs")
  vars <- paste(FactorIDs, collapse=", ")      
  qr <- paste("select a.*,",vars,"from yrf_tmp a left join QT_FactorScore b on a.stockID=b.ID and a.date=b.TradingDay")
  dbWriteTable(con,name="yrf_tmp",value=tmpdat,row.names = FALSE,overwrite = TRUE)
  dbExecute(con, 'CREATE INDEX [IX_yrf_tmp] ON [yrf_tmp]([date],[stockID]);')
  re <- dbGetQuery(con,qr)
  re <- dplyr::arrange(re,PK_)[,FactorIDs,drop=FALSE]
  factorNames <- if(fname_out=="ID") FactorIDs else factorID2name(FactorIDs)
  colnames(re) <- factorNames
  re <- cbind(TS,re)
  dbDisconnect(con)
  # ---- dir adjusting & factor refining
  factorDirs <- CT_FactorLists(FactorIDs)$factorDir
  for(i in 1:length(factorNames)){
    re <- renameCol(re,factorNames[i],"factorscore")
    if(dir_adj){
      re$factorscore <- re$factorscore*factorDirs[i]
    }
    re <- factor_refine(re,refinePar = factorRefine)
    re <- renameCol(re,"factorscore",factorNames[i])
  }
  # ---- get the combi-factor-score
  if(!missing(wgts)){
    re <- MultiFactor2CombiFactor(mTSF=re,wgts=wgts,factorNames=factorNames,keep_single_factors="TRUE")
  }
  return(re)
}

#' @rdname getTSF
#' @aliases getRawFactor_lcfs
#' @export
gf_lcfs <- function(TS,factorID,dir_adj=FALSE,factorRefine = NULL){
  # re <- getTech(TS,variables=factorID,tableName="QT_FactorScore",datasrc = "local")
  # re <- renameCol(re,factorID,"factorscore")
  
  re <- getMultiFactor_lcfs(TS,factorID,dir_adj=dir_adj,factorRefine = factorRefine,fname_out="ID")
  re <- renameCol(re,factorID,"factorscore")
  return(re)
}

#' @rdname getMultiFactor
#' @param mTSF a dataframe of 'TS & MultiFactors'
#' @param factorNames
#' @param keep_single_factors
#' @export
MultiFactor2CombiFactor <- function(mTSF,
                                    wgts,
                                    factorNames = guess_factorNames(mTSF),
                                    na_deal=c("ignore","rm"),
                                    keep_single_factors=TRUE){
  na_deal <- match.arg(na_deal)
  # ---- get the combi-factor-score(weighted sum of all the single-factor-scores)
  TSF_mat <- mTSF[,factorNames,drop=FALSE]
  if(identical(wgts,'eq')){
    wgts <- rep(1/length(factorNames),length(factorNames))
  }
  if(ncol(TSF_mat) != length(wgts)) {
    stop("The numbers of factors and wgts are not equal!")
  }
  if(!isTRUE(all.equal(sum(wgts),1,tolerance=0.001))){
    warning("The sum of wgts is not 1. The wgts will be rescaled! ")
    wgts <- wgts/sum(wgts)
  }  
  
  TSF_mat <- as.matrix(TSF_mat)
  if(na_deal=="ignore"){ # ignore the na value of specific factorscore.
    is_na <- is.na(TSF_mat)
    N <- nrow(TSF_mat)
    M <- ncol(TSF_mat)
    wgts_mat <- kronecker(matrix(1,N,1),t(wgts))
    wgts_mat <- wgts_mat*(!is_na)
    wgts_mat <- wgts_mat/kronecker(matrix(1,1,M),rowSums(wgts_mat))
    TSF_mat[is_na] <- 0
    sumScore <- rowSums(TSF_mat*wgts_mat)
  } else if(na_deal=="rm"){ # return NA when any factorscore is NA.
    sumScore <- TSF_mat %*% wgts
  } else {
    stop("uncorrect value of param 'na_deal'!")
  }
  
  if(keep_single_factors){
    re <- cbind(mTSF,factorscore=sumScore)
  } else {
    re <- cbind(dplyr::select(mTSF,-one_of(factorNames)),factorscore=sumScore)
  }
  return(re)
}




#' @rdname getMultiFactor
#' @export
#' @examples 
#' #-- TSFs2mTSF
#' mTSF <- TSFs2mTSF(TSFs)
#' mTSFR <- TSFs2mTSF(TSFRs)
TSFs2mTSF <- function(TSFs, factorNames = names(TSFs)){
  nrows <- sapply(TSFs,NROW)
  if(!all(nrows[1]==nrows)){
    stop("NROWs of TSFs are not all equal!\n")
    print(nrows)
  }
  for(i in 1:length(TSFs)){
    factorName <- factorNames[i]
    TSF <- TSFs[[i]]
    TSF <- renameCol(TSF,"factorscore",factorNames[i])
    if(i==1L){
      kept_cols <- colnames(TSF)[is_usualcols(colnames(TSF))]
      re <- TSF[, c(kept_cols,factorNames[i])]
    } else {
      re <- merge.x(re,TSF[, c("date","stockID",factorNames[i])], by=c("date","stockID"))
    }
  } 
  return(re)
}


#' @rdname getMultiFactor
#' @export
#' @examples 
#' #-- mTSF2TSFs
#' TSFs <- mTSF2TSFs(mTSF)
#' TSFRs <- mTSF2TSFs(mTSFR)
mTSF2TSFs <- function(mTSF, factorNames = guess_factorNames(mTSF)){
  all_cols <- colnames(mTSF)
  # if("factorscore" %in% all_cols){
  #   stop("There should not be 'factorscore' in the cols of mTSF! Please try to rename it.")
  # }
  no_fnames <- setdiff(all_cols,factorNames)
  re <- list()
  for (i in 1:length(factorNames)){
    fname <- factorNames[i]
    re_i <- mTSF[,c(no_fnames,fname)]
    re_i <- renameCol(re_i,fname,"factorscore")
    re[[i]] <- re_i
  }
  names(re) <- factorNames
  return(re)
}


#' TS_filter
#' 
#' sample or filter the TS by date.
#' 
#' @param TS a TS, TSF, TSFR ...
#' @param sample_N integer. size of sample days
#' @export
#' @examples
#' re <- TS_filter(ts,sample_N=8)
#' re <- TS_filter(ts,as.Date("2010-01-01"),as.Date("2011-01-01"))
TS_filter <- function(TS, begT, endT, sample_N){
  if(!missing(sample_N) && (!missing(begT) | !missing(endT))){
    stop("sample_N and begT should have only one!")
  }
  if(!missing(sample_N)){
    dates <- unique(TS$date)
    dates <- sample(dates,min(sample_N, length(dates)))
    TS <- dplyr::filter(TS,date %in% dates)
  } else {
    begT <- if(missing(begT)) as.Date("1900-01-01") else  begT
    endT <- if(missing(endT)) as.Date("9999-01-01") else  endT
    TS <- dplyr::filter(TS, date>=begT & date<=endT)
  }
  # if(!missing(begT)){
  #   TS <- dplyr::filter(TS, date>=begT & date<=endT)
  # }
  return(TS)
}

# ===================== xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ==============
# --------------------  factor refining functions ------------
# ===================== xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ==============



#' factor_bcPower
#' 
#' @param lambda "auto" or a numeric.
#' @param out_type "factor", "lambda" or "both".
#' @export
#' @author Han.Qian
factor_bcPower <- function(TSF,lambda="auto",out_type=c("factor","lambda","both"),fname = "factorscore"){
  out_type <- match.arg(out_type)
  if(!all(TSF[,fname,drop = FALSE] > 0, na.rm = TRUE)) stop("The factorscore should all be positive to apply bcPower.")
  if(lambda != "auto"){
    TSF[,fname] <- car::bcPower(TSF[,fname], lambda = lambda)
    return(TSF)
  }else{
    check.colnames(TSF, c("date",fname))
    TSF <- renameCol(TSF, fname, "mazi_tmp_column")
    mdata <- dplyr::group_by(TSF, date)
    subfun <- function(TSF){
      lambda <- (car::powerTransform(TSF$mazi_tmp_column))$lambda
      TSF$lambda <- lambda
      TSF$mazi_tmp_column <- car::bcPower(TSF$mazi_tmp_column, lambda)
      return(TSF)
    }
    mdata <- dplyr::do(mdata, subfun(.))
    mdata <- as.data.frame(mdata)
    mdata <- renameCol(mdata, "mazi_tmp_column", fname)
    if(out_type == "factor"){
      # drop lambda
      mdata$lambda <- NULL
    }else if(out_type == "lambda"){
      # unique lambda
      mdata <- unique(mdata[,c("date","lambda")])
    }
    return(mdata)
  }
}




#' factor_outlier
#' 
#' @param method Currently support four types : none, sd, mad, boxplot.
#' @param par The parameters passed into the method.
#' @param sectorAttr could be a sectorAttr list or NULL.
#' @export
#' @author Han.Qian
#' @examples 
#' RebDates <- getRebDates(as.Date('2011-03-17'),as.Date('2012-04-17'),'month')
#' TS <- getTS(RebDates,'EI000300')
#' TSF <- gf.NP_YOY(TS)
#' TSF <- factor_outlier(TSF, method = "mad", par = 1.5)
factor_outlier <- function (TSF, method=c("none","sd","mad","boxplot","percentage"), par,
                            sectorAttr=defaultSectorAttr()) {
  method <- match.arg(method)
  # direct return
  if(method == "none"){
    return(TSF)
  }
  # shrink columns (merge back later)
  TSF_core <- TSF[,intersect(names(TSF), c("date","stockID","factorscore","sector"))]
  # sector splitting
  if(!is.null(sectorAttr)){
    TSF_core <- getSectorID(TS = TSF_core, sectorAttr = sectorAttr, fillNA = TRUE)
    key=c("date","sector")
  }else{
    key=c("date")
  }
  TSF_core <- data.table::as.data.table(TSF_core,key=key)
  # sub fun
  outlier_sub_fun <- function(sf, method, par){
    if(method == "sd"){
      outlier_u <- mean(sf, na.rm = TRUE) + par * sd(sf, na.rm = TRUE)
      outlier_l <- mean(sf, na.rm = TRUE) - par * sd(sf, na.rm = TRUE)
    }else if(method == "mad"){
      outlier_u <- median(sf, na.rm = TRUE) + par * mad(sf, na.rm = TRUE)
      outlier_l <- median(sf, na.rm = TRUE) - par * mad(sf, na.rm = TRUE)
    }else if(method == "boxplot"){
      boxplot_stat <- boxplot.stats(sf, coef = par)[[1]]
      outlier_u <- max(boxplot_stat)
      outlier_l <- min(boxplot_stat)
    }else if(method == "percentage"){
      outlier_u <- quantile(sf, probs = 1-par/100, na.rm = TRUE)
      outlier_l <- quantile(sf, probs = 0+par/100, na.rm = TRUE)
    }
    sf <- ifelse(sf > outlier_u, outlier_u,
                 ifelse(sf < outlier_l, outlier_l, sf))
    sf <- as.numeric(sf)
    return(sf)
  }
  # processing
  TSF_core <- TSF_core[,factorscore:=outlier_sub_fun(factorscore,method,par),by=key]
  # merge back
  TSF <- TSF[,setdiff(colnames(TSF), "factorscore")]
  TSF <- merge.x(TSF, TSF_core, by = c("date","stockID"))
  # remove sector column if its not from the input
  if(!identical(sectorAttr, "existing") & !is.null(sectorAttr)){
    TSF <- TSF[,setdiff(colnames(TSF), "sector")]
  }
  # output
  return(TSF)
}

#' factor_std
#' 
#' @param method Currently supporting four types : none, scale, robustScale, reg.
#' @param log Logical value. Default FALSE.
#' @param sectorAttr could be a sectorAttr list or NULL.
#' @param regLists factorLists. This argument will used when method is reg. The factorscore will be orthogonized by the factors in regLists.
#' @param regWgt NULL,"existing","sector","cap"
#' @export
#' @author Han.Qian
#' @examples 
#' RebDates <- getRebDates(as.Date('2011-03-17'),as.Date('2012-04-17'),'month')
#' TS <- getTS(RebDates,'EI000300')
#' TSF <- gf.NP_YOY(TS)
#' regLists <- buildFactorLists(buildFactorList(factorFun = 'gf.PE_ttm',factorDir = -1,factorRefine=refinePar_default("scale")))
#' TSF <- factor_std(TSF, method = "reg", log = FALSE, regLists = regLists)
factor_std <- function(TSF,method=c("none","scale","robustScale","reg"),
                       log=FALSE, 
                       sectorAttr=defaultSectorAttr(),
                       regLists=NULL,
                       regWgt=NULL
){
  method <- match.arg(method)
  
  # log
  if(log){
    TSF <- dplyr::mutate(TSF, factorscore=ifelse(factorscore==0,NA,log(factorscore)))
  }
  
  if(method == "none"){ # do nothing
    return(TSF)
    
  }else if(method == "reg"){ # reg part
    TSF <- renameCol(TSF, "factorscore", "Y_for_reg")
    
    # shrink columns (merge back later)
    TSF_core <- TSF[,intersect(names(TSF), c("date","stockID","Y_for_reg","sector","glm_wgt"))]
    
    # get glm_wgt
    if(!is.null(regWgt) & !identical(regWgt, "existing")){
      if(regWgt == "sector"){
        if(!identical(sectorAttr, "existing")){
          TSF_core <- getSectorID(TSF_core, sectorAttr = defaultSectorAttr("ind",336), fillNA = TRUE) 
          sectorAttr <- "existing"
        }
        glm_wgt_data <- dplyr::group_by(TSF_core, date, sector)
        glm_wgt_data <- dplyr::summarise(glm_wgt_data, glm_wgt = 1/var(Y_for_reg, na.rm = TRUE))
        TSF_core <- merge.x(TSF_core, glm_wgt_data, by = c("date","sector"))
        TSF_core <- as.data.frame(TSF_core)
        
      }else if(regWgt == "cap"){ # to be modified!
        TSF_core <- getSectorID(TSF_core, sectorAttr = defaultSectorAttr("fct",fct_std = list(fl_cap(var = "float_cap")),fct_level = 6), fillNA = TRUE) 
        glm_wgt_data <- dplyr::group_by(TSF_core, date, sector)
        glm_wgt_data <- dplyr::summarise(glm_wgt_data, glm_wgt = 1/var(Y_for_reg, na.rm = TRUE))
        TSF_core <- merge.x(TSF_core, glm_wgt_data, by = c("date","sector"))
        TSF_core <- as.data.frame(TSF_core)
      }else{
        stop("The glm_wgt could not be identified.")
      }
    }
    
    # glm_wgt checking
    if(!is.null(regWgt)){
      if(any(is.infinite(TSF_core$glm_wgt))){
        TSF_core$glm_wgt[is.infinite(TSF_core$glm_wgt)] <- NA
      }
      if(any(is.na(TSF_core$glm_wgt))){
        TSF_core <- dplyr::group_by(TSF_core, date)
        TSF_core <- dplyr::mutate(TSF_core,
                                  glm_wgt = ifelse(is.na(glm_wgt), yes = median(glm_wgt, na.rm = TRUE), no = glm_wgt))
        TSF_core <- as.data.frame(TSF_core)
      }
    }
    
    # get regList 
    if(!is.null(regLists)){
      TSF_core <- getMultiFactor(TSF_core, FactorLists = regLists, silence = TRUE)
      fnames_list <- sapply(regLists, '[[', 'factorName')
    }
    
    # orthogon
    if(is.null(regWgt)){
      TSF_core <- factor_orthogon_single(TSF_core, y = "Y_for_reg", sectorAttr = sectorAttr, regType = "lm")
    }else{
      TSF_core <- factor_orthogon_single(TSF_core, y = "Y_for_reg", sectorAttr = sectorAttr, regType = "glm")
    }
    TSF_core <- TSF_core[,c("date","stockID","Y_for_reg")]
    TSF_core <- renameCol(TSF_core, "Y_for_reg", "factorscore")
    
    # merge back
    TSF <- TSF[,setdiff(colnames(TSF), "Y_for_reg")]
    TSF <- merge.x(TSF, TSF_core, by = c("date","stockID"))
    
    # scale to N(0,1) 
    # if(is.null(regWgt)){ # by date*sector
    #   TSF <- factor_std(TSF, method = "robustScale", sectorAttr = sectorAttr)
    # } else { # by date
    #   TSF <- factor_std(TSF, method = "robustScale", sectorAttr = NULL)
    # }
    TSF <- factor_std(TSF, method = "robustScale", sectorAttr = sectorAttr)
    
    # output
    return(TSF)
    
  }else{ # scale part
    # shrink columns (merge back later)
    TSF_core <- TSF[,intersect(names(TSF), c("date","stockID","factorscore","sector"))]
    # sector splitting
    if(!is.null(sectorAttr)){
      TSF_core <- getSectorID(TS = TSF_core, sectorAttr = sectorAttr, fillNA = TRUE, ungroup=10)
      key=c("date","sector")
    }else{
      key=c("date")
    }
    TSF_core <- data.table::as.data.table(TSF_core,key=key)
    # sub fun
    std_sub_fun <- function(sf, method){
      if(method == "scale"){
        sf <- as.numeric(scale(sf))
      }else if(method == "robustScale"){
        median_ <- median(sf, na.rm = TRUE)
        sd_ <- sd(sf, na.rm = TRUE)
        if(is.na(sd_) | sd_ == 0){
          sf <- as.numeric(NA)
        }else{
          sf <- (sf - median_)/sd_
        }
      }
      return(sf)
    }
    # processing 
    TSF_core <- TSF_core[,factorscore:=std_sub_fun(factorscore,method),by=key]
    
    # merge back
    TSF <- TSF[,setdiff(colnames(TSF), "factorscore")]
    TSF <- merge.x(TSF, TSF_core, by = c("date","stockID"))
    
    # remove sector column if its not from the input
    if(!identical(sectorAttr, "existing") & !is.null(sectorAttr)){
      TSF <- TSF[,setdiff(colnames(TSF), "sector")]
    }
    # output
    return(TSF)
  }
}






#' factor_na
#' 
#' @param method Currently support three types : none, mean, median.
#' @param sectorAttr could be a sectorAttr list or NULL.
#' @export
#' @author Han.Qian
#' @examples 
#' RebDates <- getRebDates(as.Date('2011-03-17'),as.Date('2012-04-17'),'month')
#' TS <- getTS(RebDates,'EI000300')
#' TSF <- gf.NP_YOY(TS)
#' TSF <- factor_na(TSF, method = "median")
factor_na <- function (TSF, method=c("none","mean","median"),
                       sectorAttr=defaultSectorAttr()) {
  method <- match.arg(method)
  # direct return
  if(method == "none"){
    return(TSF)
  }
  # shrink columns (merge back later)
  TSF_core <- TSF[,intersect(names(TSF), c("date","stockID","factorscore","sector"))]
  # sector splitting
  if(!is.null(sectorAttr)){
    TSF_core <- getSectorID(TS = TSF_core, sectorAttr = sectorAttr, fillNA = TRUE)
    key=c("date","sector")
  }else{
    key=c("date")
  }
  TSF_core <- data.table::as.data.table(TSF_core,key=key)
  # sub fun
  na_sub_fun <- function(sf, method){
    # browser()
    if(method == "mean"){
      ind_na_ <- is.na(sf)
      mean_ <- mean(sf, na.rm = TRUE)
      sf[ind_na_] <- mean_
    }else if(method == "median"){
      ind_na_ <- is.na(sf)
      median_ <- median(sf, na.rm = TRUE)
      sf[ind_na_] <- median_
    }
    return(sf)
  }
  # batch processing
  TSF_core <- TSF_core[,factorscore:= na_sub_fun(factorscore,method),by=key]
  if(sum(is.na(TSF_core$factorscore))>0){ # If sector is too small, ungroup it.
    TSF_core <- TSF_core[,factorscore:= na_sub_fun(factorscore,method),by=c("date")]
  }
  # merge back
  TSF <- TSF[,setdiff(colnames(TSF), "factorscore")]
  TSF <- merge.x(TSF, TSF_core, by = c("date","stockID"))
  # remove sector column if its not from the input
  if(!identical(sectorAttr, "existing") & !is.null(sectorAttr)){
    TSF <- TSF[,setdiff(colnames(TSF), "sector")]
  }
  return(TSF)
}



#' refinePar_default
#' 
#' @param type Could be "none","scale","scale_sec","reg","reg_glm_sec","reg_glm_cap"
#' @param sectorAttr could be a sectorAttr list, or NULL(means no grouping by sector).
#' @param log TRUE or FALSE
#' @param regLists a \bold{factorLists}
#' @export
#' @author Han.Qian
refinePar_default <- function(type=c("none","scale","scale_sec","reg","reg_glm_sec","reg_glm_cap"), 
                              sectorAttr=defaultSectorAttr("ind"),
                              log=FALSE,
                              regLists=list(fl_cap(log = TRUE,var="float_cap",datasrc = "memory"))
                              ){
  type <- match.arg(type)
  if(type=="none"){
    re <- list(outlier=list(method = "none",
                            par=NULL,
                            sectorAttr= NULL),
               std=list(method = "none",
                        log=log, 
                        sectorAttr=NULL,
                        regLists=NULL),
               na=list(method = "none", 
                       sectorAttr=NULL)
               )
  }else if(type=="scale"){
    re <- list(outlier=list(method = "boxplot",
                            par=1.5,
                            sectorAttr= NULL),
               std=list(method = "robustScale",
                        log=log, 
                        sectorAttr=NULL,
                        regLists=NULL),
               na=list(method = "median", 
                       sectorAttr=sectorAttr)
    )
  }else if(type=="scale_sec"){
    re <- list(outlier=list(method = "boxplot",
                            par=1.5,
                            sectorAttr= NULL),
               std=list(method = "robustScale",
                        log=log, 
                        sectorAttr=sectorAttr,
                        regLists=NULL),
               na=list(method = "median", 
                       sectorAttr=sectorAttr)
    )
  }else if(type=="reg"){
    re <- list(outlier=list(method = "boxplot",
                            par=1.5,
                            sectorAttr= NULL),
               std=list(method = "reg",
                        log=log, 
                        sectorAttr=sectorAttr,
                        regLists=regLists,
                        regWgt=NULL),
               na=list(method = "median", 
                       sectorAttr=sectorAttr)
    )
  }else if(type=="reg_glm_sec"){
    re <- list(outlier=list(method = "boxplot",
                            par=1.5,
                            sectorAttr= NULL),
               std=list(method = "reg",
                        log=log, 
                        sectorAttr=sectorAttr,
                        regLists=regLists,
                        regWgt="sector"),
               na=list(method = "median", 
                       sectorAttr=sectorAttr)
               )
  }else if(type=="reg_glm_cap"){
    re <- list(outlier=list(method = "boxplot",
                            par=1.5,
                            sectorAttr= NULL),
               std=list(method = "reg",
                        log=log, 
                        sectorAttr=sectorAttr,
                        regLists=regLists,
                        regWgt="cap"),
               na=list(method = "median", 
                       sectorAttr=sectorAttr)
    )
  }
  return(re)
}

#' setrefinePar
#' 
#' @param refinePar A refinePar list.
#' @param all_sectorAttr set all sectorAttr of std, outlier, na, in the same time
#' @return A refinePar list.
#' @export
#' @author Han.Qian
setrefinePar <- function(refinePar=refinePar_default(),
                         outlier_method,
                         outlier_par,
                         outlier_sectorAttr,
                         std_method,
                         std_log,
                         std_sectorAttr,
                         std_regLists,
                         std_regWgt,
                         na_method,
                         na_sectorAttr,
                         all_sectorAttr){
  if(!missing(outlier_method)){
    refinePar$outlier$method <- outlier_method
  }
  if(!missing(outlier_par)){
    refinePar$outlier$par <- outlier_par
  }
  if(!missing(outlier_sectorAttr)){
    refinePar$outlier$sectorAttr <- outlier_sectorAttr
  }
  if(!missing(std_method)){
    refinePar$std$method <- std_method
  }
  if(!missing(std_log)){
    refinePar$std$log <- std_log
  }
  if(!missing(std_sectorAttr)){
    refinePar$std$sectorAttr <- std_sectorAttr
  }
  if(!missing(std_regLists)){
    refinePar$std$regLists <- std_regLists
  }
  if(!missing(std_regWgt)){
    refinePar$std$regWgt <- std_regWgt
  }
  if(!missing(na_method)){
    refinePar$na$method <- na_method
  }
  if(!missing(na_sectorAttr)){
    refinePar$na$sectorAttr <- na_sectorAttr
  }
  if(!missing(all_sectorAttr)){
    refinePar$outlier$sectorAttr <- all_sectorAttr
    refinePar$std$sectorAttr <- all_sectorAttr
    refinePar$na$sectorAttr <- all_sectorAttr
  }
  return(refinePar)
}



#' factor_refine
#' @rdname factor_refine
#' @name factor_refine
#' @param refinePar A refinePar list. Created by refinePar_default. Modified by setrefinePar.
#' @export
#' @author Han.Qian
factor_refine <- function(TSF, refinePar=refinePar_default(),drop_sector=TRUE){
  if(is.null(refinePar)){
    return(TSF)
  }
  sector_outlier <- refinePar$outlier$sectorAttr
  sector_std <- refinePar$std$sectorAttr
  sector_na <- refinePar$na$sectorAttr
  if(identical(sector_outlier,sector_std) & identical(sector_std,sector_na) & !is.null(sector_outlier)){
    TSF <- getSectorID(TSF,sectorAttr = sector_outlier, fillNA = TRUE)
    refinePar <- setrefinePar(refinePar,
                              outlier_sectorAttr = "existing",
                              std_sectorAttr = "existing",
                              na_sectorAttr = "existing")
  }
  
  TSF <- factor_outlier(TSF,
                        method = refinePar$outlier$method,
                        par = refinePar$outlier$par,
                        sectorAttr=refinePar$outlier$sectorAttr)
  
  TSF <- factor_std(TSF,
                    method = refinePar$std$method,
                    log = refinePar$std$log,
                    sectorAttr = refinePar$std$sectorAttr,
                    regLists = refinePar$std$regLists,
                    regWgt = refinePar$std$regWgt)
  
  TSF <- factor_na(TSF,
                   method = refinePar$na$method,
                   sectorAttr = refinePar$na$sectorAttr)
  if(drop_sector & ("sector" %in% colnames(TSF))){
    TSF <- dplyr::select(TSF,-sector)
  }
  return(TSF)
}

#' @param refinePar_lists A list of (refinePar)s, each refinePar is a list built by refinePar_default.
#' @param refinePar_names The character vector of names, could be missing.
#' @rdname factor_refine
#' @export
#' @examples 
#' rawTSF <- gf.NP_YOY(TS, src = "fin")
#' refinePar_lists <- list(refinePar_default(type = "none"),
#'                         refinePar_default(type = "reg"),
#'                         refinePar_default(type = "scale"))
#' factor_refine_MF(rawTSF, refinePar_lists)
factor_refine_MF <- function(TSF, refinePar_lists, refinePar_names){
  # ARGUMENTS CHECKING
  loop_length <- length(refinePar_lists)
  if(missing(refinePar_names)){
    refinePar_names <- names(refinePar_lists)
    if(is.null(refinePar_names)){
      refinePar_names <- paste0("refinePar_",1:loop_length)
    }
  }else{
    if(length(refinePar_lists) != length(refinePar_names)){
      stop("The length of refinePar_names does not match the length of refinePar_lists.")
    }
  }
  
  # REFINE PART
  for( i in 1:loop_length){
    refinePar_ <- refinePar_lists[[i]]
    TSF_ <- factor_refine(TSF, refinePar = refinePar_, drop_sector = TRUE)
    TSF_ <- renameCol(TSF_, "factorscore", refinePar_names[i])
    if( i == 1L){
      mTSF <- TSF_
    }else{
      mTSF <- merge.x(mTSF, TSF_, by = c("date", "stockID"))
    }
  }
  return(mTSF)
}





# ===================== xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ==============
# --------------------  TS removing functions ------------
# ===================== xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ==============
#' remove suspension stock from TS
#'
#' @param TS 
#' @param nearby a integer vector. default is 1L, which means remove the suspending of the next tradingday. see detail in \code{\link{trday.nearby}}.
#' @param datasrc
#' @examples
#' RebDates <- getRebDates(as.Date('2013-03-17'),as.Date('2016-04-17'),'month')
#' TS <- getTS(RebDates,'EI000985')
#' re <- rm_suspend(TS) # remove Suspend of nextday
#' re1 <- rm_suspend(TS,nearby=c(0,1))# remove Suspend of today and nextday
#' @export
rm_suspend <- function(TS,nearby=1L,
                       datasrc=defaultDataSRC()){
  TS_ <- is_suspend(TS=TS,nearby=nearby,datasrc = datasrc)
  TS <- TS[!TS_$sus,]
  return(TS)
}



#' remove price limits
#' 
#' @param nearby
#' @param lim a vector of length 2.
#' @param priceType "close" or "open".
#' @examples
#' RebDates <- getRebDates(as.Date('2013-03-17'),as.Date('2016-04-17'),'month')
#' TS <- getTS(RebDates,'EI000985')
#' re <- rm_priceLimit(TS)
#' re1 <- rm_priceLimit(TS,nearby=-1:1)
#' re2 <- rm_priceLimit(TS,lim=c(-Inf,10)) # remove limit-up
#' @export
rm_priceLimit <- function(TS,nearby=1,lim=c(-10, 10),priceType=c("close","open"),
                          datasrc=defaultDataSRC()){
  TS_ <- is_priceLimit(TS=TS,nearby = nearby,lim = lim, datasrc = datasrc)
  TS <- TS[!TS_$overlim,]
  return(TS)
}


#' @export
rm_blacklist <- function(TS){
  TS_ <- is_blacklist(TS=TS)
  TS <- TS[!TS_$is_blacklist,]
}

rm_not_in_whitelist <- function(TS){
  
}
rm_st <- function(TS){
  
}


#' rm_delist
#'
#' remove delisted stocks
#' @export
rm_delist <- function(TS,nearby=months(2),datasrc='jy'){
  TS_ <- is_delist(TS=TS,nearby=nearby,datasrc = datasrc)
  TS <- TS[!TS_$delist,]
  return(TS)
}


#' is_suspend
#'
#' @param nearby a integer vector. 0 means today, 1 means next tradingday. default is 0L. see detail in \code{\link{trday.nearby}}.
#' @author Ruifei.yin
#' @examples
#' RebDates <- getRebDates(as.Date('2013-03-17'),as.Date('2016-04-17'),'month')
#' TS <- getTS(RebDates,'EI000985')
#' re <- is_suspend(TS) #  Suspend of nextday
#' re1 <- is_suspend(TS,nearby=c(0,1))#  Suspend of today and nextday
#' @export
is_suspend <- function(TS,nearby=0,
                       datelist,stockID, 
                       drop,
                       datasrc=defaultDataSRC()){
  if (missing(TS) && any(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should at least have one!")
  }
  if (!missing(TS) && !all(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should only have one!")
  }
  if(missing(drop)){
    drop <- if(missing(TS)) TRUE else FALSE
  }
  
  if (missing(TS)){
    TS <- expand.grid(date=datelist, stockID=stockID)
  }
  
  check.TS(TS)
  if(datasrc=='ts'){
    sus <- rep(FALSE,nrow(TS))
    for (by in nearby){
      TS_ <- data.frame(date=trday.nearby(TS$date,by), stockID=TS$stockID)
      TS_ <- getTech_ts(TS_, funchar="istradeday4()",varname="trading")
      sus_ <- (!TS_$trading == 1) & TS_$date<=Sys.Date()
      sus <- sus|sus_
    }
  } else {
    sus <- rep(FALSE,nrow(TS))
    for (by in nearby){
      TS_ <- data.frame(date=trday.nearby(TS$date,by), stockID=TS$stockID)
      istradingday <- trday.is(TS=TS_, drop = TRUE)
      sus_ <- (!istradingday) 
      sus <- sus|sus_
    }
  }
  TS <- data.frame(TS,sus=ifelse(is.na(sus), FALSE, sus))
  
  
  if(drop){
    return(TS$sus)
  }else{
    return(TS)
  }
}


#' is_priceLimit
#' 
#' if over-price-limit
#' @param nearby a integer vector. 0 means today, 1 means next tradingday. default is 0L. see detail in \code{\link{trday.nearby}}.
#' @param lim a vector of length 2.
#' @param priceType "close" or "open".
#' @author Ruifei.yin
#' @examples
#' RebDates <- getRebDates(as.Date('2013-03-17'),as.Date('2016-04-17'),'month')
#' TS <- getTS(RebDates,'EI000985')
#' re <- is_priceLimit(TS)
#' re1 <- is_priceLimit(TS,nearby=-1:1)
#' re2 <- is_priceLimit(TS,lim=c(-Inf,10)) #  limit-up
#' is_priceLimit(stockID = "EQ300576",datelist = as.Date("2016-12-22"),priceType = "open") #  open-price over limit
#' @export
is_priceLimit <- function(TS,nearby=0,lim=c(-10, 10), priceType=c("close","open"),
                          datelist,stockID, 
                          drop,
                          datasrc=defaultDataSRC()){
  
  if (missing(TS) && any(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should at least have one!")
  }
  if (!missing(TS) && !all(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should only have one!")
  }
  if(missing(drop)){
    drop <- if(missing(TS)) TRUE else FALSE
  }
  
  if (missing(TS)){
    TS <- expand.grid(date=datelist, stockID=stockID)
  }
  
  check.TS(TS)
  priceType <- match.arg(priceType)
  
  if(priceType=="close"){
    if(datasrc=='ts'){
      overlim <- rep(FALSE,nrow(TS))
      for (by in nearby){
        TS_ <- data.frame(date=trday.nearby(TS$date,by), stockID=TS$stockID)
        TS_ <- getTech_ts(TS_, funchar=c("StockPrevClose3()","close()"),varname=c("pre_close","close"))
        in_lim <- TS_$close > round(TS_$pre_close*(1+lim[1]/100),2) & TS_$close < round(TS_$pre_close*(1+lim[2]/100),2)
        overlim_ <- (!in_lim)  & TS_$date<=Sys.Date()
        overlim <- overlim|overlim_
      }
    } else {
      overlim <- rep(FALSE,nrow(TS))
      for (by in nearby){
        TS_ <- data.frame(date=trday.nearby(TS$date,by), stockID=TS$stockID)
        TS_ <- getTech(TS_,variables=c("pre_close","close") ,datasrc = datasrc)
        in_lim <- TS_$close > round(TS_$pre_close*(1+lim[1]/100),2) & TS_$close < round(TS_$pre_close*(1+lim[2]/100),2)
        overlim_ <- (!in_lim) 
        overlim <- overlim|overlim_
      }
    }
  } else if(priceType=="open"){
    if(datasrc=='ts'){
      overlim <- rep(FALSE,nrow(TS))
      for (by in nearby){
        TS_ <- data.frame(date=trday.nearby(TS$date,by), stockID=TS$stockID)
        TS_ <- getTech_ts(TS_, funchar=c("StockPrevClose3()","open()"),varname=c("pre_close","open"))
        in_lim <- TS_$open > round(TS_$pre_close*(1+lim[1]/100),2) & TS_$open < round(TS_$pre_close*(1+lim[2]/100),2)
        overlim_ <- (!in_lim)  & TS_$date<=Sys.Date()
        overlim <- overlim|overlim_
      }
    } else {
      overlim <- rep(FALSE,nrow(TS))
      for (by in nearby){
        TS_ <- data.frame(date=trday.nearby(TS$date,by), stockID=TS$stockID)
        TS_ <- getTech(TS_,variables=c("pre_close","open") ,datasrc = datasrc)
        in_lim <- TS_$open > round(TS_$pre_close*(1+lim[1]/100),2) & TS_$open < round(TS_$pre_close*(1+lim[2]/100),2)
        overlim_ <- (!in_lim) 
        overlim <- overlim|overlim_
      }
    }
  } else {
    stop("Invalid param of 'priceTpye'!")
  }
  
  
  TS <- data.frame(TS,overlim=ifelse(is.na(overlim), FALSE, overlim)) # if NA, set to FALSE
  
  if(drop){
    return(TS$overlim)
  }else{
    return(TS)
  }
}



#' @export
is_blacklist <- function(TS,
                         datelist,stockID, 
                         drop,
                         datasrc=defaultDataSRC()){
  
  blklist=c("EQ600061","EQ600886","EQ600216") # temporally
  
  if (missing(TS) && any(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should at least have one!")
  }
  if (!missing(TS) && !all(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should only have one!")
  }
  if(missing(drop)){
    drop <- if(missing(TS)) TRUE else FALSE
  }
  
  if (missing(TS)){
    TS <- expand.grid(date=datelist, stockID=stockID)
  }
  
  check.TS(TS)
  
  TS$is_blacklist <- TS$stockID %in% blklist
  
  
  if(drop){
    return(TS$is_blacklist)
  }else{
    return(TS)
  }
  
}

#' is_st
#' 
#' is st stock
#' @author Ruifei.yin
#' @examples
#' RebDates <- getRebDates(as.Date('2013-03-17'),as.Date('2016-04-17'),'month')
#' TS <- getTS(RebDates,'EI000985')
#' re <- is_st(TS)
#' @export
is_st <- function(TS,
                  datelist,stockID, 
                  drop,
                  datasrc=defaultDataSRC()){
  if (missing(TS) && any(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should at least have one!")
  }
  if (!missing(TS) && !all(missing(stockID),missing(datelist))) {
    stop("Param TS and combination of stockID and datelist should only have one!")
  }
  if(missing(drop)){
    drop <- if(missing(TS)) TRUE else FALSE
  }
  
  if (missing(TS)){
    TS <- expand.grid(date=datelist, stockID=stockID)
  }
  
  check.TS(TS)
  
  if(datasrc=='ts'){
    TS_ <- TS.getTech_ts(TS, funchar="IsST_()")
    TS_ <- renameCol(TS_, c("IsST_()"), c("is_st"))
    TS_$is_st <- ifelse(TS_$is_st==1,TRUE,FALSE)
  } else if(datasrc=="local"){
    TS_ <- transform(TS,date=rdate2int(date))
    con <- db.local("qt")
    RSQLite::dbWriteTable(con,"yrf_tmp",TS_,overwrite=TRUE,row.names=FALSE)
    qr <- "select y.*,q.SecuAbbr 'is_st' from yrf_tmp y
    left join QT_DailyQuote2 q on y.date=q.TradingDay and y.stockID=q.ID"
    TS_ <- RSQLite::dbGetQuery(con,qr)
    RSQLite::dbDisconnect(con)
    TS_ <- transform(TS_,date=intdate2r(date),
                     is_st=stringr::str_detect(is_st,'ST'))
  }
  
  
  if(drop){
    return(TS_$is_st)
  }else{
    return(TS_)
  }
  
}
QuantAndrew/QDataGet documentation built on May 14, 2019, 7:35 a.m.