R/clean-1-clean_f.R

Defines functions clean_f impute.f

Documented in clean_f impute.f impute.f

#' clean_f function (still in progress)
#' 
#' Clean subraw_df (with warrant) OR stock_df (stock only).
#' Steps, in order: factor columns are turned into character, variable names
#' are renamed to lower case, placeholder values are imputed through impute.f,
#' the main price and fundamentals columns are passed through
#' variabletonumeric.f, the date columns are parsed with lubridate and the
#' rows are ordered by date.
#' Dependencies: lubridate, dplyr, impute.f and variabletonumeric.f
#' @importFrom magrittr %>%
#' @param df data frame to clean; defaults to stock_df
#' @return the cleaned data frame (plain data.frame, rows ordered by date)
#' @keywords clean raw final df
#' @export
#' @examples
#' clean_f(df= raw_final_df)
clean_f <- function(df = stock_df){
      #Factor columns -> character, so the text based fixes below can be applied
      for(col in names(df)){
            if(is.factor(df[,col])){
                  df[,col] <- as.character(df[,col])
            }
      }

      #1. Lower-case every variable name
      names(df) <- tolower(names(df))

      #2. Imputation plan, executed top to bottom. Each entry pairs an
      #   impute.f option with the columns it is applied to:
      #     option 1 -> "-" replaced by prv.close.
      #     option 2 -> "-" replaced by "0"
      #     option 4 -> percentage text cleaned and divided by 100
      #     option 5 -> signed tick text cleaned to numeric
      #     option 3 -> 0 replaced by prv.close.
      impute.plan <- list(
            list(option = 1, cols = c("last.", "high", "low", "open")),
            list(option = 2, cols = c("div.", "l4q.div")),
            list(option = 4, cols = c("chg..", "dir.shares.")),
            list(option = 5, cols = c("chg.bid")),
            list(option = 3, cols = c("vwap"))
      )
      for(step in impute.plan){
            for(col in step$cols){
                  df <- impute.f(col, df, option = step$option)
            }
      }

      #3. Important variables are handed to variabletonumeric.f one by one
      numeric.cols <- c("prv.close.", "open", "high", "low", "last.",
                        "chg..", "chg.bid", "vwap",
                        "l4q.nab", "l4q.net.profit", "l4q.revenue",
                        "nab.", "net.profit", "revenue",
                        "div.", "l4q.div")
      for(col in numeric.cols){
            df <- variabletonumeric.f(col, df)
      }

      #4. Parse the date columns: "date" is year-month-day, the other two
      #   are day-month-year
      date.parsers <- list(date = lubridate::ymd,
                           fye = lubridate::dmy,
                           l4q.date = lubridate::dmy)
      for(col in names(date.parsers)){
            df[,col] <- date.parsers[[col]](df[,col])
      }

      #5. Order rows by date and hand back a plain data.frame
      df <- df %>%
            dplyr::arrange(date) %>%
            as.data.frame()

      #Variables not fixed yet:
            #status, ent, listed.on, member.of.indices, sector,
            #status.ind
            #div.
            #fye
            #l4q.date, l4q.div
            #l4q.nab, l4q.net.profit, l4q.revenue, nab., net.profit
            #revenue
            #dir.shares.

      #Variables that need no fix:
            #s.issued.
            #ref.

      ###RETURN###
      return(df)
}


#' Sub-function: impute.f
#' 
#' Impute or repair a single column of a data frame. The rule is chosen by
#' \code{option}:
#' \itemize{
#'   \item 1: "-" placeholders are replaced by the same row's prv.close.
#'   \item 2: "-" placeholders are replaced by "0"
#'   \item 3: zero values (numeric 0, or text that parses to 0 such as
#'         "0.00") are replaced by the same row's prv.close.
#'   \item 4: plus and percent signs are stripped, a lone "-" becomes 0, and
#'         the column is converted to numeric and divided by 100
#'   \item 5: plus signs are stripped, a lone "-" becomes 0, and the column
#'         is converted to numeric
#' }
#' 
#' @param var name of the column to fix
#' @param df data frame containing \code{var}; options 1 and 3 also read
#'   the column prv.close.
#' @param option rule to apply, 1 to 5 (see above). Any other value leaves
#'   \code{df} untouched and raises a warning.
#' @return \code{df} with column \code{var} modified
#' @keywords impute
#' @export
impute.f <- function(var, df, option){
      if(option == 1){
            #"-" placeholder -> previous close of the same row
            is.dash <- df[,var] %in% "-"
            df[is.dash,var] <- df[is.dash, "prv.close."]
      }else if(option == 2){
            #"-" placeholder -> "0"
            is.dash <- df[,var] %in% "-"
            df[is.dash,var] <- "0"
      }else if(option == 3){
            #zero -> previous close of the same row.
            #The comparison is numeric so text zeros like "0.00" are caught,
            #not only the exact string "0". as.character() comes first so a
            #factor is parsed by its labels rather than its integer codes;
            #non-numeric text such as "-" becomes NA and is left alone.
            value.v <- suppressWarnings(as.numeric(as.character(df[,var])))
            is.zero <- value.v %in% 0
            df[is.zero,var] <- df[is.zero, "prv.close."]
      }else if(option == 4){
            #percentage text, e.g. "+1.5%" -> 0.015
            pct.v <- gsub(pattern = "[+]|[%]", replacement = "", df[,var])
            pct.v[pct.v %in% "-"] <- "0"
            df[,var] <- as.numeric(pct.v)/100
      }else if(option == 5){
            #signed tick text, e.g. "+3" -> 3
            tick.v <- gsub(pattern = "[+]", replacement = "", df[,var])
            tick.v[tick.v %in% "-"] <- "0"
            df[,var] <- as.numeric(tick.v)
      }else{
            #previously a silent no-op; keep returning df but tell the caller
            warning("impute.f: unknown option '", option,
                    "'; df returned unchanged", call. = FALSE)
      }
      return(df)
}
junyitt/tfunction documentation built on May 4, 2019, 4:23 p.m.