R/z_dep-deprecated_t_general.R

Defines functions importrawdf.f clean.f mutateselect.f impute.f variabletonumeric.f returncode.c.f returnstock.c.f returnclose.n.f nowarrant.df.f uniquecode.v.f subcode.df.f closemod.v.f funlag funfwd

Documented in clean.f closemod.v.f closemod.v.f funfwd funfwd funlag funlag importrawdf.f impute.f impute.f mutateselect.f nowarrant.df.f returnclose.n.f returnclose.n.f returncode.c.f returncode.c.f returnstock.c.f returnstock.c.f subcode.df.f subcode.df.f uniquecode.v.f uniquecode.v.f variabletonumeric.f variabletonumeric.f

#' Import raw df
#'
#' Import raw df from a specified directory with ^201*.csv M+ Data.
#' Dependencies: library0("plyr", "function0")
#' Return df
#' @param data.dir directory of M+ Data
#' @param ndays number of days to import
#' @keywords import raw df
#' @export
#' @examples
#' importrawdf.f(data.dir = "C:/Users/User/Desktop/tempmplus", length = ndays)
importrawdf.f <- function(data.dir, ndays = 60){
      
      # warning("importrawdf.f() has been deprecated, use importrawdf_f() instead.")
      .Deprecated("importrawdf_f")
      
      # setwd(data.dir)     
      libraryf(c("plyr", "dplyr", "function0"))
      
      data.dir <- formatdir.f(data.dir)
      
      flist <- list.files(path = data.dir, pattern = "^201.*.csv")
      N <- length(flist); n0 <- N - ndays + 1
      
      if(N == 0 | ndays == 0 ){
            stop("N = 0, No M+ Data! or length = 0! ")     
      }
      
      if(N > ndays){
            df.list <- lapply(n0:N, FUN =function(j){
                  df2 <- read.csv(paste0(data.dir, flist[j]), stringsAsFactors = F)
                  df2[,"Date"] <- fname2 <- strsplit(flist[j], split = "\\.")[[1]][1]
                  return(df2)
            }) #return list of df by date
      }else{
            #WARNING Message
            print(paste0("WARNING: N < n0, loaded  ", N, " files only"))
            
            df.list <- lapply(1:N, FUN =function(j){
                  df2 <- read.csv(paste0(data.dir,flist[j]), stringsAsFactors = F)
                  df2[,"Date"] <- fname2 <- strsplit(flist[j], split = "\\.")[[1]][1]
                  return(df2)
            }) #return list of df by date
            
      }

      return(rbind.fill(df.list)) ###dependent on plyr (LOAD plyr BEFORE dplyr)
}


#' clean.f function (still in progress)
#' 
#' Clean the raw.final.df (impute, and convert columns type)
#' Rename variale name to lower case; impute "last."; 
#' Dependencies: library0("lubridate"), impute.f (this package)
#' Return df (output: clean.df)
#' 
#' @param df default to raw.final.df
#' @keywords clean raw final df
#' @export
#' @examples
#' clean.f(df= raw.final.df)
clean.f <- function(df = raw.final.df){
      .Deprecated("clean_f")
      
      libraryf("lubridate")
      
      #Convert all factors to character
      var <- names(df)
      for(i in var){
            if(is.factor(df[,i])){
                  df[,i] <- as.character(df[,i])     
            }
      }
      
      #1. Rename variale name to lower case
      names(df) <- tolower(names(df))
      
      #2a. Impute with "prv.close."
      for(var in c("last.", "high", "low", "open")){
            df <- impute.f(var, df, option = 1)
      }
      #2b. Impute with 0
      for(var in c("chg.")){
            df <- impute.f(var, df, option = 2)
      }
      #2bb. Impute and fix (chg..)
      for(var in c("chg..")){
            df <- impute.f(var, df, option = 4)
      }
      #2bc. Impute and fix (chg.bid)
      for(var in c("chg.bid")){
            df <- impute.f(var, df, option = 5)
      }
      #2c. Impute 0 VWAP with prv.close
      for(var in c("vwap")){
            df <- impute.f(var, df, option = 3)
      }
      
      #3. Important variable change to numeric
      variabletonumeric.v <- c("prv.close.", "open", "high", "low", "last.", "chg.", "chg..", "chg.bid", "l4q.eps", "vwap");
            for(var in variabletonumeric.v){
                  df <- variabletonumeric.f(var, df);
            }
      
      #4. convert date to date class
            df[,"date"] <- ymd(df[,"date"])
            
            
            
      ###RETURN###
      return(df)     
}


#' mutateselect.f function (still in progress)
#' 
#' select useful columns and mutate/add new columns (lastmod)
#' 
#' Dependencies: library0("plyr", "dplyr"), closemod.v.f (this package)
#' Return df (output: final.df)
#' 
#' @param df default to clean.df
#' @keywords mutate select new columns
#' @export
#' @examples
#' mutateselect.f(df= clean.df)
mutateselect.f <- function(df = clean.df){
      .Deprecated("mutate_f")
      
      requiredcolumns <-  c("code.", "stock.", "date", 
                          "prv.close.", "open", "high", "low", "last.", "ref.", 
                          "chg..", "chg.", "chg.bid", "vol.", 
                          "vwap", "swg.", "swg..",
                          "value.", "buy.value", "sell.value",
                          "s.issued", "m.cap.", 
                          "status", "status.ind", 
                          "sector", 
                          "l4q.date", 
                          "l4q.eps", "l4q.revenue", "l4q.net.profit",
                          "nab.", "revenue", "net.profit" )
      u1 <- colnames(df) %in% requiredcolumns
      df1 <- df[,u1]
      
      #add lastmod
      df2 <- df1 %>% group_by(code.) %>% mutate(lastmod = closemod.v.f(chg..)) 
      
      
      #filter off delisted company
      datelatest <- as.data.frame(df2)[nrow(df2),"date"] - 20 #20days tolerance
      df3 <- df2 %>% group_by(code.) %>% mutate(status2 = max(date) >= datelatest)
      df3 <- df3 %>% filter(status2)
      df3 <- as.data.frame(df3)
      
      return(df3)

}

##########################
###SUB FUNCTION###########
##########################

#' Sub-function: impute.f
#' 
#' impute
#' 
#' @param var var
#' @param df df
#' @param option option
#' @keywords impute
#' @export
impute.f <- function(var, df, option){
      if(option == 1){
            #prv.close
            u1 <- df[,var] %in% "-"
            df[u1,var] <- df[u1, "prv.close."]  
            
      }else if(option == 2){
            #impute 0
            u1 <- df[,var] %in% "-"
            df[u1,var] <- "0"
      }else if(option == 3){
            #impute with prv.close if vol=0
            u1 <- df[,var] %in% 0
            df[u1,var] <- df[u1, "prv.close."]  
      }else if(option == 4){
            #impute and fix chg.. (chg%)
            ret.v <- df[,var]
            retfix1.v <- gsub(pattern = "[+]|[%]", replacement = "", ret.v)
            retfix1.v[retfix1.v == "-"] <- 0
            retfix2.v <- as.numeric(retfix1.v)/100
            df[,var]  <- retfix2.v
      }else if(option == 5){
            #impute and fix chg.bid (ticks)
            chgbid.v <- df[,var]
            fix1.v <- gsub(pattern = "[+]", replacement = "", chgbid.v)
            fix1.v[fix1.v == "-"] <- 0
            fix2.v <- as.numeric(fix1.v)
            df[,var] <- fix2.v
      }
      return(df)
}
      
#' Sub-function: variabletonumeric.f
#' 
#' convert variable type to numeric
#' 
#' @param var var
#' @param df df
#' @keywords variable to numeric
#' @export
#' 
variabletonumeric.f <- function(var, df){
      df[, var] <- suppressWarnings(as.numeric(df[,var]));
      return(df)
}

#' Sub-function: returncode.c.f
#' 
#' return code from stock name
#' 
#' @param stockname stockname
#' @param df df
#' @keywords returncode
#' @export
#' 
returncode.c.f <- function(stockname, df = final.df){
      u1 <- df[, "stock."] %in% toupper(stockname)
      if(sum(u1) > 0){
            return(df[u1, "code."][1])
      }else{
            print(paste0("No such stock exist!"));
            return(NA);
      }
}

#' Sub-function: returnstock.c.f
#' 
#' return latest stock name from stock code"
#' 
#' return code from stock name
#' @param code code 
#' @param df final.df
#' @keywords returnstock
#' @export
#' 
returnstock.c.f <- function(code, df = final.df){
      u1 <- df[, "code."] %in% toupper(code)
      if(sum(u1) > 0){
            return(df[u1, "stock."][sum(u1)])
      }else{
            print(paste0("No such stock exist!"));
            return(NA);
      }
}

#' Sub-function: returnstock.c.f
#' 
#' return the closing price n days before today"
#' @param code code
#' @param df default to final.df
#' @param n default to 0
#' @keywords returnclose
#' @export
#'
returnclose.n.f <- function(code, df = final.df, n = 0){
            u1 <- df[, "code."] %in% toupper(code)
            if(sum(u1) > n){
                  return(df[u1, "last."][sum(u1)-n])
            }else{
                  return(NA);
            }
}


#' Sub-function: nowarrant.df.f
#' 
#' return df without warrant"
#' @param df df
#' @keywords no warrant
#' @export
#'
nowarrant.df.f <- function(df = final.df){
      u1 <- nchar(df[,"code."])==4
      u2 <- df[,"code."] %in% "5235SS"  ##special for KLCC property
      
      nowarrant.df <- df[u1 | u2, ];
      return(nowarrant.df);
}


#' Sub-function: uniquecode.v.f
#' 
#' return unique vector of stock's code (With/without warrant)
#' @param df df
#' @param warrant true/false
#' @keywords uniquecode
#' @export
#'
uniquecode.v.f <- function(df = final.df, warrant = F){
      if(warrant){
            uniquecode.v <- unique(df[, "code."])
      }else{
            uniquecode.v <- unique( nowarrant.df.f(df)[, "code."] )
      }
      return(uniquecode.v);
}

#'Sub-function: subcode.df.f
#'
#' return subset df" - subset df based on code
#' @param code code
#' @param df default to final.df
#' @keywords subcode
#' @export
#'
subcode.df.f <- function(code, df = final.df){
      u <- df[,"code."] %in% code
      subcode.df <- df[u,]
      return(subcode.df);
}

#' Sub-function: closemod.v.f
#' 
#' modify closing price using return data, i.e. all stocks start from closing price of 1.00 and the next closing price is the product of previous closing price and (1+return)
#' @param return.v return.v
#' @keywords closemod
#' @export
#'
closemod.v.f <- function(return.v){
      L <- length(return.v)
      
      closemod.v <- sapply(1:L, FUN = function(x){
            if(x == 1){
                  return(1)
            }else{
                  prod(1+return.v[2:x])     
            }
            
      })
      
      # closemod.v <- formattable(closemod.v, digits = 3, format = "f")
      return(closemod.v)
}

#' funlag
#' 
#' return 1 value = fun(vector[L-n-lag+1: L-lag])
#' 
#' @export
funlag <- function(last.v, lag, n, fun){
      L <- length(last.v)
      if(L-n-lag+1 > 0){
            return(   fun( last.v[ (L-n-lag+1):(L-lag) ]  )   )
      }else{
            return(NA)
      }
}


#' funfwd
#' 
#' @export
#' 
funfwd <- function(return.v, predday, fwdn, fun = returnprod.f){
      L <- length(return.v)
      
      if( -1*predday + fwdn <= 0 & L-predday > 0 ){
            return(   fun( return.v[ (L-predday+1):(L-predday + fwdn)] )  )
      }else{
            return(NA)
      }
      
      
}
junyitt/tfunction documentation built on May 4, 2019, 4:23 p.m.