# R/Data_P.R
#
# Defines function: process_P

# options(datatable.fread.datatable=TRUE)
# options("datatable.fread.datatable")

#' @import data.table
#' @importFrom utils str globalVariables
#' @importFrom lubridate myd

# Column names for the performance file, listed in field order.
# process_P() hands this vector to fread() as `col.names`, so the position of
# each name must match the position of its field in the file.
Performance_Variables <- c(
  "LOAN_ID", "Monthly.Rpt.Prd", "Servicer.Name",
  "LAST_RT", "LAST_UPB",
  "Loan.Age", "Months.To.Legal.Mat", "Adj.Month.To.Mat",
  "Maturity.Date", "MSA", "Delq.Status", "MOD_FLAG", "Zero.Bal.Code",
  "ZB_DTE", "LPI_DTE", "FCC_DTE", "DISP_DT",
  "FCC_COST", "PP_COST", "AR_COST", "IE_COST", "TAX_COST",
  "NS_PROCS", "CE_PROCS", "RMW_PROCS", "O_PROCS",
  "NON_INT_UPB", "PRIN_FORG_UPB",
  "RMW_PROCS_FLAG",
  "FORECL_PRIN_WRITEOFF_AMT",
  "SERVICING_ACTIVITY_INDICATOR"
)

# These names are used unquoted inside data.table expressions; registering
# them as global variables keeps R CMD check from reporting them as undefined.
# utils::globalVariables() only exists from R 2.15.1 onwards, hence the guard.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(Performance_Variables)
}


#' Parse Performance text file
#'
#' Reads a pipe-delimited (\code{"|"}) performance file with
#' \code{data.table::fread()}, names the columns from
#' \code{Performance_Variables} and converts several columns to typed values.
#'
#' Conversions applied after reading:
#' \itemize{
#'   \item \code{Monthly.Rpt.Prd}, \code{LPI_DTE}, \code{FCC_DTE} and
#'     \code{DISP_DT} are parsed as month/day/year strings into \code{IDate}.
#'   \item \code{Maturity.Date} and \code{ZB_DTE} are parsed with
#'     \code{lubridate::myd(truncated = 1)} (so the day component may be
#'     absent) and stored as \code{IDate}.
#'   \item \code{MOD_FLAG}, \code{RMW_PROCS_FLAG} and
#'     \code{SERVICING_ACTIVITY_INDICATOR} become factors with levels
#'     \code{"N"} and \code{"Y"}; any other value becomes \code{NA}.
#'   \item \code{Delq.Status} becomes integer; the value \code{"X"} is mapped
#'     to \code{NA}.
#'   \item \code{Zero.Bal.Code} becomes a factor: codes \code{''}, \code{'01'},
#'     \code{'02'}, \code{'03'}, \code{'06'}, \code{'09'}, \code{'15'},
#'     \code{'16'} are labelled \code{''}, \code{'P'}, \code{'T'}, \code{'S'},
#'     \code{'R'}, \code{'F'}, \code{'N'}, \code{'L'}; any other code becomes
#'     \code{NA}.
#' }
#'
#' @param perf_txt path to Performance text file, e.g. Performance_2000Q1.txt.
#'   Must be a single path to an existing file.
#' @param verbose logical; if \code{TRUE}, print the file size, the structure
#'   of the data before and after conversion, frequency tables of
#'   \code{Delq.Status} and \code{Zero.Bal.Code}, and a summary of the result.
#'
#' @return A data.table keyed by \code{LOAN_ID} and \code{Monthly.Rpt.Prd}.
#' @export
#'
process_P <- function(perf_txt, verbose=FALSE) {
  # Fail early on anything that is not exactly one existing file path;
  # fread() would otherwise fail later with a less helpful message.
  stopifnot(is.character(perf_txt), length(perf_txt) == 1L,
            file.exists(perf_txt))
  if(verbose) cat(perf_txt, file.size(perf_txt),"bytes\n")

  # One class per field, in the same order as Performance_Variables.
  # Date-like and coded columns are read as character and converted below.
  Performance_ColClasses <-
    c(
      'character', # LOAN_ID
      'character', # Monthly.Rpt.Prd
      'factor',    # Servicer.Name
      'numeric',   # LAST_RT
      'numeric',   # LAST_UPB
      'integer',   # Loan.Age
      'integer',   # Months.To.Legal.Mat
      'integer',   # Adj.Month.To.Mat
      'character', # Maturity.Date
      'factor',    # MSA
      'character', # Delq.Status
      'factor',    # MOD_FLAG
      'character', # Zero.Bal.Code
      'character', # ZB_DTE
      'character', # LPI_DTE
      'character', # FCC_DTE
      'character', # DISP_DT
      'numeric',   # FCC_COST
      'numeric',   # PP_COST
      'numeric',   # AR_COST
      'numeric',   # IE_COST
      'numeric',   # TAX_COST
      'numeric',   # NS_PROCS
      'numeric',   # CE_PROCS
      'numeric',   # RMW_PROCS
      'numeric',   # O_PROCS
      'numeric',   # NON_INT_UPB
      'numeric',   # PRIN_FORG_UPB
      'factor',    # RMW_PROCS_FLAG
      'numeric',   # FORECL_PRIN_WRITEOFF_AMT
      'factor'     # SERVICING_ACTIVITY_INDICATOR
    )


  Data_P <- fread(perf_txt,sep = "|",stringsAsFactors = FALSE,
                  colClasses=Performance_ColClasses,
                  col.names = Performance_Variables,
                  showProgress=FALSE, data.table=TRUE)
  # setDT(Data_P)

  if(verbose) { utils::str(Data_P) }

  # Convert the date columns in place. Two source layouts are handled:
  # full month/day/year strings, and strings parsed by myd() with
  # truncated = 1, which tolerates a missing day component.
  Data_P[, `:=`(
    'Monthly.Rpt.Prd'=as.IDate(Monthly.Rpt.Prd, "%m/%d/%Y"),
    'Maturity.Date'=as.IDate(lubridate::myd(Maturity.Date, truncated = 1)),
    "ZB_DTE"=as.IDate(lubridate::myd(ZB_DTE, truncated = 1)),
    "LPI_DTE"=as.IDate(LPI_DTE, "%m/%d/%Y"),
    "FCC_DTE"=as.IDate(FCC_DTE, "%m/%d/%Y"),
    "DISP_DT"=as.IDate(DISP_DT, "%m/%d/%Y")
  )]
  # Key on loan and (now date-typed) reporting period, which also sorts rows.
  setkeyv(Data_P, c("LOAN_ID","Monthly.Rpt.Prd"))

  # Re-level the flag columns so every file yields the same two levels,
  # regardless of which values happen to occur in it.
  Data_P[, c("MOD_FLAG","RMW_PROCS_FLAG","SERVICING_ACTIVITY_INDICATOR"):=list(
    factor(MOD_FLAG, levels=c("N","Y")),
    factor(RMW_PROCS_FLAG, levels=c("N","Y")),
    factor(SERVICING_ACTIVITY_INDICATOR, levels=c("N","Y")))]

  # NOTE: the block below is disabled; it is kept for reference only.
  # # Create function to handle missing Current UPBs in the last record by setting
  # # them to the record prior
  # na.lomf <- function(x) {
  #   na.lomf.0 <- function(x) {
  #     #    non.na.idx <- intersect(which(!is.na(x)),which(x>0))
  #     non.na.idx <- which(!is.na(x))
  #     if (is.na(x[1L]) || x[1L]==0) {
  #       non.na.idx <- c(1L, non.na.idx)
  #     }
  #     rep.int(x[non.na.idx], diff(c(non.na.idx, length(x) + 1L)))
  #   }
  #
  #   dim.len <- length(dim(x))
  #
  #   if (dim.len == 0L) {
  #     na.lomf.0(x)
  #   } else {
  #     apply(x, dim.len, na.lomf.0)
  #   }
  # }
  #
  # na.lomf_L <- function(x) {
  #   #  non.na.idx <- intersect(which(!is.na(x)),which(x[length(x)-1]>0))
  #   # non.na.idx <- which(!is.na(x))
  #   if (is.na(x[length(x)]) || x[length(x)]==0) {
  #     XX<-c(x[1:length(x)-1], rep.int(x[length(x)-1], 1))
  #   } else {
  #     XX<-x
  #   }
  # }
  #
  #
  # Data_P[,
  #   c("LAST_UPB", "NON_INT_UPB") :=list(na.lomf(LAST_UPB), na.lomf(NON_INT_UPB)),
  #   by = "LOAN_ID"]
  # rm(na.lomf, na.lomf_L)

  if(verbose) print(sort(table(Data_P$Delq.Status, useNA = "ifany"), decreasing = TRUE))
  # "X" marks a non-numeric status: blank it out first, then convert the
  # remaining strings to integer. %in% never yields NA, so values that are
  # already missing are left untouched and stay NA after conversion.
  Data_P[, Delq.Status := as.integer(
    replace(Delq.Status, Delq.Status %in% "X", NA_character_))]

  if(verbose) print(sort(table(Data_P$Zero.Bal.Code, useNA = "ifany"), decreasing = TRUE))
  # Map the two-digit codes to one-letter labels; codes not listed become NA.
  Data_P[, Zero.Bal.Code:=factor(Zero.Bal.Code,
                                 levels = c('','01','02','03','06','09','15','16'),
                                 labels = c('','P','T','S','R','F','N','L'),
                                 ordered = FALSE)]
  if(verbose) {
    utils::str(Data_P)
    # Values are not auto-printed inside a function, so the summary has to be
    # printed explicitly or it is computed and thrown away.
    print(summary(Data_P))
  }
  return(Data_P)
}
# canarionyc/loanroll documentation built on Sept. 7, 2020, 4:50 a.m.