# R/combineAggregate.R

#' Combine and aggregate loan acquisition and performance data
#'
#' S4 generic. Methods are selected on the classes of `Acquisition` and
#' `Performance`.
#'
#' @param Acquisition Acquisition dataset
#' @param Performance Performance dataset
#' @param ... further arguments, made available to the selected method
#'
#' @return A data table
#' @export
#'
setGeneric(
  name = "combine.aggregate",
  def = function(Acquisition, Performance, ...) {
    standardGeneric("combine.aggregate")
  },
  valueClass = "data.table"
)

#' Aggregate the last snapshot from the Loan database
#'
#' Prints the size and fst metadata of both backing files, reads a fixed set
#' of columns from each, left-joins the snapshot onto the acquisitions by
#' `LOAN_ID`, and passes the combined table to `aggregate.Combined_Data()`.
#'
#' @param Acquisition a LoanAcquisitionDataset
#' @param Performance a LoanSnapshotDataset
#' @param ... not used
#'
#' @return A data.table
#' @import data.table
#' @import fst
#' @export
#'
#' @examples
#' \dontrun{
#' LA <- LoanAcquisitionDataset(2019)
#' LS <- LoanSnapshotDataset(2019)
#' combine.aggregate(LA, LS)
#' }
setMethod("combine.aggregate",
          signature(Acquisition = "LoanAcquisitionDataset", Performance = "LoanSnapshotDataset"),
          function(Acquisition, Performance, ...) {
  # Diagnostics: report path, size and fst metadata of both input files.
  cat(Acquisition@path , file.size(Acquisition@path), "bytes\n")
  print(fst::metadata_fst(Acquisition@path))
  cat("------\n")
  cat(Performance@path , file.size(Performance@path), "bytes\n")
  print(fst::metadata_fst(Performance@path))

  acq_cols <- c("LOAN_ID", "ORIG_DTE", "ORIG_AMT", "ORIG_RT",
                "CSCORE_B", "CSCORE_C", "OLTV", "OCLTV", "DTI",
                "PURPOSE", "OCC_STAT", "MI_PCT", "ORIG_CHN")
  snap_cols <- c("LOAN_ID", "last_upb", "Zero.Bal.Code", "MOD_FLAG",
                 "D60_DTE", "D60_UPB",
                 "D90_DTE", "D90_UPB",
                 "D180_DTE", "D180_UPB")

  # NOTE(review): `to = 1e3L` limits both reads to rows 1..1000; this looks
  # like a development-time cap -- confirm before relying on the aggregates.
  Data_A <- read_fst(Acquisition@path, columns = acq_cols, to = 1e3L,
                     as.data.table = TRUE)
  Last_P <- read_fst(Performance@path, columns = snap_cols, to = 1e3L,
                     as.data.table = TRUE)

  # Left join: every acquisition row is kept, snapshot columns are NA when
  # the loan has no snapshot row. `sort = FALSE` keeps the acquisition order.
  Combined_Data <- merge(Data_A, Last_P, all.x = TRUE, by = "LOAN_ID", sort = FALSE)

  if (!"OrigYr" %in% names(Combined_Data)) {
    Combined_Data[, "OrigYr" := factor(data.table::year(ORIG_DTE))]
  }

  # The snapshot balance is read as `last_upb` (lower case). Column names are
  # case-sensitive, so coalescing `LAST_UPB` with itself failed with
  # "object 'LAST_UPB' not found". Loans without a snapshot balance fall back
  # to the original amount.
  # NOTE(review): the output column keeps the name `LAST_UPB` used by the
  # original assignment; aggregate.Combined_Data() is not visible here --
  # confirm it reads `LAST_UPB`.
  Combined_Data[, LAST_UPB := fcoalesce(last_upb, ORIG_AMT)]

  aggregate.Combined_Data(Combined_Data)
})

#' Combine and aggregate Acquisition and Monthly Performance data
#'
#' Reads the monthly performance columns, keys them by `LOAN_ID` and `xvar`,
#' and aggregates them with `aggregate.llmon()`. When a `subset` expression is
#' supplied through `...`, the acquisition data is filtered with it first and
#' only performance rows of the remaining loans are aggregated.
#'
#' @param Acquisition a LoanAcquisitionDataset
#' @param Performance a LoanPerformanceDataset
#' @param xvar time series variable
#' @param ... optional named arguments: `subset`, an `expression` evaluated
#'   against the acquisition columns to select loans (ignored unless it is an
#'   `expression` object); `by.vars`, forwarded to `aggregate.llmon()`.
#' @return A data.table
#'
#' @examples
#' \dontrun{
#' LA <-  LoanAcquisitionDataset(2019)
#' LP <- LoanPerformanceDataset(2019)
#' combine.aggregate(LA, LP)
#' }
setMethod("combine.aggregate",
          signature = c(Acquisition = "LoanAcquisitionDataset", Performance = "LoanPerformanceDataset"),
          definition = function(Acquisition, Performance,  xvar='Monthly.Rpt.Prd',  ...) {

  in.args <- list(...)
  # Kept under a distinct name so the local does not shadow base::subset().
  subset_expr <- in.args[["subset"]]

  perf_cols <- c("LOAN_ID", xvar, "LAST_RT", "Months.To.Legal.Mat", "last_upb",
                 "ZB_DTE", "Zero.Bal.Code", "DISP_DT",
                 "Delq.Status", "max.Delq.Status",
                 "MOD_FLAG", "prev_upb", "Prev_Sched_Bal1")
  Data_P <- read_fst(Performance@path, columns = perf_cols, as.data.table = TRUE)
  setDT(Data_P, key = c("LOAN_ID", xvar))

  if (is.expression(subset_expr)) {
    Data_A_fst <- Acquisition@path
    all_cols <- names(fst(Data_A_fst))

    # Columns the filter needs are the symbols it references. all.vars()
    # matches names exactly, whereas using column names as regex patterns
    # treats "." as a wildcard and also matches substrings of longer names.
    xtra_cols <- intersect(all_cols, all.vars(subset_expr))

    # unique(): the filter may itself mention one of the always-read columns.
    Data_A <- read_fst(Data_A_fst,
                       columns = unique(c("LOAN_ID", "ORIG_DTE", "ORIG_AMT", xtra_cols)),
                       as.data.table = TRUE)
    Data_A <- subset(Data_A, eval(subset_expr))
    print(summary(Data_A))

    # Inner join on LOAN_ID only. Without `on`, an unkeyed Data_A would have
    # its first two columns (LOAN_ID, ORIG_DTE) paired with Data_P's key
    # (LOAN_ID, xvar), silently joining the report period to ORIG_DTE.
    Data_P <- Data_P[Data_A, on = "LOAN_ID", nomatch = NULL]
  }

  aggregate.llmon(Data_P = Data_P, xvar = xvar, by.vars = in.args[["by.vars"]])
})
# canarionyc/loanroll documentation built on Sept. 7, 2020, 4:50 a.m.