# R/Data_A_stats.R

# Defines functions: aggregate.Data_A

#' @importFrom stats na.omit weighted.mean

# Unevaluated summary expression for loan-level data.
#
# It is meant to be evaluated where the loan columns (ORIG_AMT, ORIG_RT,
# CSCORE_B, CSCORE_C, OLTV, OCLTV, DTI, PURPOSE, OCC_STAT, MI_PCT, ORIG_CHN)
# and a row count `.N` are visible, e.g. through eval() in the j slot of a
# data.table call. It yields a named list with one summary value per entry:
#   * averages are weighted by ORIG_AMT and ignore NA values;
#   * every *.UPB_Pct entry is a share of ORIG_AMT expressed in percent and
#     rounded to one decimal, taken over the rows whose flag column is not NA.
j.Data_A <- quote(
  list(
    'Loans'=.N,
    # Total ORIG_AMT in millions and average ORIG_AMT per row in thousands.
    'Orig_Amt.M'=round(sum(ORIG_AMT)/1e6,1),
    'AOLS.k'=round(sum(ORIG_AMT)/.N /1e3,0),
    #       'WA Loan Size ($k)'=sum(ORIG_AMT*ORIG_AMT/(OLTV/100), na.rm = TRUE) /  sum(ifelse(!is.na(OLTV),ORIG_AMT,0))/1e3,
    'Note_Rate'=round(weighted.mean(ORIG_RT,ORIG_AMT, na.rm = TRUE),2),

    'Borrower_Credit_Score'=round(weighted.mean(CSCORE_B,ORIG_AMT, na.rm = TRUE),0),
    'Co-borrower_Credit_Score'=round(weighted.mean( CSCORE_C,ORIG_AMT, na.rm = TRUE),0),

    'Orig_LTV'=round(weighted.mean( OLTV, ORIG_AMT, na.rm = TRUE),0),
    # OCLTV falls back to OLTV on rows where OCLTV is missing.
    'Orig_Combined_LTV'=round(weighted.mean( ifelse(is.na(OCLTV), OLTV, OCLTV), ORIG_AMT, na.rm = TRUE),0),

    'DTI'=round(weighted.mean(DTI,ORIG_AMT, na.rm = TRUE),0),
    #  '2ndLien.UPB_Pct'=sum( ifelse(OCLTV-OLTV>3, ORIG_AMT, 0)) / sum(ORIG_AMT),

    'Refinance.UPB_Pct'= round(100*sum(ifelse(PURPOSE %in% c('R'), ORIG_AMT, 0), na.rm = TRUE) /
                                 sum(ifelse(!is.na(PURPOSE), ORIG_AMT, 0)),1),
    'Cash-Out.UPB_Pct'= round(100*sum(ifelse(PURPOSE %in% c('C'), ORIG_AMT, 0), na.rm = TRUE) /
                                sum(ifelse(!is.na(PURPOSE), ORIG_AMT, 0)),1),

    # `==` yields NA on rows with a missing OCC_STAT; na.rm = TRUE drops those
    # from the numerator and the denominator zeroes them out explicitly.
    'Investor.UPB_Pct'=round(100*sum(ifelse(OCC_STAT=='I',ORIG_AMT,0), na.rm = TRUE) /
                               sum(ifelse(is.na(OCC_STAT),0, ORIG_AMT)),1),
    'Second_Home.UPB_Pct'=round(100*sum(ifelse(OCC_STAT=='S',ORIG_AMT,0), na.rm = TRUE) /
                                  sum(ifelse(is.na(OCC_STAT),0, ORIG_AMT)),1),

    'Mortgage_Insurance'=round(weighted.mean(MI_PCT, ORIG_AMT, na.rm = TRUE),1),

    # Scale to percent BEFORE rounding, like the other *.UPB_Pct entries;
    # rounding the raw 0-1 ratio first would only leave multiples of 10.
    'TPO.UPB_Pct'=round(100*sum(ifelse(ORIG_CHN %in% c('B','C'), ORIG_AMT,0),na.rm = TRUE) /
                          sum(ifelse(!is.na(ORIG_CHN), ORIG_AMT,0)),1)
  )
)

# Summarise a loan table with the j.Data_A expression, one row per group.
#
# Arguments:
#   Data_A   Loan-level table. If it is not already a data.table it is
#            converted with setDT() and keyed on LOAN_ID. When it has no
#            OrigYr column, one is added by reference as
#            factor(year(ORIG_DTE)).
#   xvar     Optional grouping column name(s).
#   by.vars  Optional additional grouping column name(s).
#
# Grouping columns are c(xvar, by.vars); when both are NULL the table is
# grouped by OrigYr.
#
# Returns the result of evaluating j.Data_A within each group, requested via
# `keyby`.
aggregate.Data_A <- function(Data_A, xvar=NULL, by.vars=NULL){
  already_dt <- is.data.table(Data_A)
  if (!already_dt) setDT(Data_A, key = "LOAN_ID")

  has_orig_yr <- "OrigYr" %in% names(Data_A)
  if (!has_orig_yr) Data_A[, "OrigYr" := factor(year(ORIG_DTE))]

  # Fall back to origination year only when no grouping was requested at all.
  no_grouping_given <- is.null(xvar) && is.null(by.vars)
  by <- if (no_grouping_given) "OrigYr" else c(xvar, by.vars)

  Data_A[, eval(j.Data_A), keyby = by]
}

# Register aggregate.Data_A as the "Data_A" method of the aggregate() generic.
# .S3method() only exists from R 4.0.0 onwards, so R 3.6.x goes through
# registerS3method() instead; older versions are left untouched, as before.
if (getRversion() >= "4.0.0") {
  .S3method("aggregate", "Data_A")
} else if (getRversion() >= "3.6.0") {
  registerS3method("aggregate", "Data_A", aggregate.Data_A)
}
# canarionyc/loanroll documentation built on Sept. 7, 2020, 4:50 a.m.