#' @import data.table
#' @importFrom utils str globalVariables
#' @importFrom lubridate myd
Acquisitions_Variables <- c(
  # Column names applied positionally to the acquisitions file by process_A();
  # the order here must match the order of the fields in the file.
  "LOAN_ID", "ORIG_CHN", "Seller.Name",
  "ORIG_RT", "ORIG_AMT", "ORIG_TRM",
  "ORIG_DTE", "FRST_DTE",
  "OLTV", "OCLTV", "NUM_BO", "DTI", "CSCORE_B",
  "FTHB_FLG", "PURPOSE", "PROP_TYP", "NUM_UNIT", "OCC_STAT",
  "STATE", "ZIP_3", "MI_PCT", "Product.Type",
  "CSCORE_C", "MI_TYPE", "RELOCATION_FLG"
)

# The column names are used unquoted inside data.table expressions; declaring
# them as global variables keeps R CMD check from flagging them as undefined.
# utils::globalVariables() only exists from R 2.15.1 onwards, hence the guard.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(Acquisitions_Variables)
}
#' Read and clean a pipe-delimited acquisitions file
#'
#' Reads `acq_txt` with `data.table::fread()`, naming the columns positionally
#' from `Acquisitions_Variables`, then converts the two date columns to
#' `IDate` and the coded columns to factors. Codes that are not among the
#' factor levels listed in the body become `NA`.
#'
#' @param acq_txt Path to a single `|`-separated text file whose columns are in
#'   the order given by `Acquisitions_Variables`.
#' @param verbose If `TRUE`, print the file size, frequency tables of selected
#'   columns (taken before they are recoded) and `str()` of the result.
#' @return A `data.table` keyed on `LOAN_ID`.
#' @noRd
process_A <- function(acq_txt, verbose = FALSE) {
  # Fail with a clear message instead of an obscure `if()` / file.exists()
  # error when the path is not one non-missing string.
  if (!is.character(acq_txt) || length(acq_txt) != 1L || is.na(acq_txt)) {
    stop("`acq_txt` must be a single file path (one character string)",
         call. = FALSE)
  }
  if (!file.exists(acq_txt)) {
    stop(paste(acq_txt, "not found"), call. = FALSE)
  }
  if (verbose) cat(acq_txt, file.size(acq_txt), "bytes\n")

  # Storage class for each column, in file order. The names are only for the
  # reader: they are stripped before the call to fread() because the classes
  # are matched to the file's columns by position.
  col_classes <- c(
    LOAN_ID        = "character",
    ORIG_CHN       = "character",
    Seller.Name    = "factor",
    ORIG_RT        = "numeric",
    ORIG_AMT       = "numeric",
    ORIG_TRM       = "integer",
    ORIG_DTE       = "character",
    FRST_DTE       = "character",
    OLTV           = "integer",
    OCLTV          = "integer",
    NUM_BO         = "integer",
    DTI            = "integer",
    CSCORE_B       = "integer",
    FTHB_FLG       = "character",
    PURPOSE        = "character",
    PROP_TYP       = "character",
    NUM_UNIT       = "integer",
    OCC_STAT       = "character",
    STATE          = "factor",
    ZIP_3          = "factor",
    MI_PCT         = "integer",
    Product.Type   = "factor",
    CSCORE_C       = "integer",
    MI_TYPE        = "integer",
    RELOCATION_FLG = "character"
  )

  # Disabled sanity check kept from the original:
  # stopifnot(unique(count.fields(acq_txt, sep = '|')) == length(Acquisitions_Variables))

  # fread() already returns a data.table keyed on LOAN_ID, so no further
  # setDT()/setkey() call is needed.
  Data_A <- fread(
    file = acq_txt,
    sep = "|",
    stringsAsFactors = FALSE,
    col.names = Acquisitions_Variables,
    colClasses = unname(col_classes),
    data.table = TRUE,
    key = "LOAN_ID"
  )
  # Disabled sanity check kept from the original:
  # stopifnot(nrow(Data_A) == R.utils::countLines(acq_txt))

  # Dates are parsed in month-year(-day) order; `truncated = 1` lets the day
  # be absent. NOTE(review): presumably the raw values look like "MM/YYYY" --
  # confirm against the data specification.
  Data_A[, c("ORIG_DTE", "FRST_DTE") := list(
    as.IDate(lubridate::myd(ORIG_DTE, truncated = 1)),
    as.IDate(lubridate::myd(FRST_DTE, truncated = 1))
  )]

  if (verbose) {
    # Print a heading followed by the frequency table of `values`, optionally
    # ordered by descending count. `deparse.level = 0` keeps the table
    # unlabelled so the output has no variable-name header.
    show_counts <- function(heading, values, by_frequency = FALSE) {
      counts <- table(values, useNA = "ifany", deparse.level = 0)
      if (by_frequency) counts <- sort(counts, decreasing = TRUE)
      cat(heading)
      print(counts)
    }

    print(table(Data_A$ORIG_DTE, useNA = "ifany"))
    # These tables show the raw codes, so they must run before the recoding
    # step below turns unexpected codes into NA.
    show_counts("Purpose:\n", Data_A$PURPOSE)
    show_counts("Property Type:\n", Data_A$PROP_TYP, by_frequency = TRUE)
    show_counts("Occupancy Status:\n", Data_A$OCC_STAT, by_frequency = TRUE)
    show_counts("Product Type:\n", Data_A$Product.Type)
    show_counts("MI Type:\n", Data_A$MI_TYPE, by_frequency = TRUE)
    show_counts("Num Unit:\n", Data_A$NUM_UNIT)
  }

  # Recode the categorical columns by reference. Each conversion reads only
  # its own column, so doing them in one call is equivalent to one at a time.
  Data_A[, `:=`(
    ORIG_CHN       = factor(ORIG_CHN, levels = c("R", "B", "C"), ordered = FALSE),
    Seller.Name    = as.factor(Seller.Name),
    RELOCATION_FLG = factor(RELOCATION_FLG, levels = c("N", "Y")),
    FTHB_FLG       = factor(FTHB_FLG, levels = c("N", "Y"), ordered = FALSE),
    PURPOSE        = factor(PURPOSE, levels = c("P", "R", "C", "U"), ordered = FALSE),
    PROP_TYP       = factor(PROP_TYP, levels = c("SF", "CO", "CP", "MH", "PU"),
                            ordered = FALSE),
    OCC_STAT       = factor(OCC_STAT, levels = c("P", "S", "I"), ordered = FALSE),
    MI_TYPE        = factor(MI_TYPE, levels = 1:3,
                            labels = c("Borrower_Paid", "Lender_Paid", "Investor_Paid")),
    NUM_UNIT       = factor(NUM_UNIT, levels = 1:4)
  )]

  # Derived columns that the original left disabled:
  # Data_A[, 'OrigYr' := factor(year(ORIG_DTE))]
  # Data_A[, NUM_BO := factor(
  #   NUM_BO, levels = as.character(1:10), labels = c('1', '2', rep('3+', 8)), ordered = TRUE)]
  # Data_A[, c('CSCORE_MN', 'ORIG_VAL') := list(
  #   pmin(CSCORE_B, CSCORE_C, na.rm = TRUE),
  #   (ORIG_AMT / (OLTV / 100)))]

  if (verbose) {
    utils::str(Data_A)
  }
  return(Data_A)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.