Nothing
cm.matrix.counts <- function (mtrix)
{
# tests if the given matrix mtrix is a migration matrix. So the dimensions of the migration matrix should
# be at least 2 times 2 and the row and column dimensions must be equal. Further the values in the migration
# matrix should be between 0 and 1. And the sum of each row should be 1.
if (!is.matrix(mtrix))
stop("mtrix is not a matrix")
if (dim(mtrix)[1] < 2 ||
dim(mtrix)[2] < 2 || dim(mtrix)[1] != dim(mtrix)[2])
stop("incorrect dimension of migration matrix")
if (length(mtrix[mtrix < 0]) != 0)
stop("negative value in input data")
}
cm.matrix <- function (mtrix)
{
# tests if the given matrix mtrix is a migration matrix. So the dimensions of the migration matrix should
# be at least 2 times 2 and the row and column dimensions must be equal. Further the values in the migration
# matrix should be between 0 and 1. And the sum of each row should be 1.
if (!is.matrix(mtrix))
stop("mtrix is not a matrix")
if (dim(mtrix)[1] < 2 ||
dim(mtrix)[2] < 2 || dim(mtrix)[1] != dim(mtrix)[2])
stop("incorrect dimension of migration matrix")
if (length(mtrix[mtrix < 0]) != 0)
stop("negative value in input data")
y <- numeric(dim(mtrix)[1])
for (i in 1:dim(mtrix)[1]) {
y[i] <- sum(mtrix[i,])
}
y <- round(y, digits = 12)
}
cm.vector.counts <- function (vctr)
{
# tests if the given matrix mtrix is a migration matrix. So the dimensions of the migration matrix should
# be at least 2 times 2 and the row and column dimensions must be equal. Further the values in the migration
# matrix should be between 0 and 1. And the sum of each row should be 1.
if (!is.vector(vctr))
stop("mtrix is not a vector")
if (length(vctr[vctr < 0]) != 0)
stop("negative value in input data")
}
# Get totals per ID, cohort method
getidTotCntCohort <-
function(numDate,
numericalRating,
StartPos,
sDate,
eDate,
RatingsCat,
snapshots) {
idTotCnt_method = list()
idTotCnt = list()
nIDs <- nrow(StartPos)
algo <- 'cohort'
#Introduce an artificial 'NaN' state - time window can be larger than some
#companies' migration history
nRtgsCatNaN <- RatingsCat + 1
if (snapshots == 12) {
eDate <- POSIXTomatlab(as.POSIXlt(as.Date(matlabToPOSIX(eDate))))
}
sDates_yrs <-
as.numeric(format(as.Date(matlabToPOSIX(sDate)), '%Y'))
sDates_mns <-
as.numeric(format(as.Date(matlabToPOSIX(sDate)), '%m'))
sDates_dys <-
as.numeric(format(as.Date(matlabToPOSIX(sDate)), '%d'))
if (sDates_mns == 1 & sDates_dys == 31) {
snapshotDates <- cfdates(sDate, eDate, snapshots)
} else {
snapshotDates <- cfdates(sDate, eDate, snapshots)
}
idTotCntRCPP <-
getidTotCntCohortRCPP(nIDs,
numDate,
numericalRating,
as.numeric(StartPos[, 1]),
snapshotDates,
nRtgsCatNaN)
idTotCnt_totalsVec <- idTotCntRCPP$idTotCnt_totalsVec
idTotCnt_totalsMat <- idTotCntRCPP$idTotCnt_totalsMat
idTotCnt <- list(
totalsVec = idTotCnt_totalsVec,
totalsMat = idTotCnt_totalsMat,
method = idTotCntRCPP$idTotCnt_methods
)
return(idTotCnt)
}
# Get totals per ID, duration method
getidTotCntDuration <-
function(numDate,
numericalRating,
StartPos,
sDate,
eDate,
RatingsCat) {
idTotCnt_totalsVec = list()
idTotCnt_totalsMat = list()
idTotCnt_method = list()
idTotCnt = list()
rating = list()
nIDs <- nrow(StartPos)
algo <- 'duration'
nRtgsCatNaN <- RatingsCat + 1
idTotCntRCPP <-
getidTotCntDurationRCPP(
nIDs,
numDate,
numericalRating,
as.numeric(StartPos[, 1]),
nRtgsCatNaN,
algo,
sDate,
eDate
)
idTotCnt_totalsVec <- idTotCntRCPP$idTotCnt_totalsVec
idTotCnt_totalsMat <- idTotCntRCPP$idTotCnt_totalsMat
idTotCnt_method <- idTotCntRCPP$idTotCnt_methods
idTotCnt <- list(totalsVec = idTotCnt_totalsVec,
totalsMat = idTotCnt_totalsMat,
method = idTotCnt_method)
return(idTotCnt)
}
is.leapyear = function(year) {
#http://en.wikipedia.org/wiki/Leap_year
return(((year %% 4 == 0) &
(year %% 100 != 0)) | (year %% 400 == 0))
}
fixDate <- function(data_set) {
text_features <-
c(names(data_set[sapply(data_set, is.character)]), names(data_set[sapply(data_set, is.factor)]))
for (feature_name in text_features) {
feature_vector <- as.character(data_set[, feature_name])
date_pattern_m_dd_YYYY1 <-
'[0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9]'
date_pattern_dd_mm_YYYY1 <-
'[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9]'
date_pattern_dd_mm_YYYY2 <-
'[0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]'
date_pattern_YYYY_mm_dd1 <-
'[0-9][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9]'
date_pattern_YYYY_mm_dd2 <-
'[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
validDate = "NO"
if (any(grepl(date_pattern_m_dd_YYYY1, data_set[[feature_name]]))) {
date_pattern <- date_pattern_m_dd_YYYY1
validDate = "Yes"
type = "m_dd_YYYY1"
} else if (any(grepl(date_pattern_dd_mm_YYYY1, data_set[[feature_name]]))) {
date_pattern <- date_pattern_dd_mm_YYYY1
validDate = "Yes"
type = "dd_mm_YYYY1"
} else if (any(grepl(date_pattern_dd_mm_YYYY2, data_set[[feature_name]]))) {
date_pattern <- date_pattern_dd_mm_YYYY2
validDate = "Yes"
type = "dd_mm_YYYY2"
} else if (any(grepl(date_pattern_YYYY_mm_dd1, data_set[[feature_name]]))) {
date_pattern <- date_pattern_YYYY_mm_dd1
validDate = "Yes"
type = "YYYY_mm_dd1"
} else if (any(grepl(date_pattern_YYYY_mm_dd2, data_set[[feature_name]]))) {
date_pattern <- date_pattern_YYYY_mm_dd2
validDate = "Yes"
type = "YYYY_mm_dd2"
}
if (validDate == "Yes") {
if (max(nchar(feature_vector)) == 10) {
if (sum(grepl(date_pattern, feature_vector)) > 0) {
#print(paste('Casting dates:',feature_name))
if (type == "m_dd_YYYY1") {
data_set[, feature_name] <-
as.Date(feature_vector, format = "%m/%d/%Y")
} else if (type == "YYYY_mm_dd1") {
data_set[, feature_name] <-
as.Date(feature_vector, format = "%Y/%m/%d")
} else if (type == "YYYY_mm_dd2") {
data_set[, feature_name] <-
as.Date(feature_vector, format = "%Y-%m-%d")
} else if (type == "dd_mm_YYYY1") {
data_set[, feature_name] <-
as.Date(feature_vector, format = "%d/%m/%Y")
} else if (type == "dd_mm_YYYY2") {
data_set[, feature_name] <-
as.Date(feature_vector, format = "%d-%m-%Y")
}
}
}
}
}
return (data_set)
}
forecast_mnl <-
function(transData,
histData,
predData,
startDate,
endDate,
ref,
depVar,
indVars,
ratingCat,
wgt) {
if (is.data.frame(transData) &&
nrow(transData) == 0) {
stop("Error: this function requires historical loan tra")
}
if ((is.null(startDate))) {
stop("Error: 'startDate' is missing")
}
if ((is.null(endDate))) {
stop("Error: 'endDate' is missing")
}
if ((is.null(ref))) {
stop("Error: 'ref' is missing. A reference category must be specified")
}
if (length(depVar) != 1) {
stop("Error: A dependent variable is required")
}
#if (length(indVars)<=1){ stop("Error: A list of independent variables is required")}
if (length(ratingCat) <= 1) {
stop("Error: A list of rating categories is required")
}
#format specific data in histData and transData
transData$endDate <- as.character(transData$endDate)
transData$endDate <- as.Date(transData$endDate, format = "%Y-%m-%d")
#merge histData and transData
histData$Date <-
as.Date(histData$Date, format = "%Y-%m-%d") #This ensures the 'Date' column is in Date Format
trainData <-
merge(
x = transData,
y = histData,
by.x = c("endDate"),
by.y = c("Date")
)
trainData <-
subset(trainData,
trainData$endDate >= startDate & trainData$endDate <= endDate)
trainData <- subset(trainData, trainData[[wgt]] > 0)
#assign rownames to the prediction data
trainData$start_Rating <-
as.factor(trainData$start_Rating) #make sure ratings info is a factor
trainData$end_rating <-
as.factor(trainData$end_rating) #make sure ratings info is a factor
rn <-
ratingCat #ratingCat[-(length(ratingCat))]
rnr <-
rn[rn != ref] #remove the ref category from the list of levels
rn <-
c(rnr, ref) #put the ref category back in the last position of the list
row.names(predData) <-
rn #assign rownames to the prediction data
dummyList <- c()
#create dummy variables for the off-reference categories
for (t in unique(rnr)) {
trainData[paste("D", t, sep = "_")] <-
ifelse(trainData$start_Rating == t, 1, 0)
dummyList <- c(dummyList, paste("D", t, sep = "_"))
}
#set reference category
trainData$end_rating <- as.factor(trainData$end_rating)
trainData$end_rating <-
stats::relevel(trainData$end_rating, ref = ref)
#create regression formula
count <- 1
formula.init <- paste(depVar, " ~ ", sep =)
for (v in unique(indVars)) {
if (count == 1) {
formula.init <- paste(formula.init, v, sep = "")
} else {
formula.init <- paste(formula.init, " + ", v, sep = "")
}
count <- count + 1
}
for (d in unique(dummyList)) {
formula.init <- paste(formula.init, " + ", d, sep = "")
}
formula.init <- stats::as.formula(formula.init)
#running MNL regression
mnlReg <-
nnet::multinom(formula.init,
data = trainData,
weights = trainData[[wgt]],
maxit = 1000000)
mnlSummary <- summary(mnlReg)
mnlZ <-
summary(mnlReg)$coefficients / summary(mnlReg)$standard.errors
mnlP <- (1 - stats::pnorm(abs(mnlZ), 0, 1)) * 2
mnlExp <-
exp(cbind(stats::coef(mnlReg), stats::confint(mnlReg))) #extract the coefficients from the model and exponentiate
mnlFitted <- stats::fitted(mnlReg)
mnlPredict <- stats::predict(mnlReg, newdata = predData, "probs")
#Note: in the output of 'predict' the A to A reference is the last row because it goes through all of the explicit transitions then produces the one for
# A to A. Also, there is not row signifying the transition 'from' G because G is an absorbing state and the data does not have any thing that has
# G as a start state.
#sort the predicted results by row then by column
mnlPredict <-
mnlPredict[match(ratingCat, row.names(mnlPredict)), ] #by row
mnlPredict <-
subset(mnlPredict, select = ratingCat) #by column
mnlPredict <- as.data.frame(mnlPredict)
#add Default column
mnlPredict['Def'] <- mnlPredict[[ratingCat[length(ratingCat)]]]
mnlPredict['Def'][mnlPredict['Def'] > 0] <- 0
#add Default row
temprow <-
matrix(c(rep.int(NA, length(mnlPredict))), nrow = 1, ncol = length(mnlPredict))
temprow <- as.data.frame(temprow)
names(temprow) <- names(mnlPredict)
row.names(temprow) <- "Def"
temprow[is.na(temprow)] <- 0
mnlPredict <- rbind(mnlPredict, temprow)
#fill in default values
mnlPredict$Def <- 1 - rowSums(mnlPredict)
mnlOutput <- list(
mnl_Reg = mnlReg,
mnl_Summary = mnlSummary,
mnl_Z = mnlZ,
mnl_P = mnlP,
mnl_Exp = mnlExp,
mnl_Fitted = mnlFitted,
mnl_Predict = mnlPredict
)
return(mnlOutput)
}
forecast_svm <-
function(transData,
histData,
predData_svm,
startDate,
endDate,
depVar,
indVars,
ratingCat,
pct,
tuning,
kernelType,
cost,
cost.weights,
gamma,
gamma.weights) {
## data type transformations - factoring
to.factors <- function(df, variables) {
for (variable in variables) {
df[[variable]] <- as.factor(df[[variable]])
}
return(df)
}
## normalizing - scaling
scale.features <- function(df, variables) {
for (variable in variables) {
df[[variable]] <- scale(df[[variable]], center = T, scale = T)
}
return(df)
}
#format specific data in histData and transData
transData$endDate <- as.character(transData$endDate)
transData$endDate <- as.Date(transData$endDate, format = "%Y-%m-%d")
histData$Date <- as.Date(histData$Date, format = "%Y-%m-%d")
#merge histData and transData
trainData <-
trainData <-
merge(
x = transData,
y = histData,
by.x = "endDate",
by.y = "Date"
)
trainData_1 <-
subset(trainData,
trainData$endDate >= startDate & trainData$endDate <= endDate)
LvL <- ratingCat
#create a matrix to hold the final transition matrix
TransitionMatrix_SVM <-
matrix(
numeric(0),
length(LvL),
length(LvL),
dimnames = list(LvL, LvL),
byrow = TRUE
)
if (tuning == "TRUE") {
TransitionMatrix_SVM.Tuned <-
matrix(
numeric(0),
length(LvL),
length(LvL),
dimnames = list(LvL, LvL),
byrow = TRUE
)
}
for (k in 1:(length(LvL) - 1)) {
trainData <-
subset(trainData_1, trainData_1$start_Rating %in% c(LvL[k]))
trainData$start_Rating <-
as.factor(trainData$start_Rating) #make sure ratings info is a factor
trainData$end_rating <-
as.factor(trainData$end_rating) #make sure ratings info is a factor
trainData.df <- trainData
trainData.df <- trainData[c(depVar, indVars)]
# split data into training and test datasets. Extract the forecast data from the training data this way the forecast dataset
# will have the same number as factors are the training dataset....
# 2) setup training and testing data
trainData.df <-
utils::head(trainData.df, -1) #return all rows except the last row (which contains the forecast data)
set.seed(1000)
indexes <-
sample(1:nrow(trainData.df),
size = pct * nrow(trainData.df),
replace = FALSE)
train.data <- trainData.df[sort(indexes), ]
test.data <- trainData.df[-indexes, ]
test.feature.vars <-
test.data[, -1] #all other variables (independent vars)
test.class.var <-
test.data[, 1] #credit rating (dependent var)
#import the forecast data and append it to the training dataset
forecast.data <- predData_svm
test.feature.vars <-
rbind(test.feature.vars, forecast.data) #append the forecast data to the last row of the larger dataset
forc.feature.vars <- utils::tail(test.feature.vars, 1)
test.feature.vars <- utils::head(test.feature.vars, -1)
#-------------------------------SVM (before Tuning) using selected features ---------------------------------
## build initial model with training data
#create regression formula
count <- 1
formula.init <- paste(depVar, " ~ ", sep =)
for (v in unique(indVars)) {
if (count == 1) {
formula.init <- paste(formula.init, v, sep = "")
} else {
formula.init <- paste(formula.init, " + ", v, sep = "")
}
count <- count + 1
}
if (tuning == 'FALSE') {
#if the dependent variable is of one type skip running svm function so it will not 'trip-up'
if (length(unique(train.data[, 1])) > 1) {
formula.init <- stats::as.formula(formula.init)
svm.model <- e1071::svm(
formula = formula.init,
data = train.data,
kernel = kernelType,
cost = cost,
gamma = gamma,
probability = TRUE
)
## view inital model details
summary(svm.model)
## predict and evaluate results (using real forecasting data)
svm.predictions <-
stats::predict(svm.model, forc.feature.vars, probability = TRUE)
svm.predictions <- attr(svm.predictions, "probabilities")
## predict and evaluate results (using test data)
svm.predictions.test <-
stats::predict(svm.model, test.feature.vars)
#get names from the svm.predictions
pred.LvL <- dimnames(svm.predictions)[[2]]
}
#if the dependent variable is of one type skip running svm function so it will not 'trip-up'
if (length(unique(train.data[, 1])) > 1) {
for (trans in 1:length(LvL)) {
try(TransitionMatrix_SVM[k, trans] <-
svm.predictions[, LvL[trans]], TRUE)
}
} else {
for (trans in 1:length(ratingCat)) {
if (k == trans) {
try(TransitionMatrix_SVM[k, trans] <- 1)
} else {
try(TransitionMatrix_SVM[k, trans] <- 0)
}
}
}
#---------------------------------------------------------------------------------------------
} else {
#-------------------------SVM USing Tuning----------------------------------------------------
#if (tuning == 'TRUE') {
## hyperparameter optimizations
# run grid search
formula.init <- stats::as.formula(formula.init)
tuning.results <- e1071::tune(
e1071::svm,
formula.init,
data = train.data,
kernel = kernelType,
ranges = list(
cost = cost.weights,
gamma = gamma.weights,
probability = TRUE
)
)
# get best model and evaluate predictions
svm.model.best = tuning.results$best.model
#print("Best SVM model:")
#print(svm.model.best)
## predict and evaluate results (using real forecasting data)
svm.predictions.best <-
stats::predict(svm.model.best, forc.feature.vars, probability = TRUE)
svm.predictions.best <-
attr(svm.predictions.best, "probabilities")
## predict and evaluate results (using test data)
svm.predictions.best.test <-
stats::predict(svm.model.best, test.feature.vars)
#get names from the svm.predictions
pred.LvL <- dimnames(svm.predictions.best)[[2]]
for (trans in 1:length(LvL)) {
try(TransitionMatrix_SVM.Tuned[k, trans] <-
svm.predictions.best[, LvL[trans]],
TRUE)
}
# plot best model evaluation metric curves
svm.predictions.best <-
stats::predict(svm.model.best, test.feature.vars, decision.values = T)
svm.prediction.values <-
attributes(svm.predictions.best)$decision.values
}
rm(trainData)
rm(trainData.df)
rm(forecast.data)
}
#sort the predicted results by row then by column
TransitionMatrix_SVM[is.na(TransitionMatrix_SVM)] <- 0
TransitionMatrix_SVM[length(TransitionMatrix_SVM)] <- 1
svmOutput <- list(TransitionMatrix_SVM = TransitionMatrix_SVM)
if (tuning == "TRUE") {
TransitionMatrix_SVM.Tuned[is.na(TransitionMatrix_SVM.Tuned)] <- 0
TransitionMatrix_SVM.Tuned[length(TransitionMatrix_SVM.Tuned)] <-
1
svmOutput <- list(TransitionMatrix_SVM = TransitionMatrix_SVM,
TransitionMatrix_SVM.Tuned = TransitionMatrix_SVM.Tuned)
}
return(svmOutput)
}
forecast_lda <-
function(transData,
histData,
predData_lda,
startDate,
endDate,
depVar,
indVars,
pct,
ratingCat) {
## data type transformations - factoring
to.factors <- function(df, variables) {
for (variable in variables) {
df[[variable]] <- as.factor(df[[variable]])
}
return(df)
}
## normalizing - scaling
scale.features <- function(df, variables) {
for (variable in variables) {
df[[variable]] <- scale(df[[variable]], center = T, scale = T)
}
return(df)
}
#format specific data in histData and transData
transData$endDate <- as.character(transData$endDate)
transData$endDate <- as.Date(transData$endDate, format = "%Y-%m-%d")
histData$Date <- as.Date(histData$Date, format = "%Y-%m-%d")
#merge histData and transData
trainData <-
trainData <-
merge(
x = transData,
y = histData,
by.x = "endDate",
by.y = "Date"
) #trainData <- trainData<-merge(x=transData,y=histData, by.x="Qtr_Year", by.y = "Date")
trainData_1 <-
subset(trainData,
trainData$endDate >= startDate & trainData$endDate <= endDate)
LvL <- ratingCat
#create a matrix to hold the final transition matrix
TransitionMatrix_lda <-
matrix(
numeric(0),
length(LvL),
length(LvL),
dimnames = list(LvL, LvL),
byrow = TRUE
)
for (k in 1:(length(LvL) - 1)) {
trainData <-
subset(trainData_1, trainData_1$start_Rating %in% c(LvL[k]))
trainData$start_Rating <-
as.factor(trainData$start_Rating) #make sure ratings info is a factor
trainData$end_rating <-
as.factor(trainData$end_rating) #make sure ratings info is a factor
trainData.df <- trainData
trainData.df <- trainData[c(depVar, indVars)]
# split data into training and test datasets. Extract the forecast data from the training data this way the forecast dataset
# will have the same number as factors are the training dataset....
# 2) setup training and testing data
trainData.df <-
utils::head(trainData.df, -1) #return all rows except the last row (which contains the forecast data)
indexes <-
sample(1:nrow(trainData.df), size = pct * nrow(trainData.df))
train.data <- trainData.df[sort(indexes), ]
test.data <- trainData.df[-indexes, ]
test.feature.vars <-
test.data[, -1] #all other variables (independent vars)
test.class.var <-
test.data[, 1] #credit rating (dependent var)
#import the forecast data and append it to the training dataset
forecast.data <- predData_lda #predData_lda[,-1]
test.feature.vars <-
rbind(test.feature.vars, forecast.data) #append the forecast data to the last row of the larger dataset
forc.feature.vars <- utils::tail(test.feature.vars, 1)
test.feature.vars <- utils::head(test.feature.vars, -1)
if (length(unique(train.data[[1]])) > 1) {
#create regression formula
count <- 1
formula.init <- paste(depVar, " ~ ", sep =)
for (v in unique(indVars)) {
if (count == 1) {
formula.init <- paste(formula.init, v, sep = "")
} else {
formula.init <- paste(formula.init, " + ", v, sep = "")
}
count <- count + 1
}
formula.init <- stats::as.formula(formula.init)
#running lda regression
lda.model <- MASS::lda(formula.init, data = train.data)
## view inital model details
summary(lda.model)
## predict and evaluate results (using real forecasting data)
lda.predictions <-
stats::predict(lda.model, forc.feature.vars, probability = TRUE)
#print(lda.predictions)
## predict and evaluate results (using test data)
lda.predictions.test <-
stats::predict(lda.model, test.feature.vars)
#get names from the lda.predictions
pred.LvL <- dimnames(lda.predictions)[[2]]
for (trans in 1:length(LvL)) {
try(TransitionMatrix_lda[k, trans] <-
lda.predictions$posterior[, LvL[trans]],
TRUE)
}
} else {
for (trans in 1:length(LvL)) {
if (trans < length(LvL)) {
try(TransitionMatrix_lda[k, trans] <- 0, TRUE)
} else {
try(TransitionMatrix_lda[k, trans] <- 1, TRUE)
}
}
}
#---------------------------------------------------------------------------------------------
rm(trainData)
rm(trainData.df)
rm(forecast.data)
}
#sort the predicted results by row then by column
TransitionMatrix_lda[is.na(TransitionMatrix_lda)] <- 0
TransitionMatrix_lda[length(TransitionMatrix_lda)] <- 1
ldaOutput <- list(TransitionMatrix_lda = TransitionMatrix_lda)
return(ldaOutput)
}
forecast_ann <-
function(transData,
histData,
predData_ann,
startDate,
endDate,
depVar,
indVars,
ratingCat,
pct,
hiddenlayers,
activation,
stepMax,
rept,
calibration) {
## data type transformations - factoring
to.factors <- function(df, variables) {
for (variable in variables) {
df[[variable]] <- as.factor(df[[variable]])
}
return(df)
}
## normalizing - scaling
scale.features <- function(df, variables) {
for (variable in variables) {
df[[variable]] <- scale(df[[variable]], center = T, scale = T)
}
return(df)
}
#format specific data in histData and transData
transData$endDate <- as.character(transData$endDate)
transData$endDate <- as.Date(transData$endDate, format = "%Y-%m-%d")
histData$Date <- as.Date(histData$Date, format = "%Y-%m-%d")
#merge histData and transData
trainData <-
trainData <-
merge(
x = transData,
y = histData,
by.x = "endDate",
by.y = "Date"
)
trainData_1 <-
subset(trainData,
trainData$endDate >= startDate & trainData$endDate <= endDate)
LvL <- ratingCat
#create a matrix to hold the final transition matrix
TransitionMatrix_ann <-
matrix(
numeric(0),
length(LvL),
length(LvL),
dimnames = list(LvL, LvL),
byrow = TRUE
)
for (k in 1:(length(LvL) - 1)) {
trainData <-
subset(trainData_1, trainData_1$start_Rating %in% c(LvL[k]))
#-------New---------------------------------------------------------
trainData$start_Rating <-
trainData$start_Rating #make sure ratings info is a factor
trainData$end_rating <-
trainData$end_rating #make sure ratings info is a factor
trainData.df <- trainData
trainData.df <- trainData[c(depVar, indVars)]
# split data into training and test datasets. Extract the forecast data from the training data this way the forecast dataset
# will have the same number as factors are the training dataset....
# 2) setup training and testing data
trainData.df <-
utils::head(trainData.df, -1) #return all rows except the last row (which contains the forecast data)
indexes <-
sample(1:nrow(trainData.df), size = pct * nrow(trainData.df))
train.data <- as.data.frame(trainData.df[sort(indexes), ])
test.data <- trainData.df[-indexes, ]
test.feature.vars <-
test.data[, -1] #all other variables (independent vars)
test.class.var <-
test.data[, 1] #credit rating (dependent var)
#import the forecast data and append it to the training dataset
forecast.data <- predData_ann #predData_ann[,-1]
test.feature.vars <-
rbind(test.feature.vars, forecast.data) #append the forecast data to the last row of the larger dataset
forc.feature.vars <- utils::tail(test.feature.vars, 1)
test.feature.vars <- utils::head(test.feature.vars, -1)
train.data.keep <- train.data
## Create "one hot" vector
hotVec <- nnet::class.ind(as.factor(train.data[, 1]))
hotVec <- data.frame(hotVec)
## Get original names (exclude dependent Variable name)
origNames <- colnames(train.data)[-1]
# Create complete table with "one hot" vector and independent variables
train.data <- cbind(hotVec, train.data[origNames])
#
# names(train.data) <- origNames
formula.init.dep <- ""
count <- 1
for (v in names(train.data)) {
if (substr(v, 1, 1) == "X") {
if (count == 1) {
formula.init.dep <- paste(names(train.data)[count], sep = "")
} else {
formula.init.dep <-
paste(formula.init.dep, " + ", names(train.data)[count], sep = "")
}
count <- count + 1
}
}
formula.init.indep <- ""
count2 <- 1
for (v in names(train.data)) {
if (substr(v, 1, 1) != "X") {
if (count2 == (count)) {
formula.init.indep <- paste(names(train.data)[count2], sep = "")
} else {
formula.init.indep <-
paste(formula.init.indep, " + ", names(train.data)[count2], sep = "")
}
}
count2 <- count2 + 1
}
formula.init <-
paste(formula.init.dep, " ~ ", formula.init.indep, sep = "")
formula.init <- stats::as.formula(formula.init)
if (calibration == "TRUE") {
## --------- Find the best tuning parameters -------------------------
dnnta <-
caret::train(formula.init, data = train.data, method = "neuralnet")
# DNN Regression optimal training parameters
dnnta$bestTune
plot(dnnta)
print(dnnta$bestTune)
## ---------------------------------------------------------------------
} else {
# running ann regression
ann.model <- neuralnet::neuralnet(
formula.init,
data = train.data,
act.fct = activation,
hidden = hiddenlayers,
linear.output = TRUE,
rep=rept,
stepmax = stepMax
)
## plot neuralnet
## plot(ann.model)
## view inital model details
print(summary(ann.model))
## predict and evaluate results (using real forecasting data)
ann.predictions <-
neuralnet::compute(ann.model, forc.feature.vars)$net.result
#get names from the ann.predictions
pred.LvL <- dimnames(ann.predictions)[[2]]
# get the unique categories
LvLcategories <- unique(train.data.keep$end_rating)
LvLcategories <- sort(LvLcategories)
for (trans in 1:length(LvLcategories)) {
try(TransitionMatrix_ann[k, LvLcategories[trans]] <-
ann.predictions[, trans], TRUE)
}
}
rm(trainData)
rm(trainData.df)
rm(forecast.data)
}
#sort the predicted results by row then by column
TransitionMatrix_ann[is.na(TransitionMatrix_ann)] <- 0
TransitionMatrix_ann[length(TransitionMatrix_ann)] <- 1
annOutput <- list(TransitionMatrix_ann = TransitionMatrix_ann)
return(annOutput)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.