Nothing
#' RF model in inner CV loop
#' @param xTrain Training data (samples as rows; variables as columns)
#' @param yTrain Training response
#' @param xVal Validation data
#' @param yVal Validation response (for tuning)
#' @param DA Logical for discriminant analysis (classification)
#' @param fitness Fitness function ('MISS', 'AUROC' or 'RMSEP')
#' @param ntree See original function (`randomForest`). Passed from wrapper.
#' @param method Choice of Random Forest implementation (randomForest, ranger or Rborist). Passed from wrapper.
#' @param mtry See original function (`randomForest`). Passed from wrapper.
#' @param weigh_added To add a weighing matrix when it is classfication
#' @param weighing_matrix The matrix used for get a miss classfication score
#' @return An object containing:
#' @return (`miss`, `auc` or `rmsep`) A fitness metric
#' @return `virank` variable importance rankings
#' @examples
#' data("freelive2")
#' xTrain<-XRVIP2[1:40,]
#' yTrain<-YR2[1:40]
#' xVal<-XRVIP2[41:nrow(XRVIP2),]
#' yVal<-YR2[41:length(YR2)]
#' rfInner_object<-
#' MUVR2:::rfInner(xTrain=xTrain,
#' yTrain=yTrain,
#' xVal=xVal,
#' yVal=yVal,
#' mtry=150,
#' ntree=150,
#' fitness="RMSEP")
#' @noRd
rfInner <- function(xTrain,
yTrain,
xVal,
yVal,
DA,
fitness,
ntree,
mtry,
method,
weigh_added,
weighing_matrix) {
# Allocate return object
returnIn <- list()
#library(randomForest)
if (missing(method)) {
method <- "randomForest"
}
# Different functions for different RF implementations
if (method == 'randomForest') {
# safeguard measure in case some class is not appear after separating segment
if (is.factor(yTrain)) {
if (length(levels(yTrain)) != length(levels(droplevels(yTrain)))) {
#xTrain<-Xotu[1:15,]
#yTrain<-Yotu[1:15]
#xVal<-Xotu[16:29,]
#yVal<-Yotu[16:29]
#original_levels_yTrain<-levels(yTrain)
#original_levels_yVal<-levels(yVal)
#real_levels_yTrain<-levels(droplevels(yTrain))
#real_levels_yVal<-levels(droplevels(yVal))
yTrain <- droplevels(yTrain)
xVal <-
xVal[!is.na(factor(yVal, levels = levels(yTrain))), ]
yVal <-
factor(yVal[!is.na(factor(yVal, levels = levels(yTrain)))], levels = levels(yTrain))
}
}
rfModIn <- randomForest(xTrain,
yTrain,
xVal,
yVal,
ntree = ntree,
mtry = mtry)
#ntree: Number of trees to grow. This should not be set to too small a number,
#to ensure that every input row gets predicted at least a few times.
#mtry: Number of variables randomly sampled as candidates at each split. Note that the default values
#are different for classification (sqrt(p) where p is number of variables in x) and regression (p/3)
# Extract predictions
yValInner <- rfModIn$test$predicted
# Variable importance rank
returnIn$virank <- rank(-rfModIn$importance)
names(returnIn$virank) <- rownames(rfModIn$importance)
#Importance:a matrix with nclass + 2 (for classification) or two (for regression) columns.
#For classification, the first nclass columns are the class-specific measures computed as mean descrease in accuracy.
#The nclass + 1st column is the mean descrease in accuracy over all classes.
#The last column is the mean decrease in Gini index.
#For Regression, the first column is the mean decrease in accuracy and the second the mean decrease in MSE.
#If importance=FALSE, the last measure is still returned as a vector
} else if (method == 'ranger') {
rfModIn <- ranger(
x = xTrain,
y = yTrain,
num.trees = ntree,
importance = 'impurity',
# respect.unordered.factors = 'order', # Sort this out as "ifany"
#The 'impurity' measure is the Gini index for classification, the variance of the
#responses for regression and the sum of test statistics (see splitrule) for survival.
mtry = mtry
)
# Extract predictions
yValInner <- predict(rfModIn, data = xVal)$predictions
# Variable importance rank
returnIn$virank <- rank(-rfModIn$variable.importance)
} else {
stop('other RF methods not yet implemented')
}
#####################################################################################################
#In the main MUVR model, for the relationship with Y data type and DA value:
#When DA=TRUE, y is factor, run
#When DA=TRUE, y is numeric, y is transformed to factor. Is there a step in the main MUVR that check for the maximum number of classes?
#When Y is factor, DA must be TRUE
#When Y is numeric, DA=FALSE, run
#No combination of DA=FALSE and Y is factor
#
#Here I have some questions
# From what I understand, MISS, BER, AUROC are for classifications, RSMEP is for regression
#Problem1: When fitness is MISS,
# yValInner could be more than 2 category and the value could vary, Why set 0 as the cut off for predicted values?
#
#Problem 2: When fitness is BER, what if Y is numeric (continuous data)?
#Problem 3: When fitness is AUROC,what if Y is numeric
#Problem 4: When fitness is RMSEP, what is Y is factor?
#
#Problem 5: when method=randomForest, yValInner <- rfModIn$test$predicted, what about $voted for classification
#Problem 6:What is the Difference between rfInner and rfPredict? For me the only differences seems to be that rfInner calculated the fitness
#######################################################################################################################################
if (fitness == 'MISS' | fitness == 'wMISS') {
# cat(' miss',count)
if (DA == TRUE) {
returnIn$miss <- sum(yValInner != yVal)
if (fitness == 'wMISS') {
returnIn$wmiss <- getMISS(
actual = yVal,
predicted = yValInner,
weigh_added = weigh_added,
weighing_matrix = weighing_matrix
)
}
}
else {
###ML
yClassInner <-
ifelse(yValInner > 0, 1, -1) #####classification binary??????Why?????
returnIn$miss <- sum(yClassInner != yVal)
if (fitness == 'wMISS') {
returnIn$wmiss <- getMISS(
actual = yVal,
predicted = yValInner,
weigh_added = weigh_added,
weighing_matrix = weighing_matrix
)
}
}
}
if (fitness == 'BER' | fitness == 'wBER') {
returnIn$ber <- getBER(actual = yVal,
predicted = yValInner) ###getBER from MUVR
if (weigh_added == FALSE) {
weighing_matrix <-
diag(1, length(levels(yVal)), length(levels(yVal)))
} else{
if (is.null(weighing_matrix)) {
# warning("Missing weighing_matrix,weighing_matrix will be diagnoal")
weighing_matrix <-
diag(1, length(levels(yVal)), length(levels(yVal)))
}
if (dim(weighing_matrix)[1] != length(levels(yVal))) {
stop("The dimension of weighing_matrix is not correct")
}
if (dim(weighing_matrix)[2] != length(levels(yVal))) {
stop("The dimension of weighing_matrix is not correct")
}
for (i in 1:nrow(weighing_matrix)) {
if (weighing_matrix[i, i] != 1) {
stop("diagonal values must be 1")
}
for (j in 1:ncol(weighing_matrix)) {
if (weighing_matrix[i, j] < 0 | weighing_matrix[i, j] > 1) {
stop("Values in the weighing matrix must between 0 and 1")
}
}
}
}
}
if (fitness == 'wBER') {
returnIn$wber <- getBER(
actual = yVal,
predicted = yValInner,
weigh_added = weigh_added,
weighing_matrix = weighing_matrix
) ###getBER from MUVR
}
##Balance error rate
if (fitness == 'AUROC') {
returnIn$auc <- roc(yVal,
rfModIn$test$votes[, 1])$auc ####what is votes
}
# if test set is given (through the xtest or additionally ytest arguments), this component is a list which contains the
# corresponding predicted, err.rate, confusion, votes (for classification) or predicted, mse and rsq (for regression) for
# the test set. If proximity=TRUE, there is also a component, proximity, which contains the proximity among the test set
# as well as proximity between test and training data.
if (fitness == 'RMSEP') {
returnIn$rmsep <-
sqrt(sum((yVal - yValInner) ^ 2, na.rm = TRUE) / (length(yVal) - sum(is.na(yValInner))))
}
##RMSEP root mean squared error of prediction
return(returnIn)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.