Nothing
#' Fuzzy Gaussian Naive Bayes Classifier with Fuzzy parameters
#'
#' \code{GauNBFuzzyParam} Fuzzy Gaussian Naive Bayes Classifier with Fuzzy parameters
#'
#'
#' @param train matrix or data frame of training set cases.
#' @param cl factor of true classifications of training set
#' @param alphacut value of the alpha-cut parameter, this value is between 0 and 1.
#' @param metd Method of transforming the triangle into scalar, It is the type of data entry for the test sample, use metd 1 if you want to use the Yager technique, metd 2 if you want to use the Q technique of the uniformity test (article: Directional Statistics and Shape analysis), and metd 3 if you want to use the Thorani technique
#' @param cores how many cores of the computer do you want to use to use for prediction (default = 2)
#'
#' @return A vector of classifications
#'
#' @references
#' \insertRef{moraes2021new}{FuzzyClass}
#'
#' @examples
#'
#' set.seed(1) # determining a seed
#' data(iris)
#'
#' # Splitting into Training and Testing
#' split <- caTools::sample.split(t(iris[, 1]), SplitRatio = 0.7)
#' Train <- subset(iris, split == "TRUE")
#' Test <- subset(iris, split == "FALSE")
#' # ----------------
#' # matrix or data frame of test set cases.
#' # A vector will be interpreted as a row vector for a single case.
#' test <- Test[, -5]
#' fit_FGNB <- GauNBFuzzyParam(
#' train = Train[, -5],
#' cl = Train[, 5], metd = 1, cores = 2
#' )
#'
#' pred_FGNB <- predict(fit_FGNB, test)
#'
#' head(pred_FGNB)
#' head(Test[, 5])
#' @importFrom stats cov dnorm qchisq qnorm
#' @importFrom foreach '%dopar%'
#' @importFrom Rdpack reprompt
#'
#' @export
GauNBFuzzyParam <- function(train, cl, alphacut = 0.0001, metd = 2, cores = 2) {
UseMethod("GauNBFuzzyParam")
}
#' @export
GauNBFuzzyParam.default <- function(train, cl, alphacut = 0.0001, metd = 2, cores = 2) {
# --------------------------------------------------------
# Estimating class parameters
p_data <- predata(train,cl)
# --
train <- p_data$train
cols <- p_data$cols
dados <- p_data$dados
M <- p_data$M
intervalos <- p_data$intervalos
#--------------------------------------------------------
# --------------------------------------------------------
# Finding Mu and Sigma for each class
medias <- lapply(1:length(unique(M)), function(i) colMeans(subset(dados, M == unique(M)[i])))
varian <- lapply(1:length(unique(M)), function(i) diag(diag(cov(subset(dados, M == unique(M)[i]))), (cols), (cols)))
# --------------------------------------------------------
# --------------------------------------------------------
# Estimating Triangular Parameters
# --------------------------------------------------------
if(metd > 3){ stop("metd argument must be <= 3.") }
# --------------------------------------------------------
alp=1
w=1
# --------------------------------------------------------
alpha = alp
if (metd != 4) {
alpha <- seq(alphacut, 1.1, 0.1)
alpha <- ifelse(alpha > 1, 1, alpha)
}
# -------------------------------
N <- nrow(dados) # Number of observations
# -------------------------------
# Average Parameters
# ------------------
Parameters_media <- lapply(1:length(medias), function(i) { # loop to groups
lapply(1:length(medias[[1]]), function(k) { # loop to dimensions
round(
t(sapply(1:length(alpha), function(j) {
c(
medias[[i]][k] - (qnorm(1 - alpha[j] / 2) * (sqrt(varian[[i]][k, k] / N))),
medias[[i]][k] + (qnorm(1 - alpha[j] / 2) * (sqrt(varian[[i]][k, k] / N)))
)
})),
5
)
})
})
# -------------------------------
# Variance Parameters
# ------------------
Parameters_varian <- lapply(1:length(medias), function(i) { # loop to groups
lapply(1:length(medias[[1]]), function(k) { # loop to dimensions
round(
t(sapply(1:length(alpha), function(j) {
beta <- 0.05 # previously fixed
lambda <- alpha[j]
# ------
L <- (1 - lambda) * qchisq(p = 1 - (beta / 2), N - 1) + (lambda * (N - 1))
R <- (1 - lambda) * qchisq(p = beta / 2, N - 1) + (lambda * (N - 1))
# ------
c(
((N - 1) * varian[[i]][k, k]) / L,
((N - 1) * varian[[i]][k, k]) / R
)
# ------
})),
5
)
})
})
# -------------------------------------------------------
structure(list(
Parameters_varian = Parameters_varian,
Parameters_media = Parameters_media,
medias = medias,
varian = varian,
cols = cols,
M = M,
alpha = alpha,
metd = metd,
cores = cores,
w = w
),
class = "GauNBFuzzyParam"
)
}
# -------------------------
#' @export
print.GauNBFuzzyParam <- function(x, ...) {
# -----------------
cat("\nFuzzy Gaussian Naive Bayes Classifier for Discrete Predictors\n\n")
# -----------------
cat("Variables:\n")
print(names(x$medias[[1]]))
cat("Class:\n")
print(levels(x$M))
# -----------------
}
#' @export
predict.GauNBFuzzyParam <- function(object,
newdata,
type = "class",
...) {
# --------------------------------------------------------
test <- as.data.frame(newdata)
# --------------------------------------------------------
Parameters_varian <- object$Parameters_varian
Parameters_media <- object$Parameters_media
medias <- object$medias
varian <- object$varian
cols <- object$cols
M <- object$M
alpha <- object$alpha
metd <- object$metd
cores <- object$cores
w <- object$w
# --------------------------------------------------------
# --------------------------------------------------------
# Calculation of triangles for each test observation
# sum of Logs and calculation of Barycenter
# --------------
N_test <- nrow(test)
# --------------
# Defining how many CPU cores to use
core <- parallel::makePSOCKcluster(cores)
doParallel::registerDoParallel(core)
# --------------
# loop start
R_M_obs <- foreach::foreach(h = 1:N_test, .combine = rbind) %dopar% {
# ------------
x <- test[h, ]
# ------------
triangulos_obs <-
lapply(1:length(medias), function(i) { # loop to groups
trian <- lapply(1:length(medias[[1]]), function(k) { # loop to dimensions
nn <- length(alpha)
if(metd != 4){ nn <- 2 }
t(sapply(1:nn, function(j) {
if((j == 2) && (metd != 4) ){ j = length(alpha) }
# ------------
a <- dnorm(x = as.numeric(x[k]), mean = as.numeric(Parameters_media[[i]][[k]][j, 1]), sd = sqrt(as.numeric(Parameters_varian[[i]][[k]][j, 1])))
b <- dnorm(x = as.numeric(x[k]), mean = as.numeric(Parameters_media[[i]][[k]][j, 2]), sd = sqrt(as.numeric(Parameters_varian[[i]][[k]][j, 2])))
# ------------
c(min(a, b), max(a, b))
# ------------
}))
})
if (length(trian) > 1) {
return(Reduce("+", trian))
} else {
return(Reduce("+", trian))
}
})
# ------------
# Center of Mass Calculation
vec_trian <- triangulos_obs
if(metd != 4) vec_trian <- lapply(1:length(unique(M)), function(i) c(triangulos_obs[[i]][1, 1], triangulos_obs[[i]][2, 1], triangulos_obs[[i]][1, 2]))
# --------------------------------------------------------
# Transforming Vector to Scalar
# ------------
R_M <- switch(metd,
# ------------
# Barycenter
# yager Distance
"1" = {
# ------------
Yagerdistance(vec_trian, M)
# ------------
},
"2" = {
# ------------
# Using distance Q
Qdistance(vec_trian, M)
# ------------
},
"3" = {
# ------------
# Thorani Distance
Thoranidistance(vec_trian, M)
# ------------
}
)
# --------------------------------------------------------
R_M_class <- R_M
# --------------------------------------------------------
return(R_M_class)
}
# ------------
# -------------------------
parallel::stopCluster(core)
# ---------
if (type == "class") {
# -------------------------
R_M_obs <- matrix(R_M_obs,nrow = N_test)
R_M_obs <- sapply(1:nrow(R_M_obs), function(i) which.max(R_M_obs[i, ]))
resultado <- unique(M)[R_M_obs]
return(as.factor(c(resultado)))
# -------------------------
} else {
# -------------------------
Infpos <- which(R_M_obs==Inf)
R_M_obs[Infpos] <- .Machine$integer.max;
R_M_obs <- matrix(unlist(R_M_obs),ncol = length(unique(M)), nrow = N_test)
R_M_obs <- R_M_obs/rowSums(R_M_obs,na.rm = T)
# ----------
colnames(R_M_obs) <- unique(M)
return(R_M_obs)
# -------------------------
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.