#' Stepwise discriminant analysis
#' @export
stepdisc.calc <- function(object, FToEnter = 0.15, FToStay = 0.15) {
# matica musi byt plna
if (any(is.na(object$data))) stop("NA values in 'object'.", call. = FALSE)
# find and report constant columns
constantColumns = colnames(object$data)[apply(object$data, 2, function(x) (abs(max(x)-min(x)))==0 )]
if (length(constantColumns)>0) {
stop(paste("Characters \"", paste(constantColumns, collapse = "\", \""), "\" are invariant.", sep = ""), call. = FALSE)
}
saturatedModel = FALSE
X = as.matrix(object$data) # data matrix
grouping = object$Taxon # vector of groupings
k = length(levels(grouping)) # number of existing groups/populations
n = nrow(X) # number of observations
Entered = Removed = character()
if(k < 2) stop("At least two levels required to do anything meaningful.", call. = FALSE)
if(n < 2) stop("n > 1 observations required to do anything meaningful.", call. = FALSE)
# finding the first variable to accept in the model:
p = ncol(X) # number of samples
enter.Fstat = enter.PrF = enter.R2 = numeric(p)
for(j in 1:p){
enterSum = summary(stats::aov(X[,j] ~ grouping))
enter.Fstat[j] = enterSum[[1]]$`F value`[1]
enter.PrF[j] = enterSum[[1]]$`Pr(>F)`[1]
enter.R2[j] = (enterSum[[1]]$`Sum Sq`[1]/sum(enterSum[[1]]$`Sum Sq`))
}
if(min(enter.PrF) < FToEnter) { # condition for stopping the forward-selection
a = which.min(enter.PrF)
Entered = c(Entered, colnames(X)[a])
Removed = c(Removed, "")
# pick variable which seperates the groups most, this is variable a, the one with the smallest p-value
auswahl = colnames(X)[a]
weiter = TRUE # parameter to report the necessity of another variable selection
X.mod = as.matrix(X[,auswahl]) # model of the registered variables
colnames(X.mod) = auswahl
X = X[, colnames(X) != auswahl] # data-matrix of the selection of variables
p = ncol(X)
Fstat = enter.Fstat[a] # approx. F-statistic of the Wilks lambda of the selected model / partial Wilks' lambda
pwert = enter.PrF[a] # appropriate p-value of Fstat and Fstat2
R2 = enter.R2[a]
} else {weiter = FALSE}
# ---REMOVE VARIABLE ? ee
# finding the next variable to accept in the model:
while(weiter && !is.null(p) && p>0 && ncol(X.mod) < (n-k))
{
auswahl = NULL # vector for the names of the significant variables of the selection
enter.Fstat = enter.PrF = enter.R2 = numeric(p)
# ---NAJDI NAJMENSIE F to ENTER----------------MK------- #
for(j in 1:p){
enterSum = summary(stats::aov(X[,j] ~ X.mod+grouping))
if ((!is.null(enterSum[[1]]$`F value`[2])) && (!is.na(enterSum[[1]]$`F value`[2]))) { # model is not saturated
enter.Fstat[j] = enterSum[[1]]$`F value`[2]
enter.PrF[j] = enterSum[[1]]$`Pr(>F)`[2]
enter.R2[j] = (enterSum[[1]]$`Sum Sq`[2]/ (sum(enterSum[[1]]$`Sum Sq`)-enterSum[[1]]$`Sum Sq`[1]))
} else saturatedModel = TRUE
}
if (saturatedModel) break
# ---ENTER VARIABLE----------------------------------- #
a = which.min(enter.PrF)[1] # MK # most significant variable a (with the smalles Pr(>F))
if(enter.PrF[a] < FToEnter)
{ # condition for stopping the forward-selection
Entered = c(Entered, colnames(X)[a])
Removed = c(Removed, "")
namen2 = colnames(X)
auswahl = namen2[a] # see a
namen = colnames(X.mod)
X.mod = as.matrix(cbind(X.mod, X[,auswahl]))
colnames(X.mod) = c(namen, auswahl)
X = as.matrix(X[, namen2 != auswahl])
p = ncol (X)
if(p == 1) colnames(X) = namen2[namen2 != auswahl]
if(p == 0) weiter = FALSE # the case of selecting all variables: non is left to be selected
Fstat = c(Fstat, enter.Fstat[a]) # F-value
pwert = c(pwert, enter.PrF[a]) # appropriate p-values of Fstat
R2 = c(R2, enter.R2[a])
} else {weiter = FALSE}
# ---REMOVE VARIABLE---------------------MK----------- #
while(TRUE){
# Statistics for Removal
rem.Fstat = rem.PrF = rem.R2 = numeric(ncol(X.mod))
for(j in 1:ncol(X.mod)){
remSum = summary(stats::aov(X.mod[,j] ~ X.mod[,-j]+grouping))
rem.Fstat[j] = remSum[[1]]$`F value`[2]
rem.PrF[j] = remSum[[1]]$`Pr(>F)`[2]
rem.R2[j] = (remSum[[1]]$`Sum Sq`[2]/ (sum(remSum[[1]]$`Sum Sq`)-remSum[[1]]$`Sum Sq`[1]))
}
# ---REMOVE VARIABLE----------------------------------- #
a = which.max(rem.PrF)[1]
if(rem.PrF[a] > FToStay){
Entered = c(Entered, "")
Removed = c(Removed, colnames(X.mod)[a])
# remove from X.mod = add back to X
xnames = colnames(X)
X = cbind(X, X.mod[,a])
colnames(X) = c(xnames, colnames(X.mod)[a])
p = ncol (X)
X.mod = X.mod[,-a]
Fstat = c(Fstat, rem.Fstat[a]) # F-value
pwert = c(pwert, rem.PrF[a]) # appropriate p-values of Fstat
R2 = c(R2, rem.R2[a])
} else break
}
}
if(!exists("X.mod")) stop("Unable to perform required calculations, perhaps not enough observations?", call. = FALSE)
resDat = data.frame(Entered, Removed, R2, Fstat, pwert)
colnames(resDat) = c("Entered", "Removed", "Partial R-square", "F-value", "Pr > F")
print(resDat)
cat("\nSelected characters:\n")
cat(paste(Entered[Entered != ""], collapse = ", "))
cat("\n")
#cat("\n")
#cat(paste("\"", paste(Entered[Entered != ""], collapse = "\", \""), "\"\n"), sep = "")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.