makeSimcaModel_inner <- function(dataset, groupBy, k=0, version, stnLoc) {
tol <- stnLoc$simca_tolerance
kmax <- stnLoc$simca_kMax
flatDf <- makeFlatDataFrame(dataset, groupBy)
X <- flatDf[,-1] # all the NIR data
grp <- flatDf[,1] # only the grouping
if (version == "classic") {
if (k == 0) {
simcaMod <- rrcovHD::CSimca(grouping ~ ., data=flatDf, kmax=kmax, tol=tol) ## k=0 does not work, kmax does not work
# simcaMod <- rrcovHD::CSimca(X, grp, kmax=kmax, tol=tol) ## k=0 does not work, kmax does not work
} else {
simcaMod <- rrcovHD::CSimca(grouping ~ ., data=flatDf, k=k, kmax=kmax, tol=tol)
# simcaMod <- rrcovHD::CSimca(X, grp, k=k, kmax=kmax, tol=tol)
}
} else {
if (version == "robust") {
if (k == 0) {
simcaMod <- rrcovHD::RSimca(grouping ~ ., data=flatDf, kmax=kmax, tol=tol) ## k=0 does not work, but still calculating k
# simcaMod <- rrcovHD::RSimca(X, grp, kmax=kmax, tol=tol) ## k=0 does not work, but still calculating k
} else {
simcaMod <- rrcovHD::RSimca(grouping ~ ., data=flatDf, k=k, kmax=kmax, tol=tol)
# simcaMod <- rrcovHD::RSimca(X, grp, k=k, kmax=kmax, tol=tol)
}
} else {
stop("Please check the settings for SIMCA. One of 'classic' or 'robust' has to be supplied.", call.=FALSE)
}
}
return(simcaMod)
} # EOF
makeSimcaModels <- function(dataset, groupingVector, k=0, version, inCV=FALSE) {
stn <- stnLoc <- getstn()
i <- NULL; modelList <- list()
leng <- length(groupingVector)
modMsg <- " mod"
if (inCV) {
modMsg <- " CV"
}
if (!stn$allSilent) {cat(paste(modMsg, "...", sep="")) }
#
for (i in 1: leng) {
# mod <- makeSimcaModel_inner(dataset, groupingVector[i], k, version, stnLoc)
a <- try(makeSimcaModel_inner(dataset, groupingVector[i], k, version, stnLoc), silent=TRUE)
if (is(a, "try-error")) {
assign(".gs", "\n ***error***:", pos=gl_ap2GD)
if (inCV) {aa <- "cv."} else {aa <- ""}
add <- paste(aa, groupingVector[i], sep="")
assign(".charCollect", paste(get(".charCollect", pos=gl_ap2GD), add, sep=", "), pos=gl_ap2GD)
modelList <- c(modelList, list(NULL))
} else {
modelList <- c(modelList, list(a))
}
# modelList <- c(modelList, list(mod))
} # end for i
#
return(modelList)
} # EOF
makeSimcaPrediction_inner <- function(SimcaModel, newFlatData, newCorrectGrouping) {
if (is.null(newFlatData)) {
pred <- rrcov::predict(SimcaModel, method=2)
} else {
pred <- rrcov::predict(SimcaModel, newFlatData, method=2)
# print(pred); print(str(pred)); print(length(pred)); wait()
# print(newCorrectGrouping); print(length(newCorrectGrouping)); wait()
predTable <- rrcov::mtxconfusion(newCorrectGrouping, pred@classification, prior = NULL, printit=FALSE)
pred@ct <- predTable
}
return(pred)
} # EOF
makeSimcaPredictions <- function(SimcaModelList, newFlatData=NULL, newCorrectGrouping=NULL, indNew=NULL, inCV=FALSE) {
stn <- getstn()
predictionsList <- list()
leng <- length(SimcaModelList)
msg <- "predictions"
dataset <- NULL
if (is(newFlatData, "aquap_data")) {
msg <- "CV-predictions"
dataset <- newFlatData
simcClasses <- newCorrectGrouping
} # end if
if (!stn$allSilent & !inCV) {cat(paste(" pred...", sep="") )}
for (i in 1: leng) {
if (is(dataset, "aquap_data")) {
a <- makeFlatDataFrame(dataset, simcClasses[i])
newCorrectGrouping <- a[indNew,1] # the grouping is in the first column # otherwise they both stay at "NULL"
newFlatData <- a[indNew,-1] # the NIR is in everything except the first column
} # end if
if (is.null(SimcaModelList[[i]])) {
predictionsList <- c(predictionsList, list(NULL))
} else {
pred <- makeSimcaPrediction_inner(SimcaModelList[[i]], newFlatData, newCorrectGrouping)
predictionsList <- c(predictionsList, list(pred))
}
} # end for i
return(predictionsList)
} # EOF
####
calc_interclassResid <- function(XSimcaObject) {
simOb <- XSimcaObject
counts <- simOb@counts
nrGroups <- length(counts)
Ks <- simOb@k
rawData <- simOb@X
groups <- simOb@grp
resMat <- matrix(NA, nrow=nrGroups, ncol=nrGroups)
for (ii in 1: nrGroups) {
for (pp in 1: nrGroups) {
# ind <- as.numeric(names(simOb@pcaobj[[ii]]@od))
ind <- which(groups == levels(groups)[ii])
Center1 <- simOb@pcaobj[[ii]]@center
Center2 <- simOb@pcaobj[[pp]]@center
# X <- scale(as.matrix(rawData[ind,]), center=T, scale=F)
X <- scale(as.matrix(rawData[ind,]), center=Center2, scale=F)
L1 <- simOb@pcaobj[[ii]]@loadings
L2 <- simOb@pcaobj[[pp]]@loadings
X_mod <- X %*% L2 %*% t(L2)
E <- X_mod - X ## Residuals matrix
Esum <- sum(t(apply(E, 1, function(x) x*t(x))))
n <- counts[ii] ## the number of samples in the ii-Group
k <- Ks[pp] ## the number of factor in the pp-Group
m <- ncol(X) ## the number of variables in the ii-Group
a <- 1/((m-k)*n)
resMat[ii,pp] <- (a * Esum)^0.5
} # end for pp
} # end for ii
grpChar <- levels(groups)
rownames(resMat) <- grpChar
colnames(resMat) <- paste(grpChar, "@", Ks, sep="")
out <- resMat
} # EOF
calc_interclassDist_mat <- function(residMatrix) {
a <- nrow(residMatrix)
resMat <- matrix(NA, a, a)
mat <- residMatrix
# for (i in 1: (a-1)) {
# for (k in (i+1): a) {
# dist <- ((mat[]^2 + mat[]^2) /(mat[i,i]^2 + mat[]^2))^0.5 - 1
# } # end k
# } # end i
for (i in 1: (a)) {
for (k in (1): a) {
distance <- ((mat[i,k]^2 + mat[k,i]^2) /(mat[i,i]^2 + mat[k,k]^2))^0.5 - 1
resMat[i,k] <- distance
} # end k
} # end i
colnames(resMat) <- colnames(mat)
rownames(resMat) <- rownames(mat)
return(resMat)
} # EOF
## Core ##
calc_interclassDist <- function(XSimcaObject) {
resids <- calc_interclassResid(XSimcaObject)
out <- calc_interclassDist_mat(resids)
} # EOF
####
calculateInterclassDistances <- function(modelList, inCV=FALSE) {
stn <- getstn()
distList <- list()
leng <- length(modelList)
if (!stn$allSilent & !inCV) {cat(paste(" dist...", sep="") )}
for (i in 1: leng) {
if (is.null(modelList[[i]])) {
distList <- c(distList, list(NULL))
} else {
x <- calc_interclassDist(modelList[[i]])
distList <- c(distList, list(x))
}
} # end for i
return(distList)
} # EOF
correctSimcaGroupingForDataset <- function(dataset, groupingVector) {
stn <- getstn()
# we have to find at least 2 groups of data when using each element of the grouping vector on the dataset
# within these groups, there must be at least the minium amount of spectra as defined int he settings available.
# those elements of the grouping vector that do not meet these requirements will be excluded
goodGrps <- NULL
minSpec <- stn$simca_minSpectraEachGroup
if (minSpec < 3) {
minSpec <- 3
}
for (i in 1: length(groupingVector)) {
ind <- which(colnames(dataset$header) == groupingVector[i])
values <- dataset$header[,ind]
nrLevels <- nlevels(values)
if (nrLevels > 1) { # so we have at least two groups in the dataset
# now check for the number of spectra in each single group
RLE <- rle(as.character(sort(values))) # counts the number of occurence of every unique value
lengths <- RLE$lengths
# print(groupingVector[i]); print(RLE); wait()
if (min(lengths) >= minSpec) {
goodGrps <- c(goodGrps, groupingVector[i])
}
}
} # end for i
return(goodGrps) # will return NULL if nothing fits the requirements
} # EOF
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.