Nothing
#********************************* REQUIRED LIBRARIES *********************************#
# library(stats)
# library(funHDDC)
# library(fda)
# library(mclust)
# library(tclust)
# library(stringr)
#********************************* Clustering Function *********************************#
# /* BEGIN FROM FUNHDDC (MODIFIED) */
# /*
# * Authors: Bouveyron, C. Jacques, J.
# * Date Taken: 2022-01-01
# * Original Source: funHDDC (modified)
# * Address: https://github.com/cran/funHDDC
# *
# */
tfunHDDC <-
function(data, K=1:10, model="AkjBkQkDk", known=NULL,dfstart=50, dfupdate="approx",
dfconstr="no", threshold=0.1, itermax=200, eps=1e-6, init='random',
criterion="bic", d_select="Cattell", init.vector=NULL,
show=TRUE, mini.nb=c(5, 10), min.individuals=2, mc.cores=1, nb.rep=2,
keepAllRes=TRUE, kmeans.control = list(), d_max=100, d_range=2,
verbose = TRUE){
# IAIN d_range default 2 so it must explicitly be set to cause any type of error
#Options removed from call
com_dim <- NULL
noise.ctrl <- 1e-8
#
# CONTROLS
#
call <- match.call() # macthes values to prameters
.T_hddc_control(call)
# Control of match.args:
criterion <- .T_myAlerts( criterion, "criterion", "singleCharacterMatch.arg",
"HDDC: ", c("bic", "icl") )
d_select <- .T_myAlerts( d_select, "d_select", "singleCharacterMatch.arg",
"HDDC: ", c("cattell", "bic", "grid") )
init <- .T_myAlerts( init, "init", "singleCharacterMatch.arg",
"HDDC: ", c('random', 'kmeans', 'tkmeans',
'mini-em', "vector") )
dfconstr <- .T_myAlerts( dfconstr, "dfconstr", "singleCharacterMatch.arg",
"HDDC: ", c('yes', 'no') )
dfupdate <- .T_myAlerts( dfupdate, "dfupdate", "singleCharacterMatch.arg",
"HDDC: ", c('approx', 'numeric') )
# We get the model names, properly ordered
model <- .T_hdc_getTheModel(model, all2models = TRUE)
#nb.rep parameter
if ( (init == "random") & (nb.rep < 20) ){
nb.rep <- 20
}
# set verbose to false if using multiple cores, no point in timing
if(mc.cores > 1) {
verbose <- FALSE
}
# kmeans controls
# A@R should we include the alpha for trimming in this or just use a default?
kmeans.control <- .T_default_kmeans_control(kmeans.control)
BIC <- ICL <- c()
fdobj = data #for univariate model it is NOT a list
Wlist<-list()
if (!inherits(fdobj, 'list'))
{
x <- t(fdobj$coefs)
p <- ncol(x)
#Constructing the matrix with inner products of the basis functions
W <- inprod(fdobj$basis,fdobj$basis)
W[W < 1e-15] <- 0
#Construction of the triangular matrix of Choleski
W_m <- chol(W)
dety=det(W)
Wlist<-list(W=W,
W_m=W_m,
dety=dety
)
}
else {
x = t(fdobj[[1]]$coefs);
for (i in 2:length(fdobj)) x <- cbind(x,t(fdobj[[i]]$coefs))
p <- ncol(x)
for ( i in 1:length(fdobj) ){
name <- paste('W_var', i, sep = '')
#Constructing the matrix with inner products of the basis functions
W_fdobj <- inprod(fdobj[[i]]$basis, fdobj[[i]]$basis)
assign(name, W_fdobj)
}
#Add 0 ? left and right of W before constructing the matrix phi
prow <- dim(W_fdobj)[[1]]
pcol <- length(fdobj) * prow
W1 <- cbind( W_fdobj,
matrix( 0, nrow = prow, ncol = ( pcol - ncol(W_fdobj) ) ) )
W_list <- list()
for ( i in 2:( length(fdobj) ) ){
W2 <- cbind( matrix( 0, nrow = prow, ncol = (i - 1) * ncol(W_fdobj) ),
get( paste('W_var', i, sep = '') ),
matrix( 0, nrow = prow, ncol = ( pcol - i * ncol(W_fdobj) ) ) )
W_list[[i - 1]] <- W2
}
#Constructing the matrix phi
W_tot <- rbind(W1,W_list[[1]])
if (length(fdobj) > 2){
for( i in 2:(length(fdobj) - 1) ){
W_tot <- rbind(W_tot, W_list[[i]])
}
}
W_tot[W_tot < 1e-15] <- 0
#Construction of the triangular matrix of Choleski
W_m <- chol(W_tot)
dety<-det(W_tot)
Wlist<-list(W=W_tot,
W_m=W_m,
dety=dety
)
}
#
# Preparing the parallel
#
# bic and grid dimension selection do not bother with a threshold
if(d_select == "bic"){
threshold <- "bic"
} else if(d_select == "grid"){
threshold <- "grid"
}
if(max( table(K) ) > 1) warning("The number of clusters, K, is made unique (repeated values are not tolerated).")
K <- sort( unique(K) )
if(d_select == "grid") {
# set up the grid of values for d_set
if(length(K) > 1) stop("tfunHDDC: Using d_select='grid' K must be only 1
value (not a list).")
mkt_list <- list(model = model, K = K, threshold = threshold)
for(i in 1:K) mkt_list[[paste0("d", i)]] <- d_range
mkt_Expand <- do.call(expand.grid, mkt_list)
} else {
mkt_list <- list(model = model, K = K, threshold = threshold)
# default needs to be a number set as a number just in case
for( i in 1:max(K) ) mkt_list[[paste0("d", i)]] <- 2
mkt_Expand <- do.call(expand.grid, mkt_list)
}
mkt_Expand <- do.call( rbind, replicate(nb.rep, mkt_Expand,
simplify = FALSE) )
mkt_Expand <- rbind(c(), mkt_Expand) #no need for several runs for K==1
model <- as.character(mkt_Expand$model)
K <- mkt_Expand$K
threshold <- mkt_Expand$threshold
d <- list()
for( i in 1:max(K) ) {
d[[i]] <- mkt_Expand[, 3 + i]
}
# timing should be initialized and total time details should be created
if(verbose) {
backspace.amount <- .T_estimateTime("init")[2]
mkt_Expand <- cbind( mkt_Expand, modelcount = 0:(nrow(mkt_Expand)-1) )
}
# We transform it into an univariate form
mkt_univariate <- apply(mkt_Expand, 1, paste, collapse = "_")
# Mon 'caller' for LTBM that will be used in multi-cores lapply
hddcWrapper <- function(mkt_univariate, verbose,
start.time=0, totmod=1, backspace.amount=0, ...){
mkt_splitted <- strsplit(mkt_univariate, "_")
# we find model, K and threshold
model <- sapply(mkt_splitted, function(x) x[1])
K <- sapply( mkt_splitted, function(x) as.numeric(x[2]) )
threshold <- sapply(mkt_splitted, function(x) ifelse( is.numeric(x[3]),
as.numeric(x[3]),
x[3]) )
# note d_set is ignored unless run in d_select = "grid"
d_set <- rep(2, K)
for(i in 1:K) d_set[i] <- sapply(mkt_splitted,
function(x) as.numeric(x[3+i]))
# (::: is needed for windows multicore)
res <- "unknown error"
try(res <- .T_funhddc_main1(model = model, K = K,dfstart = dfstart,
dfupdate = dfupdate, dfconstr = dfconstr,
threshold = threshold,
d_set = d_set,...), silent=TRUE)
if(verbose) {
# update the timing
modelcount <- sapply( mkt_splitted, function(x) as.numeric(x[length(mkt_splitted[[1]])]) )
rv <- .T_estimateTime(modelcount, start.time, totmod, backspace = backspace.amount)
}
res
}
# We reset the number of cores to use
nRuns <- length(mkt_univariate)
if(nRuns < mc.cores) mc.cores <- nRuns
# We swicth to the right number of cores + a warning if necessary
max_nb_of_cores <- parallel::detectCores()
if(mc.cores > max_nb_of_cores){
warning("The argument mc.cores is greater than its maximun.
\nmc.cores was set to ", max_nb_of_cores)
mc.cores <- max_nb_of_cores
}
#
# Parallel estimations
#
start.time <- Sys.time() # get a starting time
if(mc.cores == 1){
# If there is no need for parallel, we just use lapply // in order to have the same output
# details for timing should be passed to lapply
par.output <- lapply(mkt_univariate, hddcWrapper, fdobj = fdobj,Wlist=Wlist, known=known,
method = d_select, eps = eps,
init = init, init.vector = init.vector,
mini.nb = mini.nb, min.individuals = min.individuals,
noise.ctrl = noise.ctrl, com_dim = com_dim,
kmeans.control = kmeans.control, d_max = d_max,
start.time = start.time, verbose = verbose,
backspace.amount = backspace.amount, totmod = nrow(mkt_Expand))
# here mkt_univariate is applied to to fdobj
} else {
# we use parLapply:
## create clusters
cl <- parallel::makeCluster(mc.cores)
parallel::clusterEvalQ( cl, library(TFunHDDC) )
parallel::clusterExport( cl, ls( environment() ), envir = environment() )
## run the parallel
par.output <- NULL
try(par.output <- parallel::parLapplyLB( cl, mkt_univariate, hddcWrapper,
fdobj=fdobj,Wlist=Wlist, method=d_select,
known=known,
itermax=itermax,
eps=eps, init=init,
init.vector=ifelse(missing(init.vector),
NA, init.vector),
mini.nb=mini.nb,
min.individuals=min.individuals,
noise.ctrl=noise.ctrl,
com_dim=com_dim,
kmeans.control=kmeans.control,
d_max=d_max, start.time = start.time, verbose = FALSE) )
## Stop the clusters
parallel::stopCluster(cl)
if( is.null(par.output) ) stop("Unknown error in the parallel computing.
Try mc.cores=1 to detect the problem.")
}
# The results are retrieved
#
getElement <- function(x, what, valueIfNull = -Inf){
# atention if x is the null model
if(length(x) == 1) return(valueIfNull)
if(!is.list(x) && !what %in% names(x)) return(NA)
x[[what]][length(x[[what]])]
}
getComment <- function(x){
# we get the error message
if(length(x) == 1) return(x)
return("")
}
# All likelihoods
LL_all <- sapply(par.output, getElement, what = "loglik")
comment_all <- sapply(par.output, getComment)
# If no model is valid => problem
if( all( !is.finite(LL_all) ) ){
warning("All models diverged.")
allCriteria <- data.frame(model = model, K = K, threshold = threshold,
LL = LL_all, BIC = NA, comment = comment_all)
res <- list()
res$allCriteria <- allCriteria
return(res)
}
# We select, for each (Q,K), the best run
n <- nrow(mkt_Expand)
modelKeep <- sapply(unique(mkt_univariate),
function(x) (1:n)[mkt_univariate==x][which.max(LL_all[mkt_univariate==x])])
# => we select only the best models
LL_all <- LL_all[modelKeep]
comment_all <- comment_all[modelKeep]
par.output <- par.output[modelKeep]
BIC <- sapply(par.output, getElement, what = "BIC")
ICL <- sapply(par.output, getElement, what = "ICL")
comp_all <- sapply(par.output, getElement, what = "complexity",
valueIfNull = NA)
model <- model[modelKeep]
threshold <- threshold[modelKeep]
K <- K[modelKeep]
d_keep <- list()
for(i in 1:max(K)) {
d_keep[[i]] <- d[[i]][modelKeep]
}
# We define the criterion of model selection
CRIT <- switch(criterion,
bic = BIC,
icl = ICL)
# The order of the results
myOrder <- order(CRIT, decreasing = TRUE)
# we keep the good model + creation of output
qui <- which.max(CRIT)
prms <- par.output[[qui]]
prms$criterion <- CRIT[qui]
names(prms$criterion) <- criterion
# Other output
prms$call <- call
# We add the complexity
names(comp_all) <- mkt_univariate[modelKeep]
prms$complexity_allModels <- comp_all
# Display
if(show){
if(n > 1) cat("tfunHDDC: \n")
model2print <- sapply(model,
function(x) sprintf( "%*s", max( nchar(model) ), x) )
K2print <- as.character(K)
K2print <- sapply(K2print,
function(x) sprintf( "%*s", max( nchar(K2print) ), x) )
thresh2print <- as.character(threshold)
thresh_width <- max( nchar(thresh2print) )
thresh2print <- sapply(thresh2print,
function(x) sprintf( "%s%s", x,
paste0( rep("0",
thresh_width - nchar(x) ),
collapse = "") ) )
# we make a data.frame
d_names <- c()
myResMat <- cbind( model2print[myOrder], K2print[myOrder],
thresh2print[myOrder],
.T_addCommas(prms$complexity_allModels[myOrder]),
.T_addCommas(CRIT[myOrder]) )
if(d_select == "grid") {
# only display d_set values when they are relevant
for(i in 1:max(K)) {
d2print <- as.character(d_keep[[i]])
d2print <- sapply( d2print,
function(x) sprintf( "%*s", max( nchar(d2print) ), x) )
myResMat <- cbind(myResMat, d2print[myOrder])
d_names <- append( d_names, paste0("d", i) )
}
}
myResMat <- cbind(myResMat, comment_all[myOrder])
myResMat <- as.data.frame(myResMat)
names(myResMat) <- c("model", "K", "threshold", "complexity",
toupper(criterion), d_names, "comment")
row.names(myResMat) <- 1:nrow(myResMat)
# if no problem => no comment
if( all(comment_all == "") ) myResMat$comment <- NULL
print(myResMat)
msg <- switch(criterion, bic = "BIC", icl = "ICL")
cat("\nSELECTED: model ", prms$model, " with ", prms$K, " clusters.\n")
cat("Selection Criterion: ", msg, ".\n", sep="")
} # end of if(show)
# We also add the matrix of all criteria
allCriteria <- data.frame( model = model[myOrder],
K = K[myOrder],
threshold = threshold[myOrder],
LL = LL_all[myOrder],
complexity = prms$complexity_allModels[myOrder],
BIC = BIC[myOrder],
ICL = ICL[myOrder],
rank = 1:length(myOrder) )
# we add the comments if necessary
if( any(comment_all != "") ) allCriteria$comment <- comment_all[myOrder]
prms$allCriteria <- allCriteria
# If all the results are kept
if(keepAllRes){
all_results <- par.output
names(all_results) <- mkt_univariate[modelKeep]
prms$all_results <- all_results
}
# Other stuff
prms$threshold <- threshold[qui]
prms$data<-fdobj
class(prms)<-"t-funHDDC"
return(prms)
} # end of tfunHDDC
.T_funhddc_main1 <- function(fdobj,Wlist, K, dfstart=dfstart,dfupdate=dfupdate,
dfconstr=dfconstr,model, itermax=200,threshold,
method, eps=1e-6, init, init.vector, mini.nb,
min.individuals, noise.ctrl, com_dim=NULL,
kmeans.control, d_max, d_set=c(2,2,2,2),known=NULL, ...){
ModelNames <- c("AKJBKQKDK", "AKBKQKDK", "ABKQKDK", "AKJBQKDK", "AKBQKDK", "ABQKDK", "AKJBKQKD", "AKBKQKD", "ABKQKD", "AKJBQKD", "AKBQKD", "ABQKD")
if (!inherits(fdobj, 'list')) {DATA <- t(fdobj$coefs)}
else {DATA = t(fdobj[[1]]$coefs); for (i in 2:length(fdobj)) DATA <- cbind(DATA,t(fdobj[[i]]$coefs))}
p <- ncol(DATA)
N <- nrow(DATA)
com_ev <- NULL
# We set d_max to a proper value
d_max <- min(N, p, d_max)
#################################clasification######################
########################################################
# /*
# * Authors: Andrews, J. Wickins, J. Boers, N. McNicholas, P.
# * Date Taken: 2023-01-01
# * Original Source: teigen (modified)
# * Comment: Known and training are treated as in teigen, matchtab and matchit
# * are also treated like in teigen
# * Address: https://github.com/cran/teigen
# *
# */
if(is.null(known)){
#this is the clustering case
clas <- 0
kno <- NULL
testindex <- NULL
}
else
#various sorts of classification
{
if(length(known)!=N){
return("Known classifications vector not given, or not the same length as the number of samples (see help file)")
}else
{
if(!anyNA(known)){
#Gs <- length(unique(known))
warning("No NAs in 'known' vector supplied, all values have known classification (parameter estimation only)")
testindex <- 1:N
kno <- rep(1, N)
unkno <- (kno-1)*(-1)
K <- length(unique(known))
init.vector <- as.numeric(known)
init <-"vector"
}
else{
training <- which(!is.na(known))
testindex <- training
kno <- vector(mode="numeric", length=N)
kno[testindex] <- 1
unkno <- (kno-1)*(-1)##it is 0 for training and 1 for the rest
}
}
clas <- 1
}
############################################
#################################################
if (K > 1){
t <- matrix(0, N, K)
tw <- matrix(0, N, K)
##############################################
#initialization for t
if(init == "vector"){
if(clas>0){
cn1=length(unique(known[testindex]))
matchtab=matrix(-1,K,K)
matchtab[1:cn1,1:K] <- table(known,init.vector)
rownames(matchtab)<-c(rownames(table(known,init.vector)),which(!(1:K %in% unique( known[testindex]))))
matchit<-rep(0,1,K)
while(max(matchtab)>0){
ij<-as.integer(which(matchtab==max(matchtab), arr.ind=T)[1,2])
ik<-which.max(matchtab[,ij])
matchit[ij]<-as.integer(rownames(as.matrix(ik)))
matchtab[,ij]<-rep(-1,1,K)
matchtab[as.integer(ik),]<-rep(-1,1,K)
}
matchit[which(matchit==0)]=which(!(1:K %in% unique(matchit)))
initnew <- init.vector
for(i in 1:K){
initnew[init.vector==i] <- matchit[i]
}
init.vector <- initnew
}
for (i in 1:K) t[which(init.vector == i), i] <- 1
}
else {if (init == "kmeans") {
kmc <- kmeans.control
cluster <- kmeans(DATA, K, iter.max = kmc$iter.max, nstart = kmc$nstart,
algorithm = kmc$algorithm, trace = kmc$trace)$cluster
if(clas>0)
{
cn1=length(unique(known[testindex]))
matchtab=matrix(-1,K,K)
matchtab[1:cn1,1:K] <- table(known,cluster)
rownames(matchtab)<-c(rownames(table(known,cluster)),which(!(1:K %in% unique( known[testindex]))))
matchit<-rep(0,1,K)
while(max(matchtab)>0){
ij<-as.integer(which(matchtab==max(matchtab), arr.ind=T)[1,2])
ik<-which.max(matchtab[,ij])
matchit[ij]<-as.integer(rownames(as.matrix(ik)))
matchtab[,ij]<-rep(-1,1,K)
matchtab[as.integer(ik),]<-rep(-1,1,K)
}
matchit[which(matchit==0)]=which(!(1:K %in% unique(matchit)))
knew <- cluster
for(i in 1:K){
knew[cluster==i] <- matchit[i]
}
cluster <- knew
}
for (i in 1:K)
t[which(cluster == i), i] <- 1
#A@5 initialize t with the result of k-means
} else
{if (init=="tkmeans"){ # TRIMMED COMMENT
kmc <- kmeans.control
# added default alpha
cluster <- tkmeans(DATA, k = K, iter.max = kmc$iter.max,
nstart = kmc$nstart, trace = kmc$trace,
alpha = kmc$alpha)$cluster
for (i in 1:K)
t[which(cluster == i), i] <- 1
# random assignment for trimmed values
rand_assign <- sample(1:K, sum(cluster == 0), replace = T)
ind <- which(cluster == 0)
for ( j in 1:length(rand_assign) )
t[ ind[j], rand_assign[j] ] <- 1
if(clas>0)
{
cluster <- max.col(t)
cn1=length(unique(known[testindex]))
matchtab=matrix(-1,K,K)
matchtab[1:cn1,1:K] <- table(known,cluster)
rownames(matchtab)<-c(rownames(table(known,cluster)),which(!(1:K %in% unique( known[testindex]))))
matchit<-rep(0,1,K)
while(max(matchtab)>0){
ij<-as.integer(which(matchtab==max(matchtab), arr.ind=T)[1,2])
ik<-which.max(matchtab[,ij])
matchit[ij]<-as.integer(rownames(as.matrix(ik)))
matchtab[,ij]<-rep(-1,1,K)
matchtab[as.integer(ik),]<-rep(-1,1,K)
}
matchit[which(matchit==0)]=which(!(1:K %in% unique(matchit)))
knew <- cluster
for(i in 1:K){
knew[cluster==i] <- matchit[i]
}
cluster <- knew
for (i in 1:K)
t[which(cluster == i), i] <- 1
}
}
else {if (init=="mini-em"){
prms_best <- 1
for (i in 1:mini.nb[1]){
prms <- .T_funhddc_main1(fdobj,Wlist, K, known=known,dfstart = dfstart, dfupdate = dfupdate,
dfconstr = dfconstr, model = model,
threshold = threshold, method = method,
itermax = mini.nb[2],
init.vector = 0,
init = 'random', mini.nb = mini.nb,
min.individuals = min.individuals,
noise.ctrl = noise.ctrl,
kmeans.control = kmeans.control,
com_dim = com_dim, d_max = d_max, d_set = d_set)
if(length(prms) != 1){
if (length(prms_best) == 1) prms_best <- prms
else if (prms_best$loglik[length(prms_best$loglik)] <
prms$loglik[length(prms$loglik)]) prms_best <- prms
}
}
if (length(prms_best) == 1) return(1)
t <- prms_best$posterior
if(clas>0)
{
cluster <- max.col(t)
cn1=length(unique(known[testindex]))
matchtab=matrix(-1,K,K)
matchtab[1:cn1,1:K] <- table(known,cluster)
rownames(matchtab)<-c(rownames(table(known,cluster)),which(!(1:K %in% unique( known[testindex]))))
matchit<-rep(0,1,K)
while(max(matchtab)>0){
ij<-as.integer(which(matchtab==max(matchtab), arr.ind=T)[1,2])
ik<-which.max(matchtab[,ij])
matchit[ij]<-as.integer(rownames(as.matrix(ik)))
matchtab[,ij]<-rep(-1,1,K)
matchtab[as.integer(ik),]<-rep(-1,1,K)
}
matchit[which(matchit==0)]=which(!(1:K %in% unique(matchit)))
knew <- cluster
for(i in 1:K){
knew[cluster==i] <- matchit[i]
}
cluster <- knew
for (i in 1:K)
t[which(cluster == i), i] <- 1
}
}
else {if (init=="random"){ #INIT IS RANDOM
t <- t( rmultinom( N, 1, rep(1 / K, K) ) ) # some multinomial
compteur <- 1
while(min( colSums(t) ) < 1 && (compteur <- compteur + 1) < 5)
t <- t( rmultinom( N, 1, rep(1 / K, K) ) )
if(min( colSums(t) ) < 1)
return("Random initialization failed (n too small)")
if(clas>0)
{
cluster <- max.col(t)
cn1=length(unique(known[testindex]))
matchtab=matrix(-1,K,K)
matchtab[1:cn1,1:K] <- table(known,cluster)
rownames(matchtab)<-c(rownames(table(known,cluster)),which(!(1:K %in% unique( known[testindex]))))
matchit<-rep(0,1,K)
while(max(matchtab)>0){
ij<-as.integer(which(matchtab==max(matchtab), arr.ind=T)[1,2])
ik<-which.max(matchtab[,ij])
matchit[ij]<-as.integer(rownames(as.matrix(ik)))
matchtab[,ij]<-rep(-1,1,K)
matchtab[as.integer(ik),]<-rep(-1,1,K)
}
matchit[which(matchit==0)]=which(!(1:K %in% unique(matchit)))
knew <- cluster
for(i in 1:K){
knew[cluster==i] <- matchit[i]
}
cluster <- knew
for (i in 1:K)
t[which(cluster == i), i] <- 1
}
}
}}}}}
else {
t <- matrix(1, N, 1) # K IS 1
tw <- matrix(1, N, 1)
}
if(clas>0){
t <- unkno*t
for(i in 1:N){
if(kno[i]==1){
t[i, known[i]] <- 1
}
}
}
nux <- c( rep.int(dfstart, K) )
initx <- .T_funhddt_init(fdobj,Wlist, K,t,nux, model, threshold, method, noise.ctrl,
com_dim, d_max, d_set)
tw <- .T_funhddt_twinit(fdobj,Wlist, initx,nux)
likely <- c()
I <- 0
test <- Inf
while ( (I <- I + 1) <= itermax && test >= eps ){
# loops here until itermax or test <eps
# Error catching
if (K > 1){
if( any( is.na(t) ) ) return("unknown error: NA in t_ik")
if( any(colSums(t > 1 / K) < min.individuals) )
return("pop<min.individuals")
}
m <- .T_funhddt_m_step1(fdobj,Wlist, K, t, tw, nux, dfupdate, dfconstr, model,
threshold, method, noise.ctrl, com_dim, d_max, d_set) # mstep is applied here
nux <- m$nux
to <- .T_funhddt_e_step1(fdobj,Wlist, m,clas, known, kno) # E-step is applied here
L <- to$L # this s the likelihood
t <- to$t# this is the new t
tw <- to$tw
#likelihood contrib=L
likely[I] <- L
if (I == 2) test <- abs(likely[I] - likely[I - 1])
else if (I > 2)
{
lal <- (likely[I] - likely[I - 1]) / (likely[I - 1] - likely[I - 2])
lbl <- likely[I - 1] + (likely[I] - likely[I - 1]) / (1.0 - lal)
test <- abs(lbl - likely[I - 1])
}
}
# a
if ( model%in%c("AKBKQKDK", "AKBQKDK", "AKBKQKD", "AKBQKD") ) {
a <- matrix(m$a[, 1], 1, m$K, dimnames=list(c("Ak:"), 1:m$K))
} else if(model=="AJBQD") {
a <- matrix(m$a[1, ], 1, m$d[1], dimnames=list(c('Aj:'), paste('a', 1:m$d[1], sep='')))
} else if ( model%in%c("ABKQKDK", "ABQKDK", "ABKQKD", "ABQKD", "ABQD") ) {
a <- matrix(m$a[1], dimnames=list(c('A:'), c('')))
} else a <- matrix(m$a, m$K, max(m$d), dimnames=list('Class'=1:m$K, paste('a', 1:max(m$d), sep='')))
# b
if ( model%in%c("AKJBQKDK", "AKBQKDK", "ABQKDK", "AKJBQKD", "AKBQKD", "ABQKD",
"AJBQD", "ABQD") ) {
b <- matrix(m$b[1], dimnames=list(c('B:'), c('')))
} else b <- matrix(m$b, 1, m$K, dimnames=list(c("Bk:"), 1:m$K))
# d, mu, prop
d <- matrix(m$d, 1, m$K, dimnames=list(c('dim:'), "Intrinsic dimensions of the classes:"=1:m$K))
mu <- matrix(m$mu, m$K, p, dimnames=list('Class'=1:m$K, 'Posterior group means:'=paste('V', 1:p, sep='')))
prop <- matrix(m$prop, 1, m$K, dimnames=list(c(''), 'Posterior probabilities of groups'=1:m$K))
nux <- matrix(m$nux, 1, m$K, dimnames=list(c(''), 'Degrees of freedom of t-distributions'=1:m$K))
# Other elements
complexity <- .T_hdc_getComplexityt(m, p, dfconstr)
class(b) <- class(a) <- class(d) <- class(prop) <- class(mu) <- 'hd' #A@5 alpha and eta will nedd class too
cls <- max.col(t)#@ here the cluster is found
##
converged = test < eps
params <- list()
params = c(params,list(
Wlist=Wlist,
model = model,
K = K,
d = d,
a = a,
b = b,
mu = mu,
prop = prop,
nux = nux,
ev = m$ev,
Q = m$Q,
Q1 = m$Q1,
fpca = m$fpcaobj,
loglik = likely[length(likely)],
loglik_all = likely,
posterior = t,
class = cls,
com_ev = com_ev,
N = N,
complexity = complexity,
threshold = threshold,
d_select = method,
converged = converged))
if(clas>0){
params[["index"]] <- testindex
}
# We compute the BIC / ICL
bic_icl <- .T_hdclassift_bic(params, p, dfconstr)
params$BIC <- bic_icl$bic
params$ICL <- bic_icl$icl
# We set the class
class(params) <- 'tfunHDDC'
return(params)
} # end of .T_funhddc_main1
#########################################################
.T_funhddt_init <- function(fdobj,Wlist, K, t, nux, model, threshold, method,
noise.ctrl, com_dim, d_max, d_set){ # this is the init step
if (!inherits(fdobj, 'list')) { x <- t(fdobj$coefs) #THIS IS the univariate CASE
} else {x = t(fdobj[[1]]$coefs); for (i in 2:length(fdobj)) x <- cbind(x,t(fdobj[[i]]$coefs))}
# x is the coefficient in the fdobject
N <- nrow(x)
p <- ncol(x)
prop <- c()
n <- colSums(t)
prop <- n / N # prop is pi as a vector needed for ai
mu <- matrix(NA, K, p)
ind <- apply(t > 0, 2, which)
n_bis <- c()
for(i in 1:K) n_bis[i] <- length(ind[[i]])
#
#Calculation on Var/Covar matrices and mean mu vector
#
# we keep track of the trace (== sum of eigenvalues) to compute the b
traceVect <- c()
ev <- matrix(0, K, p)
Q <- vector(mode='list', length=K) #A@5 this is matrix q_k
fpcaobj <- list()
for (i in 1:K){
donnees <- .T_initmypca.fd1(fdobj,Wlist, t[, i])
mu[i, ] <- donnees$mux
traceVect[i] <- sum( diag(donnees$valeurs_propres) )
ev[i, ] <- donnees$valeurs_propres
Q[[i]] <- donnees$U
fpcaobj[[i]] <- donnees
}
#Intrinsic dimensions selection
if (model%in%c("AJBQD", "ABQD")){
d <- rep(com_dim, length=K)
} else if ( model%in%c("AKJBKQKD", "AKBKQKD", "ABKQKD", "AKJBQKD", "AKBQKD", "ABQKD") ){
dmax <- min(apply((ev>noise.ctrl)*rep(1:ncol(ev), each=K), 1, which.max))-1
if(com_dim>dmax) com_dim <- max(dmax, 1)
d <- rep(com_dim, length=K)
} else {
d <- .T_hdclassif_dim_choice(ev, n, method, threshold, FALSE, noise.ctrl, d_set)
}
#Setup of the Qi matrices
#
Q1 <- Q
for(i in 1:K) Q[[i]] <- matrix(Q[[i]][, 1:d[i]], p, d[i])
#Calculation of the remaining parameters of the selected model
# PARAMETER a
#
ai <- matrix(NA, K, max(d))
if ( model %in% c('AKJBKQKDK', 'AKJBQKDK', 'AKJBKQKD', 'AKJBQKD') ){
for (i in 1:K) ai[i, 1:d[i]] <- ev[i, 1:d[i]]
} else if ( model%in%c('AKBKQKDK', 'AKBQKDK' , 'AKBKQKD', 'AKBQKD') ){
for (i in 1:K) ai[i, ] <- rep(sum(ev[i, 1:d[i]])/d[i], length=max(d))
} else if(model=="AJBQD"){
for (i in 1:K) ai[i, ] <- ev[1:d[1]]
} else if(model=="ABQD") {
ai[] <- sum(ev[1:d[1]])/d[1]
} else {
a <- 0
eps <- sum(prop * d)
for (i in 1:K) a <- a + sum(ev[i, 1:d[i]]) * prop[i]
ai <- matrix(a / eps, K, max(d))
}
# PARAMETER b
#
bi <- c()
denom <- min(N, p)
if ( model %in% c('AKJBKQKDK', 'AKBKQKDK', 'ABKQKDK', 'AKJBKQKD', 'AKBKQKD', 'ABKQKD') ){
for(i in 1:K){
remainEV = traceVect[i] - sum(ev[ i, 1:d[i] ])
# bi[i] <- sum(ev[i, (d[i]+1):min(N, p)])/(p-d[i])
bi[i] <- remainEV / (p - d[i]) #pour moi c'est p au lieu de denom
}
} else {
b <- 0
eps <- sum(prop * d)
for(i in 1:K){
remainEV = traceVect[i] - sum(ev[ i, 1:d[i] ])
# b <- b + sum(ev[i, (d[i]+1):min(N, p)])*prop[i]
b <- b + remainEV * prop[i]
}
bi[1:K] <- b / (min(N, p) - eps)
}
list(model = model,
K = K,
d = d,
a = ai,
b = bi,
mu = mu,
prop = prop,
ev = ev,
Q = Q,
fpcaobj = fpcaobj,
Q1 = Q1
)
} # end of .T_funhddt_init
.T_initmypca.fd1 <- function(fdobj,Wlist, Ti){
#
if (inherits(fdobj, 'list')){ #the multivariate case
#save the mean before centering
mean_fd <- list()
for ( i in 1:length(fdobj) ){
mean_fd[[i]] <- fdobj[[i]]
}
#Making the matrix of coeffs
coef <- t(fdobj[[1]]$coefs)
for ( i in 2:length(fdobj) ){
coef <- cbind( coef, t(fdobj[[i]]$coefs) )
}
wtcov <- cov.wt(coef, wt = Ti, method = "ML")
mat_cov <- wtcov$cov
coefmean <- wtcov$center
n_lead <- 0
n_var <- dim(fdobj[[1]]$coefs)[1]
fdobj[[1]]$coefs <- sweep(fdobj[[1]]$coefs, 1,
coefmean[(n_lead + 1):(n_var + n_lead)])
mean_fd[[1]]$coefs <- as.matrix(data.frame(mean = coefmean[(n_lead + 1):(n_var + n_lead)]))
for ( i in 2:length(fdobj) ){
n_lead <- n_lead + n_var
n_var <- dim(fdobj[[i]]$coefs)[1]
fdobj[[i]]$coefs <- sweep(fdobj[[i]]$coefs, 1, coefmean[(n_lead+1):(n_var+n_lead)])
mean_fd[[i]]$coefs <- as.matrix(data.frame(mean = coefmean[(n_lead + 1):(n_var + n_lead)]))
}
#covariance matrix
cov = Wlist$W_m %*% mat_cov %*% t(Wlist$W_m)
#eigenvalues and eigenfunctions
valeurs <- Eigen(cov)
valeurs_propres <- valeurs$values
vecteurs_propres <- valeurs$vectors
bj <- solve(Wlist$W_m) %*% vecteurs_propres
fonctionspropres <- fdobj[[1]]
fonctionspropres$coefs <- bj
scores <- coef %*% Wlist$W %*% bj
varprop <- valeurs_propres / sum(valeurs_propres)
ipcafd <- list(valeurs_propres = valeurs_propres,
harmonic = fonctionspropres,
scores = scores,
covariance = cov,
U = bj,
varprop = varprop,
meanfd = mean_fd,
mux = coefmean)
} else if (!inherits(fdobj, 'list')) {
#calculation of the mean for each group
mean_fd <- fdobj
coef <- t(fdobj$coefs)
wtcov <- cov.wt(coef, wt = Ti, method = "ML")
coefmean <- wtcov$center
#covariance matrix
mat_cov <- wtcov$cov
#Centering of the functional objects by group
fdobj$coefs <- sweep(fdobj$coefs, 1, coefmean) # sweep subtracts coefmean from fdobj$coefs, i.e fdobj$coefs_i-coefmean_i
mean_fd$coefs <- as.matrix( data.frame(mean = coefmean) )
cov <- Wlist$W_m %*% mat_cov %*% t(Wlist$W_m)
#eigenvalues and eigenfunctions
#--------------------------------------------
valeurs <- Eigen(cov)
valeurs_propres <- valeurs$values
vecteurs_propres <- valeurs$vectors
fonctionspropres <- fdobj
bj <- solve(Wlist$W_m) %*% vecteurs_propres
fonctionspropres$coefs <- bj
#calculations of scores from the formula for pca.fd
scores <- inprod(fdobj, fonctionspropres)
varprop <- valeurs_propres / sum(valeurs_propres)
ipcafd <- list(valeurs_propres = valeurs_propres,
harmonic = fonctionspropres,
scores = scores,
covariance = cov,
U = bj,
meanfd = mean_fd,
mux = coefmean)
#-------------------------------------------
}
class(ipcafd) <- "ipca.fd"
return(ipcafd)
} # end of .T_initmypca.fd1
###################################################
############################################################
.T_funhddt_twinit <- function(fdobj,Wlist, par,nux){ # this is the E step
if (!inherits(fdobj, 'list')) {x <- t(fdobj$coefs)} # THIS IS the univariate CASE
else {x = t(fdobj[[1]]$coefs); for (i in 2:length(fdobj)) x = cbind(x,t(fdobj[[i]]$coefs))} #NOT THIS
p <- ncol(x)
N <- nrow(x)
K <- par$K
a <- par$a
b <- par$b
mu <- par$mu
d <- par$d
Q <- par$Q
Q1 <- par$Q1
W <- matrix(0, N, K)
b[b<1e-6] <- 1e-6
#############################aici am ajuns
#################################################
mah_pen<-matrix(0, N, K)
for (i in 1:K) {
Qk <- Q1[[i]]
aki <- sqrt( diag( c( 1 / a[ i, 1:d[i] ], rep(1 / b[i], p - d[i]) ) ) )
muki <- mu[i,]
Wki <- Wlist$W_m
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
W[,i] <- (nux[i] + p) / (nux[i] + mah_pen[, i])
}
return(W)
} # end of .T_funhddt_twinit
######################################################################
############################################################
.T_funhddt_e_step1 <- function(fdobj,Wlist, par,clas=0,known=NULL, kno=NULL){ # this is the E step
if (!inherits(fdobj, 'list')) {x <- t(fdobj$coefs)} # THIS IS the univariate CASE
else {x = t(fdobj[[1]]$coefs); for (i in 2:length(fdobj)) x = cbind(x,t(fdobj[[i]]$coefs))} #NOT THIS
p <- ncol(x)
N <- nrow(x)
K <- par$K
nux <- par$nux
a <- par$a
b <- par$b
mu <- par$mu
d <- par$d
prop <- par$prop
Q <- par$Q
Q1 <- par$Q1
b[b<1e-6] <- 1e-6
if(clas>0){
unkno <- (kno-1)*(-1)
}
##################################################
#########################################################
###########################################
t <- matrix(0, N, K)
tw <- matrix(0, N, K)
mah_pen <- matrix(0, N, K)
K_pen <- matrix(0, N, K)
num <- matrix(0, N, K)
ft <- matrix(0, N, K)
s <- rep(0, K)
for (i in 1:K) {
s[i] <- sum( log(a[i, 1:d[i]]) )
Qk <- Q1[[i]]
aki <- sqrt( diag( c( 1 / a[i, 1:d[i]], rep(1 / b[i], p - d[i]) ) ) )
muki <- mu[i, ]
Wki <- Wlist$W_m
dety<-Wlist$dety
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
tw[, i] <- (nux[i] + p) / (nux[i] + mah_pen[, i])
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[i]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
ft <- exp(K_pen)
#ft_den used for likelihood
ft_den <- rowSums(ft)
kcon <- -apply(K_pen,1,max)
K_pen <- K_pen + kcon
num <- exp(K_pen)
t <- num / rowSums(num)
L1 <- sum( log(ft_den) )
L <- sum(log( rowSums( exp(K_pen) ) ) - kcon)
trow <- numeric(N)
tcol <- numeric(K)
trow <- rowSums(t)
tcol <- colSums(t)
if( any(tcol < p) ) {
t <- (t + 0.0000001) / ( trow + (K * 0.0000001) )
}
if(clas>0){
t <- unkno*t
for(i in 1:N){
if(kno[i]==1){
t[i, known[i]] <- 1
}
}
}
# if(any(is.nan(t))){
# break
# }
list(t = t,
tw = tw,
L = L)
} # end of .T_funhddt_e_step1
.T_funhddt_m_step1 <- function(fdobj,Wlist, K, t,tw, nux, dfupdate, dfconstr, model, threshold, method, noise.ctrl, com_dim, d_max, d_set){ #A@5 this is the M step
if (!inherits(fdobj, 'list')) { x <- t(fdobj$coefs) # THIS IS the univariate CASE
} else {x = t(fdobj[[1]]$coefs); for (i in 2:length(fdobj)) x = cbind(x,t(fdobj[[i]]$coefs))}
# x is the coefficient in the fdobject
N <- nrow(x)
p <- ncol(x)
prop <- c()
n <- colSums(t)
prop <- n / N # props is pi as a vector
mu <- matrix(NA, K, p)
mu1 <- matrix(NA, K, p)
corX <- matrix(0, N, K)
corX <- t * tw
for (i in 1:K) {
mu[i, ] <- apply(t( as.matrix(corX[, i]) %*% matrix(1, 1, p) ) * t(x),
1, sum) / sum(corX[,i])
mu1[i, ] <- colSums(corX[, i] * x) / sum(corX[, i])
}
ind <- apply(t>0, 2, which)
n_bis <- c()
for(i in 1:K) n_bis[i] <- length(ind[[i]])
#calculation of the degrees of freedom
if(dfupdate=="approx"){
testing <- try(jk861 <- .T_tyxf8(dfconstr, nux, n, t, tw, K, p, N),
silent = TRUE)
if( all( is.finite(testing) ) ){
nux <- jk861
}
# else{break}
}
else{
if(dfupdate=="numeric"){
testing <- try(jk861 <- .T_tyxf7(dfconstr, nux, n, t, tw, K, p, N),
silent = TRUE)
if( all( is.finite(testing) ) ){
nux <- jk861
}
# else{break}
}
}
#
#Calculation on Var/Covar matrices
#
# we keep track of the trace (== sum of eigenvalues) to compute the b
traceVect <- c()
ev <- matrix(0, K, p)
Q <- vector(mode='list', length=K) #A@5 this is matrix q_k
fpcaobj = list()
for (i in 1:K){
donnees <- .T_mypcat.fd1(fdobj,Wlist, t[, i], corX[, i])
#
#-----------------------------------
traceVect[i] <- sum( diag(donnees$valeurs_propres) )
ev[i, ] <- donnees$valeurs_propres
Q[[i]] <- donnees$U
fpcaobj[[i]] <- donnees
}
#Intrinsic dimensions selection
if ( model %in% c("AJBQD", "ABQD") ){
d <- rep(com_dim, length=K)
} else if ( model%in%c("AKJBKQKD", "AKBKQKD", "ABKQKD", "AKJBQKD", "AKBQKD", "ABQKD") ){
dmax <- min(apply((ev>noise.ctrl)*rep(1:ncol(ev), each=K), 1, which.max))-1
if(com_dim>dmax) com_dim <- max(dmax, 1)
d <- rep(com_dim, length=K)
} else {
d <- .T_hdclassif_dim_choice(ev, n, method, threshold, FALSE, noise.ctrl, d_set)
}
#Setup of the Qi matrices
Q1 <- Q
for(i in 1:K) Q[[i]] <- matrix(Q[[i]][, 1:d[i]], p, d[i])
#Calculation of the remaining parameters of the selected model
# PARAMETER a
ai <- matrix(NA, K, max(d))
if ( model%in%c('AKJBKQKDK', 'AKJBQKDK', 'AKJBKQKD', 'AKJBQKD') ){
for (i in 1:K) ai[i, 1:d[i]] <- ev[i, 1:d[i]]
} else if ( model%in%c('AKBKQKDK', 'AKBQKDK' , 'AKBKQKD', 'AKBQKD') ){
for (i in 1:K) ai[i, ] <- rep(sum(ev[i, 1:d[i]])/d[i], length=max(d))
} else if(model=="AJBQD"){
for (i in 1:K) ai[i, ] <- ev[1:d[1]]
} else if(model=="ABQD") {
ai[] <- sum(ev[1:d[1]])/d[1]
} else {
a <- 0
eps <- sum(prop*d)
for (i in 1:K) a <- a + sum(ev[i, 1:d[i]])*prop[i]
ai <- matrix(a/eps, K, max(d))
}
# PARAMETER b
bi <- c()
denom <- min(N, p)
if ( model%in%c('AKJBKQKDK', 'AKBKQKDK', 'ABKQKDK', 'AKJBKQKD', 'AKBKQKD', 'ABKQKD') ){
for(i in 1:K){
remainEV <- traceVect[i] - sum(ev[i, 1:d[i]])
# bi[i] <- sum(ev[i, (d[i]+1):min(N, p)])/(p-d[i])
bi[i] <- remainEV/(p-d[i]) #pour moi c'est p au lieu de denom
}
} else {
b <- 0
eps <- sum(prop*d)
for(i in 1:K){
remainEV = traceVect[i] - sum(ev[i, 1:d[i]])
# b <- b + sum(ev[i, (d[i]+1):min(N, p)])*prop[i]
b <- b + remainEV*prop[i]
}
bi[1:K] <- b / (min(N, p) - eps)
}
#---------------------------------------
list(model = model,
K = K,
d = d,
a = ai,
b = bi,
mu = mu,
prop = prop,
nux = nux,
ev = ev,
Q = Q,
fpcaobj = fpcaobj,
Q1 = Q1)
} # end of .T_funhddt_m_step1
#############################################
##################################################
#degrees of freedom functions modified yxf7 and yxf8 functions
# /*
# * Authors: Andrews, J. Wickins, J. Boers, N. McNicholas, P.
# * Date Taken: 2023-01-01
# * Original Source: teigen (modified)
# * Address: https://github.com/cran/teigen
# *
# */
.T_tyxf7 <- function(dfconstr, nux, n, t, tw, K, p, N){
if(dfconstr=="no"){
dfoldg <- nux
for(i in 1:K){
constn <- 1 + (1/n[i]) * sum(t[, i] * ( log(tw[, i]) - tw[, i]) ) +
digamma( (dfoldg[i] + p) / 2 ) - log( (dfoldg[i] + p) / 2 )
nux[i] <- uniroot( function(v) log(v / 2) - digamma(v / 2) + constn,
lower = 0.0001, upper = 1000, tol = 0.00001)$root
if(nux[i] > 200){
nux[i] <- 200
}
if(nux[i] < 2){
nux[i] <- 2
}
}
}
else
{
dfoldg <- nux[1]
constn <- 1 + (1/N) * sum( t * (log(tw) - tw) ) +
digamma( (dfoldg + p) / 2 ) - log( (dfoldg + p) / 2 )
dfsamenewg <- uniroot( function(v) log(v / 2) - digamma(v / 2) + constn,
lower = 0.0001, upper = 1000, tol = 0.01)$root
if(dfsamenewg > 200){
dfsamenewg <- 200
}
if(dfsamenewg < 2){
dfsamenewg <- 2
}
nux <- c( rep(dfsamenewg, K) )
}
return(nux)
} # end of .T_tyxf7
.T_tyxf8 <- function(dfconstr, nux, n, t, tw, K, p, N){
if(dfconstr == "no"){
dfoldg <- nux
for(i in 1:K){
constn <- 1 + (1 / n[i]) * sum( t[, i] * (log(tw[, i]) - tw[,i]) ) +
digamma( (dfoldg[i] + p) / 2 ) -
log( (dfoldg[i]+p) / 2 )
constn <- -constn
nux[i] <- (-exp(constn) + 2 * ( exp(constn)) *
( exp( digamma(dfoldg[i] / 2)) -
( (dfoldg[i]/2) - (1/2) ) ) ) / ( 1 - exp(constn) )
if(nux[i] > 200){
nux[i] <- 200
}
if(nux[i] < 2){
nux[i] <- 2
}
}
} else {
dfoldg <- nux[1]
constn <- 1 + (1 / N) * sum( t * (log(tw) - tw) ) +
digamma( (dfoldg + p) / 2 ) - log( (dfoldg + p) / 2 )
constn <- -constn
dfsamenewg <- (-exp(constn) + 2 * ( exp(constn)) *
(exp( digamma(dfoldg / 2)) -
( (dfoldg /2) - (1 / 2) ) ) ) / ( 1 - exp(constn) )
if(dfsamenewg > 200){
dfsamenewg <- 200
}
if(dfsamenewg < 2){
dfsamenewg <- 2
}
nux <- c( rep(dfsamenewg, K) )
}
return(nux)
} # end of .T_tyxf8
#######################################################
############################################################
.T_mypcat.fd1 <- function(fdobj,Wlist, Ti, CorI){
if (inherits(fdobj, 'list')){ # multivariate CASE
#saving the mean before contering
mean_fd <- list()
for (i in 1:length(fdobj)){
mean_fd[[i]] <- fdobj[[i]]
}
#centering the functional objects
for ( i in 1:length(fdobj) ){
coefmean <- apply(t( as.matrix(CorI) %*%
matrix(1, 1, nrow(fdobj[[i]]$coefs) ) ) *
fdobj[[i]]$coefs, 1, sum) / sum(CorI)
fdobj[[i]]$coefs <- sweep(fdobj[[i]]$coefs, 1, coefmean)
mean_fd[[i]]$coefs <- as.matrix( data.frame(mean = coefmean) )
}
#Constructing the matrix of coefficents
coef <- t(fdobj[[1]]$coefs)
for ( i in 2:length(fdobj) ){
coef <- cbind( coef, t(fdobj[[i]]$coefs) )
}
#covariance matrix
mat_cov <- crossprod( t( .T_repmat(sqrt(CorI), n = dim(t(coef))[[1]],p=1) *
t(coef) ) ) / sum(Ti)
cov <- Wlist$W_m %*% mat_cov %*% t(Wlist$W_m)
#eigenvalues and eigenfunctions
valeurs <- Eigen(cov)
valeurs_propres <- valeurs$values
vecteurs_propres <- valeurs$vectors
bj <- solve(Wlist$W_m) %*% vecteurs_propres
fonctionspropres <- fdobj[[1]]
fonctionspropres$coefs <- bj
scores <- coef %*% Wlist$W %*% bj
varprop <- valeurs_propres / sum(valeurs_propres)
pcafd <- list(valeurs_propres = valeurs_propres,
harmonic = fonctionspropres,
scores = scores,
covariance = cov,
U = bj,
varprop = varprop,
meanfd = mean_fd
)
}else if (!inherits(fdobj, 'list')) { #univariate CASE
#Calculation of means by group
mean_fd <- fdobj
#Centering the functional objects by group
coefmean <- apply(t( as.matrix(CorI) %*% matrix( 1, 1, nrow(fdobj$coefs) ) )
* fdobj$coefs, 1, sum) / sum(CorI)
fdobj$coefs <- sweep(fdobj$coefs, 1, coefmean)
mean_fd$coefs <- as.matrix( data.frame(mean = coefmean) )
coef <- t(fdobj$coefs)
#covariance matrix
mat_cov <- crossprod( t( .T_repmat(sqrt(CorI),
n = dim( t(coef) )[[1]],p=1) * t(coef) ) ) / sum(Ti)
cov <- Wlist$W_m %*% mat_cov %*% t(Wlist$W_m) #A@5 this is the last formula on page 7
#eigenvalues and eigenfunctions
#--------------------------------------------
valeurs <- Eigen(cov)
valeurs_propres <- valeurs$values
vecteurs_propres <- valeurs$vectors
fonctionspropres <- fdobj
bj <- solve(Wlist$W_m) %*% vecteurs_propres
fonctionspropres$coefs <- bj
#calculations of scores by the formula for pca.fd
scores <- inprod(fdobj, fonctionspropres)
varprop <- valeurs_propres / sum(valeurs_propres)
pcafd <- list(valeurs_propres = valeurs_propres,
harmonic = fonctionspropres,
scores = scores,
covariance = cov,
U = bj,
meanfd = mean_fd
)
#-------------------------------------------
}
class(pcafd) <- "pca.fd"
return(pcafd)
} # end of .T_mypcat.fd1
.T_hddc_ari <- function(x, y){
#This function is drawn from the mclust package
x <- as.vector(x)
y <- as.vector(y)
tab <- table(x, y)
if ( all( dim(tab) == c(1, 1) ) ) return(1)
a <- sum( choose(tab, 2) )
b <- sum( choose(rowSums(tab), 2) ) - a
c <- sum( choose(colSums(tab), 2) ) - a
d <- choose(sum(tab), 2) - a - b - c
ARI <- (a - (a + b) * (a + c)/(a + b + c + d))/((a + b + a + c)/2 - (a + b) * (a + c)/(a + b + c + d))
return(ARI)
} # end of .T_hddc_ari
####
#### CONTROLS ####
####
.T_hddc_control = function(call){
prefix = "HDDC: "
.T_myCallAlerts(call, "data", "list,fd", 3, TRUE, prefix)
.T_myCallAlerts(call, "K", "integerVectorGE1", 3, FALSE, prefix) #
.T_myCallAlerts(call, "known", "intNAVectorGE1", 3, FALSE, prefix) #
.T_myCallAlerts(call, "dfstart", "singleIntegerGE2", 3, FALSE, prefix)
.T_myCallAlerts(call, "dfupdate", "singleCharacter", 3, FALSE, prefix)
.T_myCallAlerts(call, "dfconstr", "singleCharacter", 3, FALSE, prefix)
.T_myCallAlerts(call, "model", "characterVector", 3, FALSE, prefix)
.T_myCallAlerts(call, "threshold", "numericVectorGE0LE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "criterion", "character", 3, FALSE, prefix)
.T_myCallAlerts(call, "com_dim", "singleIntegerGE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "itermax", "singleIntegerGE2", 3, FALSE, prefix) # IAIN set to 2 iteration min
.T_myCallAlerts(call, "eps", "singleNumericGE0", 3, FALSE, prefix)
.T_myCallAlerts(call, "graph", "singleLogical", 3, FALSE, prefix)
.T_myCallAlerts(call, "d_select", "singleCharacter", 3, FALSE, prefix)
.T_myCallAlerts(call, "init", "singleCharacter", 3, FALSE, prefix)
.T_myCallAlerts(call, "show", "singleLogical", 3, FALSE, prefix)
.T_myCallAlerts(call, "mini.nb", "integerVectorGE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "min.individuals", "singleIntegerGE2", 3, FALSE, prefix)
.T_myCallAlerts(call, "noise.ctrl", "singleNumericGE0", 3, FALSE, prefix)
.T_myCallAlerts(call, "mc.cores", "singleIntegerGE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "nb.rep", "singleIntegerGE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "keepAllRes", "singleLogical", 3, FALSE, prefix)
.T_myCallAlerts(call, "d_max", "singleIntegerGE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "d_range", "integerVectorGE1", 3, FALSE, prefix)
.T_myCallAlerts(call, "verbose", "singleLogical", 3, FALSE, prefix)
####
#### SPECIFIC controls
####
# Getting some elements
data = eval.parent(call[["data"]], 2)
K = eval.parent(call[["K"]], 2)
known = eval.parent(call[["known"]], 2)
init = eval.parent(call[["init"]], 2)
criterion = eval.parent(call[["criterion"]], 2)
d_select = eval.parent(call[["d_select"]], 2)
data_length = 0
N = 0
d_range = eval.parent(call[["d_range"]], 2)
if (is.fd(data)){
# No NA in the data:
if (any(is.na(data$coefs))) stop("HDDC: NA values in the data are not supported. Please remove them beforehand.", call. = FALSE)
if (any(!is.numeric(data$coefs))) stop("HDDC: Only numeric values are supported in fdata object. Please check coefficients.", call. = FALSE)
if (any(!is.finite(data$coefs))) stop("HDDC: Only finite values are supported in fdata object. Please check coefficients.", call. = FALSE)
# Size of the data
if(any(K>2*NROW(data$coefs))) stop("HDDC: The number of observations must be at least twice the number of clusters.", call. = FALSE)
data_length <- nrow(data$coefs)
N <- ncol(data$coefs)
}else{
# No NA in the data:
for (i in 1:length(data)){
if(!is.fd(data[[i]])) stop("HDDC: All dimensions of data must be fd object.", call. = FALSE)
if (any(is.na(data[[i]]$coefs))) stop("HDDC: NA values in the data are not supported. Please remove them beforehand.", call. = FALSE)
if (any(!is.numeric(data[[i]]$coefs))) stop("HDDC: Only numeric values are supported in fdata object. Please check coefficients.", call. = FALSE)
if (any(!is.finite(data[[i]]$coefs))) stop("HDDC: Only finite values are supported in fdata object. Please check coefficients.", call. = FALSE)
data_length <- data_length + nrow(data[[i]]$coefs)
}
# Size of the data
if(any(K>2*NROW(data[[1]]$coefs))) stop("HDDC: The number of observations must be at least twice the number of clusters ", call. = FALSE)
N <- ncol(data[[1]]$coefs)
}
# Initialization Controls
if(!is.null(init)){
# we get the value of the initialization
init = .T_myAlerts(init, "init", "singleCharacterMatch.arg", "HDDC: ", c('random', 'kmeans', 'tkmeans', 'mini-em', 'vector'))
# Custom initialization => controls and setup
if(init == "vector"){
fdobj = data
if (!inherits(fdobj, 'list')) {x = t(fdobj$coefs)}
else {x = t(fdobj[[1]]$coefs); for (i in 2:length(fdobj)) x = cbind(x,t(fdobj[[i]]$coefs))}
.T_myCallAlerts(call, "init.vector", "(integer,factor)Vector", 3, FALSE, prefix)
init.vector = eval.parent(call[["init.vector"]], 2)
if(is.null(init.vector)) stop("HDDC: When init='vector', the argument 'init.vector' should be provided.", call. = FALSE)
if(length(unique(K))>1) stop("HDDC: Several number of classes K cannot be estimated when init='vector'.", call. = FALSE)
init.vector <- unclass(init.vector)
if(K!=max(init.vector)) stop("HDDC: The number of class K, and the number of classes in the initialization vector are different", call. = FALSE)
if( length(init.vector)!=nrow(x) ) stop("HDDC: The size of the initialization vector is different of the size of the data", call. = FALSE)
}
# The param init A@5 is this even implemented????
if (init=='param' && nrow(data)<ncol(data)){
stop("HDDC: The 'param' initialization can't be done when N<p", call. = FALSE)
}
# The mini.em init
if (init=='mini-em'){
mini.nb = eval.parent(call[["mini.nb"]], 2)
if(!is.null(mini.nb) && length(mini.nb)!=2){
stop("HDDC: The parameter mini.nb must be a vector of length 2 with integers\n", call. = FALSE)
}
}
}
if(!is.null(d_select) && d_select == 'grid') {
if(!is.null(d_range) && (max(d_range) > data_length)) stop("HDDC: Intrinsic dimension 'd' cannot be larger than number of input parameters. Please set a lower max.", call. = FALSE)
}
if(!is.null(known)) {
if(all(is.na(known))) stop("HDDC: declared 'known' must have known values from each class (not all NA).", call. = F)
if(length(known) != N) stop("HDDC: 'known' length must match the number of observations from data (known may include NA for observations where groups are unknown).", call. = F)
if(length(K) > 1) stop("HDDC: only one group count can be used since known must have values for each group.", call. = F)
if(length(unique(known[!is.na(known)])) > K) stop("HDDC: at most K different classes may be passed to 'known'.", call. = F)
if(max(known, na.rm = T) > K) stop("HDDC: group numbers must come from integers up to K (ie. for K = 3 integers are from 1, 2, 3).", call. = F)
}
}
.T_default_kmeans_control = function(control){
.T_myAlerts(control,"kmeans.control","list","kmeans controls: ")
#
# Default values of the control parameters
#
myDefault = list()
myDefault$iter.max = 10
myDefault$nstart = 1
myDefault$algorithm = c("Hartigan-Wong", "Lloyd", "Forgy","MacQueen")
myDefault$trace = FALSE
myDefault$alpha = 0.2
#
# Types of each arg
#
myTypes = c("singleIntegerGE1", "singleIntegerGE1", "match.arg",
"singleLogical", "singleIntegerGE1")
#
# Recreation of the kmeans controls + Alerts
#
control = .T_matchTypeAndSetDefault(control, myDefault,
myTypes, "kmeans list of controls: ")
return(control)
}
#=================================#
# This file contains all the
# "control" functions
#=================================#
# Possible elements of myAlerts:
#
# /* BEGIN FROM FUNHDDC (UNMODIFIED) */
# /*
# * Authors: Bouveyron, C. Jacques, J.
# * Date Taken: 2022-01-01
# * Original Source: funHDDC (unmodified)
# * Address: https://github.com/cran/funHDDC
# *
# */
.T_myCallAlerts = function(call, name, myType, nParents=1, mustBeThere=FALSE, prefix=""){
# This function basically calls the function myAlerts, but the arguments are different
if( name %in% names(call) ){
# we check the element exists => to provide a fine error
what = call[[name]]
val = try(eval.parent(what, nParents), silent = TRUE)
# browser()
if( "try-error" %in% class(val) ){
if( inherits(what, 'name') ){
# it means the variable was not found
stop(prefix,"For argument '",name,"': object '",what,"' not found.", call. = FALSE)
} else {
stop(prefix,"For argument '",name,"': expression ",as.character(as.expression(what))," could not be evaluated.", call. = FALSE)
}
} else {
a = .T_myAlerts(val, name, myType, prefix)
return(a)
}
} else if(mustBeThere) {
stop(prefix, "The argument '", name, "' must be provided.", call. = FALSE)
}
}
.T_myAlerts = function(x, name, myType, prefix="", charVec){
# Format of my types:
# - single => must be of lenght one
# - Vector => must be a vector
# - Matrix => must be a matrix
# - GE/GT/LE/LT: greater/lower than a given value
# - predefinedType => eg: numeric, integer, etc
# - match.arg => very specific => should match the charVec
# If there is a parenthesis => the class must be of specified types:
# ex: "(list, data.frame)" must be a list of a data.frame
ignore.case = TRUE
firstMsg = paste0(prefix,"The argument '",name,"' ")
# simple function to extract a pattern
# ex: if my type is VectorIntegerGE1 => myExtract("GE[[:digit:]]+","VectorIntegerGE1") => 1
myExtract = function(expr, text, trim=2){
start = gregexpr(expr,text)[[1]] + trim
length = attr(start,"match.length") - trim
res = substr(text,start,start+length-1)
as.numeric(res)
}
#
# General types handling
#
loType = tolower(myType)
if(grepl("single",loType)){
if(length(x)!=1) stop(firstMsg,"must be of length one.", call. = FALSE)
}
if(grepl("vector",loType) && !grepl("factor",loType)){
if(!is.vector(x)) stop(firstMsg,"must be a vector.", call. = FALSE)
if(is.list(x)) stop(firstMsg,"must be a vector (and not a list).", call. = FALSE)
}
res = .T_checkTheTypes(loType, x)
if(!res$OK) stop(firstMsg,res$message, call. = FALSE)
# GE: greater or equal // GT: greater than // LE: lower or equal // LT: lower than
if(grepl("ge[[:digit:]]+",loType)){
n = myExtract("ge[[:digit:]]+", loType)
if( !all(x>=n, na.rm = T) ) stop(firstMsg,"must be greater than, or equal to, ", n,".", call. = FALSE)
}
if(grepl("gt[[:digit:]]+",loType)){
n = myExtract("gt[[:digit:]]+", loType)
if( !all(x>n, na.rm = T) ) stop(firstMsg,"must be strictly greater than ", n,".", call. = FALSE)
}
if(grepl("le[[:digit:]]+",loType)){
n = myExtract("le[[:digit:]]+", loType)
if( !all(x<=n, na.rm = T) ) stop(firstMsg,"must be lower than, or equal to, ",n,".", call. = FALSE)
}
if(grepl("lt[[:digit:]]+",loType)){
n = myExtract("lt[[:digit:]]+", loType)
if( !all(x<n, na.rm = T) ) stop(firstMsg,"must be strictly lower than ", n,".", call. = FALSE)
}
#
# Specific Types Handling
#
if(grepl("match.arg",loType)){
if(ignore.case){
x = toupper(x)
newCharVec = toupper(charVec)
} else {
newCharVec = charVec
}
if( is.na(pmatch(x, newCharVec)) ){
n = length(charVec)
if(n == 1){
msg = paste0("'",charVec,"'")
} else {
msg = paste0("'", paste0(charVec[1:(n-1)], collapse="', '"), "' or '",charVec[n],"'")
}
stop(firstMsg, "must be one of:\n", msg, ".", call. = FALSE)
} else {
qui = pmatch(x, newCharVec)
return(charVec[qui])
}
}
}
.T_matchTypeAndSetDefault = function(myList, myDefault, myTypes, prefix){
# This function:
# i) check that all the elements of the list are valid
# ii) put the default values if some values are missing
# iii) Gives error messages if some types are wrong
# This function obliges myList to be valid (as given by myDefault)
# 1) check that the names of the list are valid
if(is.null(myList)) myList = list()
list_names = names(myList)
if(length(list_names)!=length(myList) || any(list_names=="")){
stop(prefix,"The elements of the list should be named.", call. = FALSE)
}
obj_names = names(myDefault)
isHere = pmatch(list_names,obj_names)
if(anyNA(isHere)){
if(sum(is.na(isHere))==1) stop(prefix, "The following argument is not defined: ",paste(list_names[is.na(isHere)],sep=", "), call. = FALSE)
else stop(prefix, "The following arguments are not defined: ",paste(list_names[is.na(isHere)],sep=", "), call. = FALSE)
}
# 2) We set the default values and run Warnings
res = list()
for(i in 1:length(obj_names)){
obj = obj_names[i]
qui = which(isHere==i) # qui vaut le numero de l'objet dans myList
type = myTypes[i] # we extract the type => to control for "match.arg" type
if(length(qui)==0){
# we set to the default if it's missing
if(type == "match.arg") {
res[[obj]] = myDefault[[i]][1]
} else {
res[[obj]] = myDefault[[i]]
}
} else {
# we check the integrity of the value
val = myList[[qui]]
if(type == "match.arg"){
# If the value is to be a match.arg => we use our controls and not
# directly the one of the function match.arg()
charVec = myDefault[[i]]
.T_myAlerts(val, obj, "singleCharacterMatch.arg", prefix, charVec)
val = match.arg(val, charVec)
} else {
.T_myAlerts(val, obj, type, prefix)
}
res[[obj]] = val
}
}
return(res)
}
.T_checkTheTypes = function(str, x){
# This function takes in a character string describing the types of the
# element x => it can be of several types
# types that are controlled for:
allTypes = c("numeric", "integer", "character", "logical", "list", "data.frame", "matrix", "factor", "intna")
OK = FALSE
message = c()
for(type in allTypes){
if(grepl(type, str)){
# we add the type of the control
if(type == "intna") {
message = c(message, "integer or NA")
} else {
message = c(message, type)
}
if(type == "numeric"){
if(!OK & is.numeric(x)){
OK = TRUE
}
} else if(type == "integer"){
if(is.numeric(x) && (is.integer(x) || (all(is.finite(x)) && all(x%%1==0)))){ # IAIN added non finite condition
OK = TRUE
}
} else if(type == "intna"){
if(is.numeric(x[!is.na(x)]) && (is.integer(x[!is.na(x)]) || (all(is.finite(x[!is.na(x)])) && all(x[!is.na(x)]%%1==0)))){ # IAIN added non finite condition
OK = TRUE
}
} else if(type == "character"){
if(is.character(x)){
OK = TRUE
}
} else if(type == "logical"){
if(is.logical(x)){
OK = TRUE
}
} else if(type == "list"){
if(is.list(x)){
OK = TRUE
}
} else if(type == "data.frame"){
if(is.data.frame(x)){
OK=TRUE
}
} else if(type == "matrix"){
if(is.matrix(x)){
OK = TRUE
}
} else if(type == "factor"){
if(is.factor(x)){
OK = TRUE
}
}
}
if(OK) break
}
if(length(message) == 0) OK = TRUE #ie there is no type to be searched
else if(length(message) >= 3){
n = length(message)
message = paste0("must be of type: ", paste0(message[1:(n-1)], collapse = ", "), " or ", message[n], ".")
} else {
message = paste0("must be of type: ", paste0(message, collapse = " or "), ".")
}
return(list(OK=OK, message=message))
}
.T_hdclassif_dim_choice <- function(ev, n, method, threshold, graph, noise.ctrl, d_set){
# Selection of the intrinsic dimension
# browser()
N <- sum(n)
prop <- n/N
K = ifelse(is.matrix(ev), nrow(ev), 1)
# browser()
if(is.matrix(ev) && K>1){
p <- ncol(ev)
if(method=="cattell"){
dev <- abs(apply(ev, 1, diff))
max_dev <- apply(dev, 2, max, na.rm=TRUE)
dev <- dev/rep(max_dev, each=p-1)
d <- apply((dev>threshold)*(1:(p-1))*t(ev[, -1]>noise.ctrl), 2, which.max)
if(graph){
# return user settings to original
oldpar <- par(no.readonly = TRUE)
on.exit(par(oldpar))
op = par(mfrow=c(K*(K<=3)+2*(K==4)+3*(K>4 && K<=9)+4*(K>9), 1+floor(K/4)-1*(K==12)+1*(K==7)))
for(i in 1:K){
sub1 <- paste("Class #", i, ", d", i, "=", d[i], sep="")
Nmax <- max(which(ev[i, ]>noise.ctrl))-1
plot(dev[1:(min(d[i]+5, Nmax)), i], type="l", col="blue", main=paste("Cattell's Scree-Test\n", sub1, sep=""), ylab=paste("threshold =", threshold), xlab="Dimension", ylim=c(0, 1.05))
abline(h=threshold, lty=3)
points(d[i], dev[d[i], i], col='red')
}
par(op)
}
} else if(method=="bic"){
d <- rep(0, K)
if(graph) op = par(mfrow=c(K*(K<=3)+2*(K==4)+3*(K>4 && K<=9)+4*(K>9), 1*(1+floor(K/4)-1*(K==12)+1*(K==7))))
for (i in 1:K) {
B <- c()
Nmax <- max(which(ev[i, ]>noise.ctrl))-1
p2 <- sum(!is.na(ev[i, ]))
Bmax <- -Inf
for (kdim in 1:Nmax){
if ((d[i]!=0 & kdim>d[i]+10)) break
a <- sum(ev[i, 1:kdim])/kdim
b <- sum(ev[i, (kdim+1):p2])/(p2-kdim)
if (b<0 | a<0){
B[kdim] <- -Inf
} else {
L2 <- -1/2*(kdim*log(a)+(p2-kdim)*log(b)-2*log(prop[i])+p2*(1+1/2*log(2*pi))) * n[i]
B[kdim] <- 2*L2 - (p2+kdim*(p2-(kdim+1)/2)+1) * log(n[i])
}
if ( B[kdim]>Bmax ){
Bmax <- B[kdim]
d[i] <- kdim
}
}
if(graph){
plot(B, type='l', col=4, main=paste("class #", i, ", d=", d[i], sep=''), ylab='BIC', xlab="Dimension")
points(d[i], B[d[i]], col=2)
}
}
if(graph) par(op)
} else if(method=="grid"){
d <- d_set
}
} else{
ev <- as.vector(ev)
p <- length(ev)
if(method=="cattell"){
dvp <- abs(diff(ev))
Nmax <- max(which(ev>noise.ctrl))-1
if (p==2) d <- 1
else d <- max(which(dvp[1:Nmax]>=threshold*max(dvp[1:Nmax])))
diff_max <- max(dvp[1:Nmax])
if(graph){
plot(dvp[1:(min(d+5, p-1))]/diff_max, type="l", col="blue", main=paste("Cattell's Scree-Test\nd=", d, sep=''), ylab=paste("threshold =", threshold, sep=' '), xlab='Dimension', ylim=c(0, 1.05))
abline(h=threshold, lty=3)
points(d, dvp[d]/diff_max, col='red')
}
} else if(method=="bic"){
d <- 0
Nmax <- max(which(ev>noise.ctrl))-1
B <- c()
Bmax <- -Inf
for (kdim in 1:Nmax){
if (d!=0 && kdim>d+10) break
a <- sum(ev[1:kdim])/kdim
b <- sum(ev[(kdim+1):p])/(p-kdim)
if (b<=0 | a<=0) B[kdim] <- -Inf
else{
L2 <- -1/2*(kdim*log(a)+(p-kdim)*log(b)+p*(1+1/2*log(2*pi)))*N
B[kdim] <- 2*L2 - (p+kdim*(p-(kdim+1)/2)+1)*log(N)
}
if ( B[kdim]>Bmax ){
Bmax <- B[kdim]
d <- kdim
}
}
if(graph){
plot(B, type='l', col=4, main=paste("BIC criterion\nd=", d, sep=''), ylab='BIC', xlab="Dimension")
points(d, B[d], col=2)
}
}
}
return(d)
}
.T_hdclassift_bic <- function(par, p, dfconstr,data=NULL){
model <- par$model
K <- par$K
d <- par$d
b <- par$b
a <- par$a
mu <- par$mu
N <- par$N
prop <- par$prop
mux<-par$mux
if(length(b)==1){
#update of b to set it as variable dimension models
eps <- sum(prop*d)
n_max <- ncol(par$ev)
b <- b*(n_max-eps)/(p-eps)
b <- rep(b, length=K)
}
if (length(a)==1) a <- matrix(a, K, max(d))
else if (length(a)==K) a <- matrix(a, K, max(d))
else if (model=='AJBQD') a <- matrix(a, K, d[1], byrow=TRUE)
if(min(a, na.rm=TRUE)<=0 | any(b<0)) return(-Inf)
if (is.null(par$loglik)){
som_a <- c()
for (i in 1:K) som_a[i] <- sum(log(a[i, 1:d[i]]))
L <- -1/2*sum(prop * (som_a + (p-d)*log(b) - 2*log(prop)+ p*(1+log(2*pi))))*N
}
else L <- par$loglik[length(par$loglik)]
# add K or 1 parameters for nux in ro
if (dfconstr=="no")
ro <- K*(p+1)+K-1
else
ro <- K*p+K
tot <- sum(d*(p-(d+1)/2))
D <- sum(d)
d <- d[1]
to <- d*(p-(d+1)/2)
if (model=='AKJBKQKDK') m <- ro+tot+D+K
else if (model=='AKBKQKDK') m <- ro+tot+2*K
else if (model=='ABKQKDK') m <- ro+tot+K+1
else if (model=='AKJBQKDK') m <- ro+tot+D+1
else if (model=='AKBQKDK') m <- ro+tot+K+1
else if (model=='ABQKDK') m <- ro+tot+2
bic <- -(-2*L+m*log(N))
#calcul ICL
t = par$posterior
Z = ((t - apply(t, 1, max))==0) + 0
icl = bic - 2*sum(Z*log(t+1e-15))
return(list(bic = bic, icl = icl))
}
.T_hdc_getComplexityt = function(par, p, dfconstr){
model <- par$model
K <- par$K
d <- par$d
b <- par$b
a <- par$a
mu <- par$mu
prop <- par$prop
#@ add in ro K parameters for nuk or add 1 parameter if degrees equal
if (dfconstr=="no")
ro <- K*(p+1)+K-1
else
ro <- K*p+K
tot <- sum(d*(p-(d+1)/2))
D <- sum(d)
d <- d[1]
to <- d*(p-(d+1)/2)
if (model=='AKJBKQKDK') m <- ro+tot+D+K
else if (model=='AKBKQKDK') m <- ro+tot+2*K
else if (model=='ABKQKDK') m <- ro+tot+K+1
else if (model=='AKJBQKDK') m <- ro+tot+D+1
else if (model=='AKBQKDK') m <- ro+tot+K+1
else if (model=='ABQKDK') m <- ro+tot+2
return(m)
}
.T_hdc_getTheModel = function(model, all2models = FALSE){
# Function used to get the models from number or names
model_in = model
if(!is.vector(model)) stop("The argument 'model' must be a vector.")
if(anyNA(model)) stop("The argument 'model' must not contain any NA.")
ModelNames <- c("AKJBKQKDK", "AKBKQKDK", "ABKQKDK", "AKJBQKDK", "AKBQKDK", "ABQKDK", "AKJBKQKD", "AKBKQKD", "ABKQKD", "AKJBQKD", "AKBQKD", "ABQKD")
model = toupper(model)
if(length(model)==1 && model=="ALL"){
if(all2models) model <- 1:14
else return("ALL")
}
qui = which(model %in% 1:14)
model[qui] = ModelNames[as.numeric(model[qui])]
# We order the models properly
qui = which(!model%in%ModelNames)
if (length(qui)>0){
if(length(qui)==1){
msg = paste0("(e.g. ", model_in[qui], " is incorrect.)")
} else {
msg = paste0("(e.g. ", paste0(model_in[qui[1:2]], collapse=", or "), " are incorrect.)")
}
stop("Invalid model name ", msg)
}
# warning:
if(max(table(model))>1) warning("The model vector, argument 'model', is made unique (repeated values are not tolerated).")
mod_num <- c()
for(i in 1:length(model)) mod_num[i] <- which(model[i]==ModelNames)
mod_num <- sort(unique(mod_num))
model <- ModelNames[mod_num]
return(model)
}
####
#### Utilities ####
####
.T_addCommas = function(x) sapply(x, .T_addCommas_single )
.T_addCommas_single = function(x){
# This function adds commas for clarity for very long values of likelihood
if(!is.finite(x)) return(as.character(x))
s = sign(x)
x = abs(x)
decimal = x - floor(x)
if(decimal>0) dec_string = substr(decimal, 2, 4)
else dec_string = ""
entier = as.character(floor(x))
quoi = rev(strsplit(entier, "")[[1]])
n = length(quoi)
sol = c()
for(i in 1:n){
sol = c(sol, quoi[i])
if(i%%3 == 0 && i!=n) sol = c(sol, ",")
}
res = paste0(ifelse(s==-1, "-", ""), paste0(rev(sol), collapse=""), dec_string)
res
}
.T_repmat <- function(v,n,p){ #A@5 WHAT IS THIS???????????????????????
if (p==1){M = cbind(rep(1,n)) %*% v} #A@5 a matrix of column of v
else { M = matrix(rep(v,n),n,(length(v)*p),byrow=T)} # removed cat("!");
M
}
.T_diago <- function(v){
if (length(v)==1){ res = v }
else { res = diag(v)}
res
}
# /* END OF FROM FUNHDDC */
.T_imahalanobis <- function(x, muk, wk, Qk, aki) {
# w = fpcaobj[[i]]$W
# *************************************************************************** #
# should be called as imahalanobis(x, mu[i,], w[i,], Q[[i]], a[i,], b[i])
# return formula on top of page 9
# *************************************************************************** #
# Code modified to take vectors instead of matrices
p <- ncol(x)
N <- nrow(x)
res <- rep(0, N)
#X <- x - matrix(muk, N, p, byrow=TRUE)
# Qi <- wk %*% Qk
# xQi <- (X %*% Qi)
#proj <- (X %*% Qi) %*% aki
#the R result- no C code
# res_old <- rowSums(proj ^ 2)
# Calling C_imahalanobis
res <- .C(".C_imahalanobis", as.double(x), as.double(muk), as.double(wk),
as.double(Qk), as.double(aki), as.integer(p),
as.integer(N), as.integer(nrow(aki)), res = rep(0,N), PACKAGE="TFunHDDC")
return(res$res)
} # end of .T_imahalanobis
#********************************* TIMING FUNCTION *********************************#
# /*
# * Authors: Andrews, J. Wickins, J. Boers, N. McNicholas, P.
# * Date Taken: 2023-01-01
# * Original Source: teigen (modified)
# * Address: https://github.com/cran/teigen
# *
# */
.T_estimateTime <- function(stage, start.time, totmod, backspace = 0){
curwidth <- ifelse(backspace != 0, backspace, getOption("width"))
##Output enough backspace characters to erase the previous output progress information
##\b is used instead of \r due to RStudio's difficulty with handling carriage returns
# cat(paste(rep("\b", backspace), collapse = ""))
##the string that will eventually be output to the screen
output.string <- ""
##[short,med,long]string all are strings that will be output depending on size of the
##console. These strings will be one of two things, depending on if stage is "init" or not
if(stage=="init"){
medstring <- longstring <- "????"
shortstring <- "0"
modelcount <- NA
}
else{
for(i in 1:(backspace)) cat("\b")
# cat("\f")
modelcount <- stage+1 ##number of models calculated
modsleft <- totmod-modelcount
timerun <- difftime(Sys.time(),start.time, units="secs")
timeremain <- (timerun/modelcount)*modsleft
if(timeremain>60){
units(timeremain) <- "mins"
} else if(timeremain>3600){
units(timeremain) <- "hours "
} else if(timeremain>86400){
units(timeremain) <- "days"
} else if(timeremain>604800){
units(timeremain) <- "weeks "
}
## % complete
shortstring <- paste(format(round((1-modsleft/totmod)*100), width=3, justify="right"), sep = "")
##approx. time remaining
medstring <- paste(format(round(timeremain,1), width=4, justify="right"), sep = "")
##Time taken
longstring <- paste(format(round(timerun,1), width=4, justify="right"), sep = "")
}
##start to build up the output string depending on console size
if(curwidth>=15){
shortstring <- str_pad(shortstring, 5, pad=" ")
output.string <- paste(shortstring, "% complete", sep="")
##if larger console, output more information
if(curwidth>=48){
medstring <-str_pad(medstring, 10, pad=" ")
output.string <- paste("Approx. remaining:", medstring," | ", output.string, sep = "")
##if even larger console, output even more information
if(curwidth>=74){
longstring <- str_pad(longstring, 10, pad=" ")
output.string <- paste("Time taken:", longstring, " | ", output.string, sep = "")
}
}
}
cat(output.string)
flush.console()
## return model count and output string size
c(modelcount, nchar(output.string))
} # end of .T_estimateTime
######################################
###########################################
# prediction for classificati
predict.tfunHDDC <- function(object, data=NULL, ...){
#object is tfunHDDC
#fdobj is new data given as a data frame for new observations
#should be represented in the same basis as the data from object (p should be the same)
if (!inherits(object,"t-funHDDC")) {
stop("The first argument should be the output of a converged tfunHDDC()")
return(NULL)
}
if(is.null(data)){
data <- object$data
} else {
call <- match.call()
prefix = "HDDC: "
.T_myCallAlerts(call, "data", "list,fd", 3, TRUE, prefix)
}
Np=0
if (is.fd(data)){
# No NA in the data:
if (any(is.na(data$coefs))) stop("HDDC: NA values in the data are not supported. Please remove them beforehand.", call. = FALSE)
if (any(!is.numeric(data$coefs))) stop("HDDC: Only numeric values are supported in fdata object. Please check coefficients.", call. = FALSE)
if (any(!is.finite(data$coefs))) stop("HDDC: Only finite values are supported in fdata object. Please check coefficients.", call. = FALSE)
}else{
# No NA in the data:
for (i in 1:length(data)){
if(!is.fd(data[[i]])) stop("HDDC: All dimensions of data must be fd object.", call. = FALSE)
if (any(is.na(data[[i]]$coefs))) stop("HDDC: NA values in the data are not supported. Please remove them beforehand.", call. = FALSE)
if (any(!is.numeric(data[[i]]$coefs))) stop("HDDC: Only numeric values are supported in fdata object. Please check coefficients.", call. = FALSE)
if (any(!is.finite(data[[i]]$coefs))) stop("HDDC: Only finite values are supported in fdata object. Please check coefficients.", call. = FALSE)
}
}
## Here we should check if object is a tfunhddc object
if (is.fd(object$data)){
# Size of the data
Np <- nrow(object$data$coefs)
}else{
# Size of the data
Np <- nrow(object$data[[1]]$coefs); for (i in 2:length(object$data)) Np = Np + nrow(object$data[[i]]$coefs)
}
if (!inherits(data, 'list')) {
x <- t(data$coefs)
}
else {x = t(data[[1]]$coefs); for (i in 2:length(data)) x = cbind(x,t(data[[i]]$coefs))} #NOT THIS
p <- ncol(x)
N <- nrow(x)
if (Np!= p) stop("The new observations should be represented using the same base as for the training set")
K <- object$K
nux <- object$nux
a <- object$a
b <- object$b
mu <- object$mu
d <- object$d
prop <- object$prop
Q <- object$Q
Q1 <- object$Q1
Wki <- object$Wlist$W_m
dety<-object$Wlist$dety
b[b<1e-6] <- 1e-6
##################################################
#########################################################
###########################################
t <- matrix(0, N, K)
mah_pen <- matrix(0, N, K)
K_pen <- matrix(0, N, K)
num <- matrix(0, N, K)
s <- rep(0, K)
#############################aici am ajuns
#################################################
if (object$model=="AKJBKQKDK"){
for (i in 1:K) {
s[i] <- sum( log(a[i, 1:d[i]]) )
Qk <- Q1[[i]]
aki <- sqrt( diag( c( 1 / a[i, 1:d[i]], rep(1 / b[i], p - d[i]) ) ) )
muki <- mu[i, ]
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[i]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
}
else
{if (object$model=="AKJBQKDK"){for (i in 1:K) {
s[i] <- sum( log(a[i, 1:d[i]]) )
Qk <- Q1[[i]]
aki <- sqrt( diag( c( 1 / a[i, 1:d[i]], rep(1 / b[1], p - d[i]) ) ) )
muki <- mu[i, ]
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[1]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
} else {if (object$model=="AKBKQKDK"){for (i in 1:K) {
s[i] <- d[i]*log(a[i])
Qk <- Q1[[i]]
aki <- sqrt( diag( c( rep(1 / a[i], d[i]), rep(1 / b[i], p - d[i]) ) ) )
muki <- mu[i, ]
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[i]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
} else {if (object$model=="ABKQKDK"){for (i in 1:K) {
s[i] <- d[i]* log(a[1])
Qk <- Q1[[i]]
aki <- sqrt( diag( c( rep(1 / a[1], d[i]), rep(1 / b[i], p - d[i]) ) ) )
muki <- mu[i, ]
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[i]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
}else {if (object$model=="AKBQKDK"){for (i in 1:K) {
s[i] <- d[i]* log(a[i])
Qk <- Q1[[i]]
aki <- sqrt( diag( c( rep(1 / a[i], d[i]), rep(1 / b[1], p - d[i]) ) ) )
muki <- mu[i, ]
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[1]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
} else {if (object$model=="ABQKDK"){for (i in 1:K) {
s[i] <- d[i]* log(a[1])
Qk <- Q1[[i]]
aki <- sqrt( diag( c( rep(1 / a[1], d[i]), rep(1 / b[1], p - d[i]) ) ) )
muki <- mu[i, ]
mah_pen[, i] <- .T_imahalanobis(x, muki, Wki ,Qk, aki)
K_pen[, i] <- log(prop[i]) +
lgamma( (nux[i] + p) / 2) - (1 / 2) * (s[i] + (p - d[i]) * log(b[1]) -log(dety)) -
( ( p / 2) * ( log(pi) + log(nux[i]) ) +
lgamma(nux[i] / 2) + ( (nux[i] + p) / 2 ) *
(log(1 + mah_pen[, i] / nux[i]) ) )
}
}
}}}}}
kcon <- -apply(K_pen,1,max)
K_pen <- K_pen + kcon
num <- exp(K_pen)
t <- num / rowSums(num)
# if(any(is.nan(t))){
# break
# }
cls <- max.col(t)
list(t = t,
class=cls)
} # end of .predict.tfunHddc
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.