Nothing
selFWD <- function(X, G, ctrlLCA = controlLCA(), ctrlReg = controlReg(),
independence = FALSE, swap = FALSE, start = NULL, bicDiff = 0,
covariates = NULL, parallel = FALSE, checkG = TRUE,
verbose = interactive())
{
i <- NULL # avoid 'no visible binding for global variable'
varnames <- colnames(X)
N <- nrow(X)
M <- length(varnames)
# select clustering variables, thus G > 1
G <- setdiff(G, 1)
# store information during search and better printing
long <- varnames[which.max(nchar(varnames))]
info <- as.data.frame(
list(Iter = 111, Step = "Remove",
Var = if ( swap ) paste(long, "-", long) else long,
BICdiff = -1111.11, Decision = "Rejected"), stringsAsFactors = FALSE )
colnames(info) <- c("", "Step", " Variable", " BICdiff", " Decision")
# parallel and do operator
parallel <- if( is.logical(parallel) ) {
if ( parallel ) GA::startParallel(parallel) else FALSE
} else { GA::startParallel(parallel) }
on.exit( if(parallel) parallel::stopCluster(attr(parallel, "cluster")) )
`%DO%` <- if ( parallel ) foreach::`%dopar%` else foreach::`%do%`
# initialize clustering variables, using Dean, Raftery (2010) approach
if ( is.null(start) ) {
GG <- if ( checkG ) suppressWarnings( maxG(X, G) ) else G
fitStart <- fitLCA(X, G = max(GG), X = covariates, ctrlLCA = ctrlLCA)
vars <- sapply( lapply(fitStart$parameters$theta, function(t) apply(t, 2, var)), sum )
set <- names( sort(vars, decreasing = TRUE) )
crit <- TRUE
j <- 4 # at least 3 binary variables to identify a lca model
while ( crit ) {
start <- set[1:j]
val <- if ( checkG ) try( suppressWarnings(maxG(X[,start], 2)), silent = TRUE) else G
crit <- if ( !is.null(val) ) {
if ( val == 2 ) FALSE else TRUE
} else TRUE
j <- j + 1
}
clus <- varnames[ sort(match(start, varnames)) ]
} else {
clus <- start
}
noclus <- setdiff(varnames, clus)
# model on clustering variables
GG <- 2
clusMod <- fitLCA(X[,clus], GG, X = covariates, ctrlLCA = ctrlLCA)
bicClusMod <- clusMod$bic
if ( verbose ) cat(" Starting clustering set: \n", clus, "\n\n")
# variable selection------------------------------------------------------
crit <- TRUE
toRem <- toAdd <- NULL
# zero <- (.Machine$double.eps)^(1/3)
zero <- bicDiff
first <- TRUE
iter <- 0
while ( crit ) {
# add step.......................................................
#
# do not add the variable just removed
Noclus <- if ( !is.null(toRem) ) {
if ( is.na(match(toRem, noclus)) ) noclus else noclus[ -match(toRem, noclus) ]
} else noclus
#
if ( length(Noclus) != 0 ) { # there are no variables to add
out <- foreach::foreach( i = Noclus, .combine = "rbind", .errorhandling = "remove",
.final = function(o) matrix(o, ncol = 2) ) %DO% {
set <- c(clus, i) # clustering set after adding variable j
bicReg <- regressionStep( y = X[,i], X = X[,clus], ctrlReg = ctrlReg,
independence = independence )
GG <- if ( checkG ) suppressWarnings( maxG(X[,set], G) ) else G
modClus <- if ( length(GG) > 0 )
fitLCA( X[,set], G = GG, X = covariates, ctrlLCA = ctrlLCA )$bic else NA
return( c(bicReg, modClus) )
}
iter <- iter + 1
rownames(out) <- Noclus
bicNoClus <- bicClusMod + out[,1]
bicAdd <- out[,2]
bicAddDiff <- bicAdd - bicNoClus
if ( any(!is.na(bicAddDiff)) ) {
# variable proposed for adding
best <- which.max(bicAddDiff)
Max <- max(bicAddDiff, na.rm = TRUE)
toAdd <- names(bicAddDiff)[best]
# add
info[iter+1,1] <- iter
info[iter+1,2] <- "Add"
info[iter+1,3] <- toAdd
info[iter+1,4] <- Max
if ( bicAddDiff[best] > zero ) {
info[iter+1,5] <- "Accepted"
added <- TRUE
bicClusMod <- bicAdd[best]
j <- match(toAdd, noclus)
clus <- c(clus, toAdd)
noclus <- noclus[-j]
if ( verbose ) monitor(info, iter)
} else {
info[iter+1,5] <- "Rejected"
added <- FALSE
toAdd <- NULL
if ( verbose ) monitor(info, iter)
if ( !added & first & !swap ) break
}
} else {
info[iter+1,1] <- iter
info[iter+1,2] <- "Add"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
added <- FALSE
toAdd <- NULL
if ( verbose ) monitor(info, iter)
}
} else { # if ( length(Noclus) != 0 )
info[iter+1,1] <- iter
info[iter+1,2] <- "Add"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
added <- FALSE
toAdd <- NULL
if ( verbose ) monitor(info, iter)
}
first <- FALSE
#................................................................
# swap step......................................................
if ( swap ) {
iter <- iter + 1
srt <- sort(bicAddDiff, decreasing = TRUE)
# if ( length(srt) > 1 ) {
if ( length(srt) > 1 | (length(srt) >= 1 & !added) ) {
if ( length(noclus) != 0 ) {
# which variable in the non-clustering set do we swap with all the ones in the clustering set?
if ( added ) {
# the variable to swap is the second with the largest evidence
toSwap <- names(srt[2])
} else {
# or the first if none has removed
toSwap <- names(srt[1])
}
indSwap <- match(toSwap, noclus)
# swap variable 'toSwap' in the non-clustering set with one of the clustering set
bicReg <- regressionStep( y = X[,toSwap], X = X[,clus], ctrlReg = ctrlReg )
out <- foreach::foreach( i = clus, .combine = "rbind", .errorhandling = "remove",
.final = function(o) matrix(o, ncol = 2) ) %DO% {
j <- match(i, clus)
# clustering set changing 'toSwap' with one of the clustering set
set <- c( toSwap, clus[-j] )
bicRegSwap <- regressionStep( y = X[,i], X = X[,set], ctrlReg = ctrlReg,
independence = independence )
GG <- if ( checkG ) suppressWarnings( maxG(X[,set], G) ) else G
modClus <- if ( length(GG) > 0 )
fitLCA( X[,set], G = GG, X = covariates, ctrlLCA = ctrlLCA )$bic else NA
return( c(bicRegSwap, modClus) )
}
rownames(out) <- clus
bicSwapClus <- out[,2] + out[,1] # bic with 'toSwap' in the clustering set
bicSwapNoClus <- bicClusMod + bicReg # bic with 'toSwap' in the non clustering set
bicSwapDiff <- bicSwapClus - bicSwapNoClus
if ( any(!is.na(bicSwapDiff)) ) {
# variable in 'clus' proposed for swapping with 'toSwap'
best <- which.max(bicSwapDiff)
Max <- max(bicSwapDiff, na.rm = TRUE)
toSwapOut <- names(bicSwapDiff)[best]
# swap
info[iter+1,1] <- iter
info[iter+1,2] <- "Swap"
info[iter+1,3] <- paste0(toSwapOut, "-", toSwap)
info[iter+1,4] <- Max
if ( Max > zero ) {
info[iter+1,5] <- "Accepted"
swapped <- TRUE
bicClusMod <- out[best,2]
j <- match(toSwapOut, clus)
clus <- c( clus[-j], toSwap )
noclus <- c(noclus[-indSwap], toSwapOut)
if ( verbose ) monitor(info, iter)
} else {
info[iter+1,5] <- "Rejected"
swapped <- FALSE
if ( verbose ) monitor(info, iter)
}
} else {
info[iter+1,1] <- iter
info[iter+1,2] <- "Swap"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
swapped <- FALSE
if ( verbose ) monitor(info, iter)
}
} else swapped <- FALSE
} else {
info[iter+1,1] <- iter
info[iter+1,2] <- "Swap"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
swapped <- FALSE
if ( verbose ) monitor(info, iter)
}
} else swapped <- FALSE
#................................................................
# removal step...................................................
#
# do not remove the variable just added
Clus <- if ( !is.null(toAdd) ) {
if ( is.na(match(toAdd, clus)) ) clus else clus[ -match(toAdd, clus) ]
} else clus
out <- foreach::foreach( i = Clus, .combine = "rbind", .errorhandling = "remove",
.final = function(o) matrix(o, ncol = 2) ) %DO% {
j <- match(i, Clus)
set <- Clus[-j] # clustering set after removing variable j
bicReg <- regressionStep( y = X[,i], X = X[,set], ctrlReg = ctrlReg,
independence = independence )
GG <- if ( checkG ) suppressWarnings( maxG(X[,set], G) ) else G
modNoClus <- if ( length(GG) > 0 )
fitLCA( X[,set], G = GG, X = covariates, ctrlLCA = ctrlLCA )$bic else NA
return( c(bicReg, modNoClus) )
}
iter <- iter + 1
rownames(out) <- Clus
bicRem <- rowSums(out)
if ( any( !is.na(bicRem) ) ) {
# variable proposed for removal
best <- which.max(bicRem)
Max <- max(bicRem, na.rm = TRUE)
toRem <- names(bicRem)[best]
bicRemDiff <- Max - bicClusMod
# remove
info[iter+1,1] <- iter
info[iter+1,2] <- "Remove"
info[iter+1,3] <- toRem
info[iter+1,4] <- bicRemDiff
if ( bicRemDiff > zero ) {
info[iter+1,5] <- "Accepted"
removed <- TRUE
j <- match(toRem, clus)
clus <- clus[-j]
noclus <- c(noclus, toRem)
bicClusMod <- out[best,2]
if ( verbose ) monitor(info, iter)
} else {
info[iter+1,5] <- "Rejected"
removed <- FALSE
toRem <- NULL
if ( verbose ) monitor(info, iter)
if ( length(clus) == M ) break # all variables are selected
}
} else {
info[iter+1,1] <- iter
info[iter+1,2] <- "Remove"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
removed <- FALSE
toRem <- NULL
if ( verbose ) monitor(info, iter)
}
#................................................................
# swap step......................................................
if ( swap ) {
iter <- iter + 1
srt <- sort(bicRem, decreasing = TRUE)
# if ( length(srt) > 1 ) {
if ( length(srt) > 1 | (length(srt) >= 1 & !removed) ) {
# which variable in the clustering set do we swap with all the ones in the non-clustering set?
if ( removed ) {
# the variable to swap is the second with the largest evidence
toSwap <- names(srt[2])
} else {
# or the first if none has removed
toSwap <- names(srt[1])
}
indSwap <- match(toSwap, clus)
# swap variable 'toSwap' in the clustering set with one of the non-clustering set
out <- foreach::foreach( i = noclus, .combine = "rbind", .errorhandling = "remove",
.final = function(o) matrix(o, ncol = 3) ) %DO% {
# clustering set changing 'toSwap' with one of the non-clustering set
set <- c( clus[-indSwap], i )
bicReg <- regressionStep( y = X[,i], X = X[,clus], ctrlReg = ctrlReg,
independence = independence )
bicRegSwap <- regressionStep( y = X[,toSwap], X = X[,set], ctrlReg = ctrlReg )
GG <- if ( checkG ) suppressWarnings( maxG(X[,set], G) ) else G
modNoClus <- if ( length(GG) > 0 )
fitLCA( X[,set], G = GG, X = covariates, ctrlLCA = ctrlLCA )$bic else NA
return( c(bicReg, bicRegSwap, modNoClus) )
}
rownames(out) <- noclus
bicSwapClus <- bicClusMod + out[,1] # bic with 'toSwap' in the clustering set
bicSwapNoClus <- out[,3] + out[,2] # bic with 'toSwap' in the non clustering set
bicSwapDiff <- bicSwapNoClus - bicSwapClus
if ( any( !is.na(bicSwapDiff) ) ) {
# variable in 'noclus' proposed for swapping with 'toSwap'
best <- which.max(bicSwapDiff)
Max <- max(bicSwapDiff, na.rm = TRUE)
toSwapIn <- names(bicSwapDiff)[best]
# swap
info[iter+1,1] <- iter
info[iter+1,2] <- "Swap"
info[iter+1,3] <- paste0(toSwap, "-", toSwapIn)
info[iter+1,4] <- Max
if ( Max > zero ) {
info[iter+1,5] <- "Accepted"
swapped <- TRUE
bicClusMod <- out[best,3]
j <- match(toSwap, clus)
clus <- c( clus[-j], toSwapIn )
noclus <- c(noclus[-best], toSwap)
if ( verbose ) monitor(info, iter)
} else {
info[iter+1,5] <- "Rejected"
swapped <- FALSE
if ( verbose ) monitor(info, iter)
}
} else{
info[iter+1,1] <- iter
info[iter+1,2] <- "Swap"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
swapped <- FALSE
if ( verbose ) monitor(info, iter)
}
} else {
info[iter+1,1] <- iter
info[iter+1,2] <- "Swap"
info[iter+1,3] <- NA
info[iter+1,4] <- NA
swapped <- FALSE
if ( verbose ) monitor(info, iter)
}
} else swapped <- FALSE
#................................................................
# check
crit <- ( (removed | added | swapped) & (length(clus) >= 3) )
# we need at least 3 binary variables to identify a 2-class LCA model
}
#-------------------------------------------------------------------------
# stop parallel computing
# if ( parallel ) parallel::stopCluster( attr(parallel, "cluster") )
# fit LCA model on selected variables
clus <- varnames[ sort(match(clus, varnames)) ]
GG <- GG <- if ( checkG ) suppressWarnings( maxG(X[,clus], G) ) else G
mod <- fitLCA( X[,clus], G = GG, X = covariates, ctrlLCA = ctrlLCA )
rownames(info) <- info[,1]
info <- info[,-1]
return( list(variables = clus, model = mod, info = info[-1,],
search = "forward", swap = swap, independence = independence) )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.