#' multiple resolution neighborhood random forest regression
#'
#' Represents feature images as a neighborhood across scales
#' to build a random forest prediction from an image population. A use case
#' for this function is to predict cognition from multiple image features, e.g.
#' from the voxelwise FA of the corpus callosum and, in parallel, voxelwise
#' measurements of the volume of the inferior frontal gyrus.
#'
#' @param y vector of scalar values or labels. if a factor, do classification,
#' otherwise regression.
#' @param x a list of lists where each list contains feature images
#' @param labelmasks a list of masks where each mask defines the image space
#' for the given list and the number of parallel predictors. more labels means
#' more predictors. alternatively, separate masks may be used for each feature
#' in which case this should be a list of lists. see examples.
#' @param rad vector of dimensionality d define nhood radius
#' @param nsamples (per subject to enter training)
#' @param multiResSchedule an integer vector defining multi-res levels
#' @param ntrees (for the random forest model)
#' @return list with a random forest model, a vector identifying which rows
#' correspond to which subjects and a prediction vector.
#' @author Avants BB, Tustison NJ
#'
#' @references Pustina, D, et al. Automated segmentation of chronic stroke
#' lesions using LINDA: Lesion Identification with Neighborhood Data Analysis,
#' Human Brain Mapping, 2016. (related work, not identical)
#'
#' @seealso \code{\link{getMultiResFeatureMatrix}} \code{\link{mrvnrfs}}
#' @examples
#'
#' mask <- makeImage(c(100, 100), 0)
#' mask[30:60, 30:60] <- 1
#' mask[35:45, 50:60] <- 2
#' ilist <- list()
#' masklist <- list()
#' inds <- 1:8
#' yvec <- rep(0, length(inds))
#' scl <- 0.33 # a noise parameter
#' for (i in inds) {
#' img <- antsImageClone(mask)
#' imgb <- antsImageClone(mask)
#' limg <- antsImageClone(mask)
#' img[3:6, 3:6] <- rnorm(16, 1) * scl * (i) + scl * mean(rnorm(1))
#' imgb[3:6, 3:6] <- rnorm(16, 1) * scl * (i) + scl * mean(rnorm(1))
#' ilist[[i]] <- list(img, imgb) # two features
#' yvec[i] <- i^2.0 # a real outcome
#' masklist[[i]] <- antsImageClone(mask)
#' }
#' r <- c(1, 1)
#' mr <- c(2, 0)
#' featMat <- getMultiResFeatureMatrix(ilist[[1]], masklist[[1]],
#' rad = r, , multiResSchedule = mr
#' )
#' rfm <- multiResRandomForestRegression(
#' yvec, ilist, masklist,
#' rad = r, multiResSchedule = mr
#' )
#' preds <- predict(rfm, newdata = featMat)
#' \dontrun{
#' # data: https://github.com/stnava/ANTsTutorial/tree/master/phantomData
#' fns <- Sys.glob("phantom*wmgm.jpg")
#' ilist <- imageFileNames2ImageList(fns)
#' masklist <- list()
#' flist <- list()
#' for (i in 1:length(fns))
#' {
#' # 2 labels means 2 sets of side by side predictors and features at each scale
#' locseg <- kmeansSegmentation(ilist[[i]], 2)$segmentation
#' masklist[[i]] <- list(locseg, locseg %>% thresholdImage(2, 2), locseg)
#' flist[[i]] <- list(
#' ilist[[i]], ilist[[i]] %>% iMath("Laplacian", 1),
#' ilist[[i]] %>% iMath("Grad", 1)
#' )
#' }
#' yvec <- factor(rep(c(1, 2), each = 4)) # classification
#' r <- c(1, 1)
#' mr <- c(2, 1, 0)
#' ns <- 50
#' trn <- c(1:3, 6:8)
#' ytrain <- yvec[trn]
#' ftrain <- flist[trn]
#' mtrain <- masklist[trn]
#' mrrfr <- multiResRandomForestRegression(ytrain, ftrain, mtrain,
#' rad = c(1, 1),
#' nsamples = ns, multiResSchedule = mr
#' )
#' mypreds <- rep(NA, length(fns))
#' mymode <- function(x) {
#' ux <- unique(x)
#' ux[which.max(tabulate(match(x, ux)))]
#' }
#' for (i in 4:5) # test set
#' {
#' fmat <- getMultiResFeatureMatrix(flist[[i]], masklist[[i]],
#' rad = r, multiResSchedule = mr, nsamples = ns
#' )
#' myp <- predict(mrrfr, newdata = fmat)
#' mypreds[i] <- mymode(myp) # get the most frequent observation
#' # use median or mean for continuous predictions
#' }
#' print("predicted")
#' print(mypreds[-trn])
#' print("ground truth")
#' print(yvec[-trn])
#' }
#' @export multiResRandomForestRegression
multiResRandomForestRegression <- function(
y,
x,
labelmasks,
rad = NA,
nsamples = 10,
multiResSchedule = c(0),
ntrees = 500) {
if (all(is.na(rad))) {
rad <- rep(0, x[[1]][[1]]@dimension)
}
idim <- length(rad)
if (idim != x[[1]][[1]]@dimension) {
stop("multiResRandomForestRegression: dimensionality does not match")
}
fm <- getMultiResFeatureMatrix(x[[1]], labelmasks[[1]],
rad = rad, multiResSchedule = multiResSchedule, nsamples = nsamples
)
if (length(labelmasks) > 1) {
for (kk in 2:length(labelmasks))
{
temp <- getMultiResFeatureMatrix(x[[kk]], labelmasks[[kk]],
rad = rad, multiResSchedule = multiResSchedule, nsamples = nsamples
)
fm <- rbind(fm, temp)
}
}
# getMultiResFeatureMatrix
if (usePkg("randomForest")) {
yExt <- rep(y, each = nsamples)
rfm <- randomForest::randomForest(
y = yExt, x = fm, ntree = ntrees,
importance = FALSE, proximity = FALSE, keep.inbag = FALSE,
keep.forest = TRUE, na.action = na.omit, norm.votes = FALSE
)
# need the forest for prediction
return(rfm)
} else {
stop("install the randomForest package")
}
}
#' build multiple resolution predictor matrix from feature images
#'
#' Represents feature images as a neighborhood across scales. each subject
#' gets a label image and feature list. these labels/features should be
#' the same type for all subjects. e.g each subject has a k-label image
#' where the labels cover the same anatomy and the feature images are the same.
#' for each label in a mask, produce a multi-resolution neighborhood
#' sampling from the data within the label, for a given feature. do this for
#' each feature. so, 2 labels will double the width of the predictor matrix.
#' an additional scale parameter or feature will increase the width. more
#' samples will increase the number of rows in the predictor matrix. the labels
#' allow one to collect predictors side-by-side from different parts of an
#' image-based feature set. future work will allow other covariates.
#'
#' @param x a list of feature images
#' @param labelmask the mask defines the image space for the associated feature
#' and increases the number of predictors. more labels means more
#' predictors. a different mask may be used for each feature, if desired, in
#' which case this should be a list of masks where the length of the list
#' is equal to the number of features.
#' @param rad vector of dimensionality d define nhood radius
#' @param multiResSchedule a vector of smoothing values
#' @param nsamples defines the number of samples to take for each label.
#' @return mat a matrix of predictors with n samples rows
#' @author Avants BB, Tustison NJ
#'
#' @examples
#'
#' img <- antsImageRead(getANTsRData("r16"))
#' seg <- kmeansSegmentation(img, 3)$segmentation
#' flist <- list(img, img %>% iMath("Grad"))
#' featMat <- getMultiResFeatureMatrix(flist, seg,
#' rad = c(1, 1),
#' multiResSchedule = c(2, 1), nsamples = 10
#' )
#' @export getMultiResFeatureMatrix
getMultiResFeatureMatrix <- function(
x,
labelmask,
rad = NA,
multiResSchedule = c(0),
nsamples = 10) {
set.seed(0) # set this in order to increase repeatability of sample call
# loop over features, labels and multires
for (featk in 1:length(x))
{
if (inherits(labelmask, "antsImage")) {
locmask <- (labelmask)
}
if (inherits(labelmask, "list")) {
locmask <- labelmask[[featk]]
}
ulabs <- sort(unique(locmask[locmask > 0]))
for (lab in ulabs)
{
randmask <- locmask * 0
ulabvec <- (as.array(locmask) == as.numeric(lab))
randvec <- rep(FALSE, length(ulabvec))
k <- min(c(nsamples, sum(ulabvec == TRUE)))
n <- sum(ulabvec == TRUE)
randvec[ulabvec == TRUE][sample(1:n)[1:k]] <- TRUE
randmask[randvec] <- 1
for (smlev in multiResSchedule)
{
if (!exists("testmat")) {
testmat <- t(getNeighborhoodInMask(
image = x[[featk]] %>% smoothImage(smlev), mask = randmask,
radius = rad, spatial.info = F, boundary.condition = "image"
))
} else {
testmat <- cbind(
testmat,
t(getNeighborhoodInMask(
image = x[[featk]] %>% smoothImage(smlev), mask = randmask,
radius = rad, spatial.info = F, boundary.condition = "image"
))
)
}
} # multires
} # labels
} # features
return(testmat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.