#' voxelwise neighborhood random forest segmentation and prediction
#'
#' Represents feature images as a neighborhood and uses the features
#' to build a random forest prediction from an image population
#'
#' @param y list of training label images, can be a factor or numeric vector
#' this can also be a regular old vector
#' @param x a list of lists where each list contains feature images
#' @param labelmasks a list of masks where each mask defines the image space
#' for the given list. that is, the nth mask indexes the nth feature set.
#' multi-label masks will try to balance sampling for each label.
#' @param rad vector of dimensionality d define nhood radius
#' @param nsamples (per subject to enter training)
#' @param ntrees (for the random forest model)
#' @param asFactors boolean - treat the y entries as factors
#' @param reduceFactor integer factor by which to reduce (imaging) data resolution
#' @param ... arguments to pass to \code{randomForest}
#' @return list a 4-list with the rf model, training vector, feature matrix
#' and the random mask
#' @author Avants BB, Tustison NJ, Pustina D
#'
#' @examples
#'
#' mask<-makeImage( c(10,10), 0 )
#' mask[ 3:6, 3:6 ]<-1
#' mask[ 5, 5:6]<-2
#' ilist<-list()
#' lablist<-list()
#' masklist<-list()
#' inds<-1:5
#' scl<-0.33 # a noise parameter
#' for ( predtype in c("label","scalar") )
#' {
#' for ( i in inds ) {
#' img<-antsImageClone(mask)
#' imgb<-antsImageClone(mask)
#' limg<-antsImageClone(mask)
#' if ( predtype == "label") { # 4 class prediction
#' img[ 3:6, 3:6 ]<-rnorm(16)*scl+(i %% 4)+scl*mean(rnorm(1))
#' imgb[ 3:6, 3:6 ]<-rnorm(16)*scl+(i %% 4)+scl*mean(rnorm(1))
#' limg[ 3:6, 3:6 ]<-(i %% 4)+1 # the label image is constant
#' }
#' if ( predtype == "scalar") {
#' img[ 3:6, 3:6 ]<-rnorm(16,1)*scl*(i)+scl*mean(rnorm(1))
#' imgb[ 3:6, 3:6 ]<-rnorm(16,1)*scl*(i)+scl*mean(rnorm(1))
#' limg<-i^2.0 # a real outcome
#' }
#' ilist[[i]]<-list(img,imgb) # two features
#' lablist[[i]]<-limg
#' masklist[[i]] = mask
#' }
#' rfm<-vwnrfs( lablist , ilist, masklist[[1]], rad=c(2,2) ) # use single mask
#' rfm<-vwnrfs( lablist , ilist, masklist, rad=c(2,2) )
#' if ( predtype == "label" )
#' print( sum( rfm$tv != predict(rfm$rfm) ) )
#' if ( predtype == "scalar" )
#' print( cor(as.numeric(rfm$tv) , as.numeric(predict(rfm$rfm) ) ) )
#' } # end predtype loop
#'
#' @export vwnrfs
vwnrfs <- function( y, x, labelmasks, rad=NA, nsamples=8,
ntrees=500, asFactors=TRUE, reduceFactor=1,
...) {
if ( ! usePkg("randomForest") )
stop("Please install the randomForest package, example: install.packages('randomForest')")
# one labelmask or many
useFirstMask=FALSE
if ( typeof(labelmasks) != "list" ) {
inmask = antsImageClone( labelmasks )
labelmasks=list()
for ( i in 1:length(x) ) labelmasks[[i]] = inmask
useFirstMask = TRUE
}
# set rad=0 if not defined
if ( all( is.na( rad ) ) ) rad<-rep(0, x[[1]][[1]]@dimension )
# check y type
yisimg<-TRUE
if ( typeof(y[[1]]) == "integer" | typeof(y[[1]]) == "double" ) yisimg<-FALSE
idim<-length(rad)
if ( idim != x[[1]][[1]]@dimension )
stop("vwnrfs: dimensionality does not match")
invisible(gc())
# initialize fm and tv to maximum potential size
ulabs<-sort( unique( c( as.numeric( labelmasks[[1]] ) ) ) )
ulabs<-ulabs[ ulabs > 0 ]
neigh = prod(rad*2+1) # neighborhood size we'll get from getNeighborhoodInMask
nsubj = length(x) # number of subjects
nfeats = length(x[[1]]) # number of features
tv<-rep( NA, nsamples*length(ulabs)*nsubj ) # Y for random forest
fm = matrix(nrow=length(tv) , ncol=neigh*nfeats ) # X for random forest
# fill tv and fm
fromrow = torow = 0
for ( i in 1:nsubj) {
xfactor = x[[i]]
yfactor = y[[i]]
labmaskfactor = antsImageClone(labelmasks[[i]])
# resample subject images with provided factor
if (reduceFactor != 1) {
subdim<-round( dim(labelmasks[[i]]) / reduceFactor )
subdim[ subdim < 2*rad+1 ] <- ( 2*rad+1 )[ subdim < 2*rad+1 ]
if (yisimg) yfactor<-resampleImage( y[[i]], subdim, useVoxels=1, interpType=as.numeric(asFactors) )
for ( k in 1:nfeats ) xfactor[[k]]<-resampleImage( xfactor[[k]], subdim, useVoxels=1, 0 )
if (i==1 | useFirstMask==F)
labmaskfactor = resampleImage(labmaskfactor, subdim, useVoxels=1,
interpType=as.numeric(asFactors) )
}
# get randmask, only once unless necessary
if (i==1 | useFirstMask==F) {
randmask = randomMask(labmaskfactor,nsamples=nsamples,perLabel=asFactors)
randvox = sum(randmask==1)
if ( randvox == 0 ) stop("error in input data - randmask ", i," is empty")
}
# which rows shall we fill
fromrow = torow+1
torow = fromrow + randvox - 1
# fill tv
if ( yisimg ) {
tv[ fromrow:torow ] = t(getNeighborhoodInMask( yfactor, randmask, rad*0, spatial.info=F, boundary.condition='image' ))
} else {
tv[ fromrow:torow ] = rep( yfactor, randvox )
}
fromcol = tocol = 0 # columns need reset for nfeats loop
for ( k in 1:nfeats ) {
# which columns shall we fill
fromcol = tocol+1
tocol = fromcol + neigh - 1
# get neighborhood
m1<-t(getNeighborhoodInMask( xfactor[[k]], randmask, rad,
spatial.info=FALSE, boundary.condition='image' ))
# make sure neiborhood is not out of image
if (any(is.na(m1)))
stop(paste('Neighborhood falling out of image for subject',i,'feature',k,'\n',
'Consider zero-padding images to increase neighborhood availability.'))
# put in fm
fm[fromrow:torow, fromcol:tocol] = m1
invisible(gc())
}
}
invisible(gc())
# prune tv and fm to non-NA rows
fm = fm[!is.na(tv),]
tv = tv[!is.na(tv)]
if ( asFactors ) tv<-factor( tv )
invisible(gc())
rfm <- randomForest::randomForest(
y=tv,x=fm, ntree = ntrees,
importance = FALSE, proximity = FALSE, keep.inbag = FALSE,
keep.forest = TRUE , na.action = na.omit, norm.votes=FALSE,
...)
invisible(gc())
return( list(rfm=rfm, tv=tv, fm=fm, randmask=randmask ) )
}
#' voxelwise neighborhood random forest prediction
#'
#' Takes a model created with vwnrfs and builds a prediction
#' based on similar features used to train vwnrfs
#'
#' @param rfm random forest model trained with vwnrfs with certain
#' number of features.
#' @param x a list of lists. Each list contains the list of feature
#' images required to predict a response or an image. The features
#' must be the same used during training. I.e., if you train on
#' T1 and T2 images, those should be the same features used for
#' prediction, in the same exact order for each subject.
#' @param labelmasks a list of masks where each mask defines the space
#' to predict from. These can be individual masks for each subject
#' (i.e., custom brain masks) or a single antsImage that will be used
#' for all subjects.
#' @param rad vector of dimensionality d define the neighborhood radius.
#' Must be the same radius with which the model was trained, i.e.,
#' c(1,1,1)
#' @param asFactors boolean - treat the y entries as factors. If this is
#' true, the prediction will be a classification, and the output will
#' produce images. If this is false, the prediction will be a regression,
#' and the output will produce a single response value.
#' @param voxchunk value of maximal voxels to predict at once. This value
#' is used to split the prediction into smaller chunks such that memory
#' requirements do not become too big.
#' @param reduceFactor value of resolution reduction (i.e., for 1mm voxels
#' and reduceFactor=3) the model will be trained on ~3mm images.
#' @return list a 2-list with the rf model, training vector, feature matrix
#' and the random mask
#' @author Pustina D
#'
#' @examples
#' ## Do not run
#' ## vwnrfs.predict(rfm, x=x, labelmasks=labelmasks,
#' ## rad=rad, asFactors=TRUE, voxchunk=voxchunk,
#' ## reduceFactor = mr)mask<-makeImage( c(10,10), 0 )
#' ## End do not run
#'
#' @export vwnrfs.predict
vwnrfs.predict = function(rfm, x, labelmasks, rad=NA,
asFactors=TRUE, voxchunk=30000,
reduceFactor = 1) {
if ( ! usePkg("randomForest") )
stop("Please install the randomForest package, example: install.packages('randomForest')")
# one labelmask or many
if ( typeof(labelmasks) != "list" ) {
inmask = antsImageClone( labelmasks )
labelmasks=list()
for ( i in 1:length(x) ) labelmasks[[i]] = inmask
}
neigh = prod(rad*2+1) # neighborhood size we'd get from getNeighborhoodInMask
nsubj = length(x) # number of subjects
nfeats = length(x[[1]]) # number of features
masterprobs = list() # this will have posterior probabilities
if (asFactors) seg = list() # this will have segmentations
if (!asFactors) response = list() # or responses
# predict each subject individually
for(i in 1:nsubj) {
xfactor = x[[i]]
labmaskfactor = antsImageClone(labelmasks[[i]])
# resample subject images with provided factor
if (reduceFactor != 1) {
subdim = round( dim(labmaskfactor) / reduceFactor )
subdim[ subdim < 2*rad+1 ] <- ( 2*rad+1 )[ subdim < 2*rad+1 ]
for ( k in 1:nfeats ) xfactor[[k]]<-resampleImage( xfactor[[k]], subdim, useVoxels=1, 0 )
labmaskfactor = resampleImage(labmaskfactor, subdim, useVoxels=1, interpType=as.numeric(asFactors) )
}
# initialize output images for this subject
if (asFactors) { nprob = length(levels(rfm$y))
} else { nprob=1 }
masterprobs[[i]] = list()
for (t in 1:nprob) masterprobs[[i]][[t]] = labmaskfactor*0
##
nchunks = round( sum(labmaskfactor!=0) / voxchunk )
if ( nchunks <= 2 ) nchunks=1
chunkmask = splitMask(labmaskfactor,nchunks)
for (ch in 1:nchunks) {
fm = matrix(nrow=sum(chunkmask==ch), ncol=neigh*nfeats) # initialize matrix to predict from
fromcol = tocol = 0 # reset this for the nfeats loop
binchunk = thresholdImage(chunkmask,ch,ch) # binary mask for this chunk only
for ( k in 1:nfeats ) {
# which columns shall we fill
fromcol = tocol+1
tocol = fromcol + neigh - 1
# get neighborhood
m1<-t(getNeighborhoodInMask( xfactor[[k]], binchunk, rad, spatial.info=F, boundary.condition='image' ))
# make sure neiborhood is not out of image
if (any(is.na(m1)))
stop(paste('Neighborhood falling out of image for subject',i,'feature',k,'\n',
'Consider padding the images with zero values to increase neighborhood availability.'))
# put in fm
fm[, fromcol:tocol] = m1
invisible(gc())
}
# predict this chunk
predtype<-'response'
if ( asFactors ) predtype<-'prob'
probs = t( predict( rfm ,newdata=fm, type=predtype) )
# fill masterprobs of this subject
for (m in 1:nprob) masterprobs[[i]][[m]][binchunk==1] = probs[m,]
rm(probs)
invisible(gc()) # clean up some memory
}
# create segmentation for this sub
if (asFactors) {
temp = imageListToMatrix( unlist(masterprobs[[i]]) , labmaskfactor )
temp = apply( temp, FUN=which.max, MARGIN=2)
seg[[i]] = makeImage( labmaskfactor , temp )
rm(temp); invisible(gc())
} else {
response[[i]] = apply( imageListToMatrix( unlist(masterprobs[[i]]) , labmaskfactor ), FUN=median, MARGIN=1 )
}
}
# return either image segmentation or response
if ( asFactors ) {
return( list( seg=seg, probs=masterprobs ) )
} else {
return( list( seg=response, probs=masterprobs ) )
}
}
#' split a mask into n labeled sub-masks
#'
#' @param mask antsImage mask
#' @param n number of mask chunks (if voxchunk is not set)
#' @param voxchunk number of voxels per chunk (if n is not set)
#' @return relabeledMask
#' @author Avants BB, Tustison NJ, Pustina D
#'
#' @examples
#' mask = getMask( antsImageRead( getANTsRData("r16" ) ) )
#' smask = splitMask( mask, 10 )
#'
#' @export splitMask
splitMask <- function( mask, n = NA, voxchunk = NA ) {
if ( is.na(n) & is.na(voxchunk) ) {
stop('Arguments n or voxchunk are required for splitMask')
}
if ( class(mask) != 'antsImage') stop('Mask must be a single antsImage')
hasvalues = mask >= 0.5
nnz = sum( hasvalues )
if ( is.numeric(n) ) {
# if voxchunk not set compute chunk size
nnz = sum( hasvalues )
voxchunk = round( nnz / n ) - 1
}
if (voxchunk > sum(mask>=0.5)) {
return(thresholdImage(mask, 0.5, Inf))
}
chunk.seq = seq(1, nnz, by=voxchunk )
chunk.seq[ length(chunk.seq) ] = nnz
voxels = rep(NA, sum(mask>=0.5) )
for ( ch in 1:( length(chunk.seq)-1 ) ) {
# set end of this chunk
chnxt = chunk.seq[ ch + 1 ] - 1
if ( ch == ( length(chunk.seq)-1 ) ) chnxt = nnz
voxels[ chunk.seq[ch]:chnxt ] = ch
}
smask = mask * 0
smask[mask>=0.5] = voxels
if ( sum( mask >= 0.5 ) != sum(smask >= 0.5 ) ) {
stop("submask non-zero entries should be the same as input mask" )
}
return( smask )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.