# R/glmDenoiseR.R In stnava/RKRNS: ANTsR Decoding

#### Documented in glmDenoiseR

```#' Optimize a regression model for BOLD based on cross-validated model fitting
#'
#' Inspired by discussion with Kendrick Kay regarding his glm denoise tool
#' (http://journal.frontiersin.org/Journal/10.3389/fnins.2013.00247/abstract).
#' The procedure is:
#' \itemize{
#'   \item Step 0: estimate the hrf using an assumed function or a finite
#'   impulse response (FIR) model.
#'   \item Step 1: regressors include design + trends + noise-pool.
#'   \item Step 2: find the noise-pool by an initial cross-validation without
#'   noise regressors.
#'   \item Step 3: cross-validate predictions using different numbers of noise
#'   regressors.
#'   \item Step 4: select the best n for predictors from the noise pool.
#'   \item Step 5: return the noise mask and the value for n.
#' }
#'
#'
#' @param boldmatrix input raw bold data in time by space matrix
#' @param designmatrixIn input design matrix - binary/impulse entries for event
#' related design, blocks otherwise
#' @param hrfBasis basis function for assumed HRF otherwise use FIR
#' @param hrfShifts n-shifts of assumed hrf - shifts by 1 or, for FIR, length
#' of estimated HRF
#' @return returns a list with relevant output
#' @author Avants BB
#' @examples
#'
#' # get example image
#' fn<-paste(path.package("RKRNS"),"/extdata/subaal.nii.gz",sep="")
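#' # NOTE: `bb` (a list holding simbold and desmat) and `mat` (the time by
#' # space BOLD matrix) are not created in this example; define them first.
#' boldImage<-bb$simbold
#' runs<-bb$desmat$Run;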
#' # finite impulse response
#' hrfbasislength<-20
#' dd<-glmDenoiseR( mat, bb$desmat[,1:4], hrfBasis=NA, hrfShifts = hrfbasislength,
#'   crossvalidationgroups=runs, maxnoisepreds=c(0,1,4,6,10,14) , selectionthresh=0.1 ,
#'   collapsedesign=FALSE, polydegree=4 )
#' # average of assumed HRFs
#' tr<-1
#' a1<-4
#' a2<-10
#' hrf<-hemodynamicRF( hrfbasislength, onsets=2,
#'   durations=tr, rt=tr,cc=0.1,a1=a1,a2=a2,b1=0.9, b2=0.9 )
#' plot(ts(hrf))
#' dd2<-glmDenoiseR( mat, bb$desmat[,1:4], hrfBasis=hrf, hrfShifts = 0 ,
#'   crossvalidationgroups=runs, debug=TRUE,
#'   maxnoisepreds=4 , selectionthresh=0.1 , collapsedesign=TRUE, polydegree=4 )
#' # or refine FIR
#' dd3<-glmDenoiseR( mat, bb$desmat[,1:4], hrfBasis=shift(dd$hrf,-2), hrfShifts = 4 , crossvalidationgroups=runs,
#'   maxnoisepreds=0:2 , selectionthresh=0.1 , collapsedesign=TRUE, polydegree=4 )
#'
glmDenoiseR <- function( boldmatrix, designmatrixIn , hrfBasis=NA, hrfShifts=4,
  selectionthresh=0.1, maxnoisepreds=1:12, collapsedesign=TRUE,
  debug=FALSE, polydegree=4 ,
  crossvalidationgroups=4, denoisebyrun=TRUE,
  timevals=NA, runfactor=NA, baseshift=0,
  noisepoolfun=max, myintercept=0 )
{
  #################################################
  # Overall description of the method
  # 0. estimate hrf
  # 1. regressors include: design + trends + noise-pool
  # 2. find noise-pool by initial cross-validation without noise regressors
  # 3. cross-validate predictions using different numbers of noise regressors
  # 4. select best n for predictors from noise pool
  # 5. return the noise mask and the value for n
  #
  # Arguments (as used by the code below):
  #   boldmatrix            time by space data, coerced with data.matrix()
  #   designmatrixIn        design, coerced with data.matrix()
  #   hrfBasis              NA -> estimate the HRF with an FIR model; otherwise
  #                         a basis vector that is convolved with the design
  #   hrfShifts             number of shifts of the basis / FIR length
  #   selectionthresh       fraction of voxels considered for the noise pool
  #   maxnoisepreds         candidate numbers of noise regressors (0 allowed)
  #   collapsedesign        sum design columns into one before HRF convolution
  #   debug                 print / plot intermediate results
  #   polydegree            degree of the polynomial trend regressors
  #   crossvalidationgroups one number (fold count) or one label per time point
  #   denoisebyrun          detrend and build noise regressors per group
  #   timevals              optional time values for the polynomials
  #   runfactor             optional extra nuisance column(s) bound to the trends
  #   baseshift             forwarded to the FIR builder and to shift()
  #   noisepoolfun          summarises per-fold R2 into one value per voxel
  #   myintercept           0: nuisance fits without intercept; >0: with one
  #
  # Returns a list: denoisedBold, n, R2atBestN, hrf, noisepool, R2base,
  # R2final, hrfdesignmat, noiseu, polys.  When max(maxnoisepreds) == 0 the
  # list has no denoisedBold and carries placeholder values for the R2 fields.
  #
  # External helpers expected in scope (not defined here):
  # finiteImpulseResponseDesignMatrix, conv, shift, bigLMStats.
  #################################################

  designmatrix <- data.matrix( designmatrixIn )
  groups <- crossvalidationgroups
  if ( length(groups) == 1 ) {
    # A single number is a fold count: build contiguous folds of equal length
    # and put the leftover leading time points into fold 1.
    kfolds <- groups
    grouplength <- round( nrow(boldmatrix) / kfolds ) - 1
    groups <- rep( seq_len(kfolds), each = grouplength )
    groups <- c( rep( 1, nrow(boldmatrix) - length(groups) ), groups )
  }
  if ( length(groups) != nrow(boldmatrix) )
    stop( "crossvalidationgroups must be a fold count or have one entry per row of boldmatrix" )
  if ( nrow(designmatrix) != nrow(boldmatrix) )
    stop( "designmatrixIn and boldmatrix must have the same number of rows" )

  # Noise pool: voxels whose R2 is negative and strictly below the value at
  # the `frac` position of the sorted R2s.  NA / NaN R2s are never selected.
  getnoisepool <- function( x, frac = selectionthresh ) {
    xord <- sort( x ) # sort() drops NA
    l <- round( length(x) * frac )
    if ( l < 1 || length(xord) == 0 ) return( rep( FALSE, length(x) ) )
    val <- xord[ min( l, length(xord) ) ]
    mynoisevox <- ( x < val & x < 0 )
    mynoisevox[ is.na(mynoisevox) ] <- FALSE
    mynoisevox
  }

  # Regress x out of y, with or without an intercept depending on myintercept.
  # A negative myintercept leaves y untouched.
  residualize <- function( y, x ) {
    x <- as.matrix( x )
    if ( myintercept > 0 ) return( residuals( lm( y ~ 1 + x ) ) )
    if ( myintercept == 0 ) return( residuals( lm( y ~ 0 + x ) ) )
    y
  }

  # Leave-one-group-out R2 (in percent) per fold (rows) and voxel (columns).
  # Rows follow the order of unique(groups); they are addressed by position so
  # that group labels need not be 1..K.
  crossvalidatedR2 <- function( residmat, designmathrf, groups, noiseu=NA, p=NA, howmuchnoise=0 ) {
    residmat <- as.matrix( residmat )
    kfo <- unique( groups )
    R2 <- matrix( 0, nrow=length(kfo), ncol=ncol(residmat) )
    predictors <- as.matrix( designmathrf )
    # howmuchnoise == 0 means "no noise regressors" (1:0 would pick column 1)
    if ( !all( is.na(noiseu) ) && howmuchnoise > 0 )
      predictors <- cbind( predictors,
        as.matrix(noiseu)[ , seq_len(howmuchnoise), drop=FALSE ] )
    if ( !all( is.na(p) ) )
      predictors <- cbind( predictors, as.matrix(p) )
    # explicit unique names keep training and test frames consistent
    colnames(predictors) <- paste0( "x", seq_len( ncol(predictors) ) )
    predictors <- data.frame( predictors )
    for ( fold in seq_along(kfo) ) {
      heldout <- groups == kfo[fold]
      trainy <- residmat[ !heldout, , drop=FALSE ]
      traindf <- predictors[ !heldout, , drop=FALSE ]
      mylm1 <- lm( trainy ~ . , data=traindf )
      predmat <- as.matrix( predict( mylm1, newdata=predictors[ heldout, , drop=FALSE ] ) )
      realmat <- residmat[ heldout, , drop=FALSE ]
      sse <- colSums( ( predmat - realmat )^2 )
      sst <- colSums( sweep( realmat, 2, colMeans(realmat) )^2 )
      R2[ fold, ] <- 100 * ( 1 - sse / sst )
    }
    # TODO write some kendrick like graphics that show the R2
    # plotted over the bold image - need a mask as input
    R2
  }

  # make polynomial regressors per run / cv group
  if ( all( is.na(timevals) ) ) {
    timevals <- rep( 0, nrow(designmatrix) )
    for ( run in unique(groups) ) {
      timeinds <- which( groups == run )
      timevals[ timeinds ] <- seq_along( timeinds )
    }
  }
  if ( !denoisebyrun ) timevals <- seq_len( nrow(designmatrix) )
  p <- stats::poly( timevals, degree=polydegree )
  if ( all( !is.na(runfactor) ) ) p <- cbind( p, runfactor )

  # constant voxels are replaced by the mean time series of the other voxels
  rawboldmat <- data.matrix( boldmatrix )
  rawboldmatsd <- apply( rawboldmat, FUN=sd, MARGIN=2 )
  constvox <- which( rawboldmatsd == 0 )
  if ( length(constvox) > 0 )
    rawboldmat[ , constvox ] <-
      rowMeans( rawboldmat[ , which( rawboldmatsd > 0 ), drop=FALSE ] )

  # detrended copy used for HRF estimation and noise-pool selection
  # FIXME - should clarify that this is done in the documentation
  # TODO - add option of single model
  svdboldmat <- rawboldmat
  if ( denoisebyrun ) {
    for ( run in unique(groups) ) {
      timeinds <- which( groups == run )
      svdboldmat[ timeinds, ] <- residualize(
        rawboldmat[ timeinds, , drop=FALSE ], p[ timeinds, , drop=FALSE ] )
    }
  } else {
    svdboldmat <- residualize( rawboldmat, p )
  }
  # FIXME - consider residualizing nuisance against design matrix
  if (debug) print('lm')

  # FIXME - factor out both HRF estimation approaches as functions
  # FIXME - implement HRF library and just loop over library
  if ( !all( is.na(hrfBasis) ) ) { # use shifted basis functions
    if ( hrfShifts > 1 ) {
      fir <- finiteImpulseResponseDesignMatrix( designmatrix,
        n=hrfShifts, baseshift=baseshift )
    } else fir <- designmatrix
    for ( i in seq_len( ncol(fir) ) )
      fir[ , i ] <- conv( fir[ , i ], hrfBasis )[ seq_len( nrow(designmatrix) ) ]
    mylm <- lm( svdboldmat ~ fir )
    mylm <- bigLMStats( mylm, 0.01 )
    # hrfShifts of 0 or 1 both use the first row only
    betas <- mylm$beta.t[ seq_len( max( 1, hrfShifts ) ), ]
    if (debug) print('meanmax')
    # mean of the (up to) 50 largest values; never indexes past the data
    meanmax <- function( x ) {
      xs <- sort( x, decreasing=TRUE )
      mean( xs[ seq_len( min( 50, length(xs) ) ) ] )
    }
    if ( hrfShifts <= 1 ) {
      betamax <- meanmax( betas )
    } else {
      betamax <- apply( betas, FUN=meanmax, MARGIN=1 )
    }
    betamax <- betamax / sum( abs(betamax) )
    if ( debug ) print( betamax )
    # HRF = beta-weighted sum of shifted copies of the basis
    hrf <- hrfBasis * 0
    for ( i in seq_along(betamax) )
      hrf <- hrf + shift( hrfBasis, baseshift + i - 1 ) * betamax[i]
  } else { # use FIR / deconvolution rather than an assumed HRF
    # one indicator column: 1 wherever any design column is non-zero
    ldes <- matrix( rowMeans( abs(designmatrix) ), ncol=1 )
    ldes <- ldes / ldes; ldes[ is.nan(ldes) ] <- 0
    fir <- finiteImpulseResponseDesignMatrix( ldes,
      n=hrfShifts, baseshift=baseshift )
    mylm <- lm( rawboldmat ~ fir + p )
    mylm <- bigLMStats( mylm, 0.01 )
    # ldes has a single column, so the first hrfShifts rows are the FIR betas
    betablock <- mylm$beta.t[ seq_len( ncol(fir) ), , drop=FALSE ]
    betablock <- betablock[ seq_len(hrfShifts), , drop=FALSE ]
    # estimate the HRF from the "best fit" voxels, i.e. those with the
    # (up to) 50 highest summed betas
    temp <- colSums( betablock )
    tempord <- sort( temp, decreasing=TRUE )
    bestvoxnum <- min( 50, length(tempord) )
    bestvoxels <- which( temp >= tempord[bestvoxnum] )
    # FIXME : define bestvoxels based on crossvalidatedR2 based on residualized data
    hrf <- rowSums( betablock[ , bestvoxels, drop=FALSE ] )
    # orient the HRF so that its dominant deflection is positive
    meanhrfval <- mean( hrf )
    mxdf <- abs( max(hrf) - meanhrfval )
    mndf <- abs( min(hrf) - meanhrfval )
    if ( mndf > mxdf ) hrf <- hrf * (-1)
    if ( abs( min(hrf) ) > max(hrf) ) hrf <- hrf * (-1)
  }
  hrf <- hrf / max(hrf)
  if ( debug ) plot( ts( hrf ) )

  ################### now redo some work w/new hrf
  if ( collapsedesign )
    designmatrix <- as.matrix( as.numeric( rowSums( designmatrix ) ) )
  if (debug) print('hrf conv')
  hrfdesignmat <- designmatrix
  for ( i in seq_len( ncol(hrfdesignmat) ) )
    hrfdesignmat[ , i ] <-
      conv( hrfdesignmat[ , i ], hrf )[ seq_len( nrow(hrfdesignmat) ) ]

  # baseline cross-validation (no noise regressors) defines the noise pool
  R2base <- crossvalidatedR2( svdboldmat, hrfdesignmat, groups, p=NA )
  R2base <- apply( R2base, FUN=noisepoolfun, MARGIN=2 )
  noisepool <- getnoisepool( R2base )
  if ( debug ) print( paste("nvox in noisepool:" ,sum(noisepool) ) )
  if ( max(maxnoisepreds) == 0 ) # TODO: denoise from polynomials by run
  {
    return(list( n=0, R2atBestN=NA, hrf=hrf, noisepool=noisepool, R2base=R2base, R2final=NA, hrfdesignmat=hrfdesignmat, noiseu=rep(1,nrow(hrfdesignmat)), polys=p ))
  }
  print(paste("Noise pool has nvoxels=",sum(noisepool)))
  if ( sum(noisepool) == 0 )
    stop( "noise pool is empty: no voxel has a negative cross-validated R2 below the selection threshold" )

  # Step 5. Candidate noise regressors: left singular vectors of the z-scored
  # (and, per run, polynomial-residualized) noise-pool time series.
  maxnoise <- max( maxnoisepreds )
  if ( !denoisebyrun ) {
    svdboldmat <- scale( svdboldmat ) # z-score
    noiseu <- svd( svdboldmat[ , noisepool, drop=FALSE ], nv=0, nu=maxnoise )$u
  } else {
    noiseu <- NULL
    for ( run in unique(groups) ) {
      runrows <- which( groups == run )
      locmat <- scale( rawboldmat[ runrows, noisepool, drop=FALSE ] ) # z-score
      locmat <- residualize( locmat, p[ runrows, , drop=FALSE ] )
      # TODO: should this be pca?
      locu <- svd( locmat, nv=0, nu=maxnoise )$u
      if ( is.null(noiseu) )
        noiseu <- matrix( 0, nrow=nrow(rawboldmat), ncol=ncol(locu) )
      if ( ncol(locu) != ncol(noiseu) )
        stop( "runs yield different numbers of noise components" )
      # write rows in place so noiseu stays aligned with the time axis even
      # when the rows of a group are not contiguous
      noiseu[ runrows, ] <- locu
    }
  }

  # Step 6. Enter noise regressors into model; evaluate using cross-validation.
  # We refit the model to the data, systematically varying the number of noise
  # regressors included in the model.
  R2summary <- rep( 0, length(maxnoisepreds) )
  R2perNoiseLevel <- NULL
  for ( ct in seq_along(maxnoisepreds) ) {
    i <- maxnoisepreds[ct]
    R2 <- crossvalidatedR2( rawboldmat, hrfdesignmat, groups,
      noiseu=noiseu, howmuchnoise=i, p=p )
    R2max <- apply( R2, FUN=max, MARGIN=2 )
    R2perNoiseLevel <- if ( ct == 1 ) R2max else cbind(R2perNoiseLevel,R2max)
    # which() discards NA / NaN so undefined voxels do not poison the median
    R2pos <- R2max[ which( R2max > 0 ) ]
    R2summary[ct] <- median( R2pos )
    print(paste("NoiseU:",i,"MeanRSqrd",  R2summary[ct] ))
  }
  if ( all( is.na(R2summary) ) )
    stop( "no voxel had a positive cross-validated R2 at any noise level" )
  # pick the first candidate that reaches 95% of the best summary R2
  bestR2 <- max( R2summary, na.rm=TRUE )
  scl <- 0.95
  if ( bestR2 < 0 ) scl <- 1.05
  bestindex <- which( R2summary > scl * bestR2 )[1]
  bestn <- maxnoisepreds[bestindex]

  # final denoising: remove the selected noise regressors and the trends
  nuisance <- cbind( noiseu[ , seq_len(bestn), drop=FALSE ], p )
  if ( denoisebyrun ) {
    denoisedBold <- rawboldmat
    for ( run in unique(groups) ) {
      runrows <- which( groups == run )
      denoisedBold[ runrows, ] <- residualize(
        rawboldmat[ runrows, , drop=FALSE ], nuisance[ runrows, , drop=FALSE ] )
    }
  } else {
    denoisedBold <- residualize( rawboldmat, nuisance )
  }
  return(
    list( denoisedBold=denoisedBold, n=bestn, R2atBestN=R2summary[bestindex],
      hrf=hrf, noisepool=noisepool, R2base=R2base, R2final=R2perNoiseLevel,
      hrfdesignmat=hrfdesignmat, noiseu=noiseu[ , seq_len(bestn) ], polys=p )
  )
}
```
stnava/RKRNS documentation built on Aug. 26, 2017, 9:55 a.m.