knitr::opts_chunk$set(echo = FALSE, eval=TRUE, message=FALSE, warning=FALSE, fig.width=15, fig.height=9)
path = Sys.getenv('PATH')
path = Sys.setenv('PATH'=paste(path, '/home/rstudio/.local/bin', sep=':'))

AWS machine image setup

I use the directions here to create an AMI to run Rstudio on. The Welcome.R script in the NIsim package has code to setup this machine image with Dropbox access to the files.

Setup simulations

#devtools::install_github('simonvandekar/pbj', ref='ftest')
#devtools::install_github('simonvandekar/NIsim')
### LIBRARIES ###
library(RNifti)
library(parallel)
library(splines)
library(mmand)
library(fslr)
library(progress)
library(abind)
library(pbj)
library(PDQutils)
library(NIsim)



### LOAD IN DATA FROM DROPBOX ###
dbimagedir = '~/Dropbox (VUMC)/pbj/data/abide/neuroimaging/cpac/alff_cropped'
dbresimagedir = '~/Dropbox (VUMC)/pbj/data/abide/neuroimaging/cpac/alff_cropped_res'
dbdatafile = '~/Dropbox (VUMC)/pbj/data/abide/demographic/n1035_phenotypic_20190509.rds'
  maskfile = '~/Dropbox (VUMC)/pbj/data/abide/neuroimaging/cpac/cropped_n1035_mask.nii.gz'


# load in data and get directories
dat = readRDS(dbdatafile)
dat$imgname = paste(dat$file_id, 'alff.nii.gz', sep='_')
dat$files = file.path(dbimagedir, dat$imgname)



### COMPUTING PARAMETERS ###
computeConfig = list(
  # number of cores to use for computing
  ncores = 32
)




### SIMULATION PARAMETERS ###
simConfig = list(
  # use robust variance estimator?
  robust = TRUE,
  # what transformation to use. Only the first is used
  tranform = c('t', 'edgeworth', 'none'),
  # vector of sample sizes to simulate
  ns = c(200, 400, 800),
  # number of simulations to run
  nsim=500,
  # number of bootstraps
  nboot = 500,
  # number of permutations
  nperm = 0,
  # cluster forming thresholds
  cfts.s = c(0.1, 0.25, 0.4),
  cfts.p = c(0.01, 0.001),

  # radius for spheres of signal.
  rs=c(8),

  #### MODEL FORMULAS FOR SIMULATIONS ####
  formres = as.formula( paste0(" ~ dx_group + sex + func_mean_fd + ns(age_at_scan, df=10)" )),
  # need age_at_scan in both models for testing nonlinear functions
  form = as.formula(paste0(" ~ sex + func_mean_fd + age_at_scan + fake_covariate1 + scale(fake_covariate1^2) + scale(fake_covariate1^3)" )),
  formred = as.formula(paste0(" ~ sex + func_mean_fd + age_at_scan + fake_covariate1")),
  #  weights for each subject. Can be a character vector
  W = c("func_mean_fd"),
  # where to put residuals
  resdir = dbresimagedir,
  # where to output results
  simdir = '~/temp',
  dat = dat,
  mask = maskfile,
  output = '~/Dropbox (VUMC)/pbj/pbj_ftest/covariance_sim_df2_polynomial_covariate.rdata'
)
# use betas = 0 for global null
# parameters = betas * sd(y)/sd(x).
simConfig$betas = rep(0, length(simConfig$rs))
### SETUP THE SIMULATION ANALYSIS ###
# subsets dataset to all people who have the variables
simConfig$dat = simConfig$dat[apply(!is.na(simConfig$dat[ ,c(all.vars(as.formula(simConfig$formres)), simConfig$W)]), 1, all), ]
# Create residualized images
if(class(simConfig$formres)=='formula' | is.character(simConfig$formres)){
  simConfig$dat$rfiles = file.path(simConfig$resdir, basename(simConfig$dat$files))
  if(!all(file.exists(simConfig$dat$rfiles))){
  pbj::residualizeImages(files=simConfig$dat$files, dat=simConfig$dat, mask=simConfig$mask, form=simConfig$formres,
                         outfiles=simConfig$dat$rfiles, mc.cores=computeConfig$ncores)
}
  simConfig$dat$files = simConfig$dat$rfiles
  # clean up. May not be necessary
  gc()
}


simdirs = simSetup(simConfig$dat$files, data=simConfig$dat, outdir=simConfig$simdir, nsim=simConfig$nsim, ns=simConfig$ns, mask=simConfig$mask, rs=simConfig$rs, betas=simConfig$betas )
# simfunc should contain a data argument, which is defined within runSim
# Other arguments are identical across simulation runs.
simFunc = function(lmfull, lmred, mask, data, nboot, cfts){
  data$fake_group = factor(ceiling(ppoints(nrow(data))*3 ) )
  data$fake_covariate1 = rnorm(nrow(data))
  data$fake_covariate2 = rnorm(nrow(data))
  statmap = lmPBJ(data$images, form=lmfull, formred=lmred, mask=mask, data=data, transform = 't')
  #k = mmand::shapeKernel(3, 3, type='box')
  #stat = lapply(cfts, function(th) max(c(table(c(mmand::components(stat.statMap(statmap) >th^2*statmap$rdf + statmap$df, k))),0), na.rm=TRUE) )
  #pbj = pbjSEI(statmap, nboot = nboot, cfts.s = cfts)
  #pbj = lapply(pbj[grep('cft', names(pbj))], function(x) x[['boots']])
  return(list(estimates=statmap$normedcoef, covestimator=statmap$sqrtSigma))
}

#debug(lmPBJ)
#test = simFunc(simConfig$form, simConfig$formred, simConfig$mask, readRDS(file.path(simdirs$simdir[101], 'data.rds')), simConfig$nboot, simConfig$cfts.s)


results = runSim(simdirs$simdir, method='synthetic',
       simfunc = simFunc, mask = simConfig$mask,
simfuncArgs = list(
  lmfull= simConfig$form,
  lmred = simConfig$formred,
  mask = simConfig$mask, nboot=simConfig$nboot, cfts=simConfig$cfts.s), ncores = computeConfig$ncores)

dir.create(dirname(simConfig$output), showWarnings = FALSE, recursive = TRUE)
# clean up files
save.image(file=simConfig$output)
#Sys.sleep(5*60)
#unlink(list.files(tempdir(), full.names = TRUE))
#gc()
#unlink(simdirs)
#system('sudo shutdown -h now')

# summarize the results
# apply(rowMeans(simplify2array(x[!is.na(x)]), dims = 2), 2, quantile)
resultsFixedX = runSim(rep(simdirs$simdir[seq(1, nrow(simdirs), by=simConfig$nsim)], each=simConfig$nsim), method='synthetic',
       simfunc = simFunc, mask = simConfig$mask,
simfuncArgs = list(
  lmfull= simConfig$form,
  lmred = simConfig$formred,
  mask = simConfig$mask), ncores = computeConfig$ncores)

Compare covariance estimator to simulations estimator

colMeans(do.call(rbind, lapply(results, function(x) c(crossprod(x$covestimator[1,,], x$covestimator[2,,])))))

cov(do.call(rbind, lapply(results, function(x) c(x$estimates))))
load('~/Dropbox (VUMC)/pbj/pbj_ftest/synthsim_transform_images.rdata')
simdirs$results = resultsFixedX# lapply(results, simplify2array)
x =simdirs[simdirs$n==100,]
simdirs$results[ !sapply(simdirs$results, is.numeric) ] = NA
#simdirs$results = lapply(simdirs$results, function(x){ x[,'edgeworth'] = ifelse(is.infinite(x[,'edgeworth']), x[, 't'], x[,'edgeworth']); x})
by(simdirs, simdirs$n, function(x) sum(!is.na(x$results)))
by(simdirs, simdirs$n, function(x) apply(rowMeans(simplify2array(x$results[!is.na(x$results)]), dims = 2), 2, function(x) quantile(x)))
by(simdirs, simdirs$n, function(x) apply(apply(simplify2array(x$results[!is.na(x$results)]), 1:2, var ), 2, function(x) quantile(x)) )
by(simdirs, simdirs$n, function(x) apply(apply(simplify2array(x$results[!is.na(x$results)]), 1:2, function(y) var(y) ), 2, function(x) x) )
by(simdirs, simdirs$n, function(x) apply(apply(simplify2array(x$results[!is.na(x$results)]), 1:2, function(y) sd(y)/sqrt(length(y)) ), 2, function(x) x) )
simConfig$dat$images = simConfig$dat$files
test2 = simFuncCoefs(lmfull= simConfig$form,
 lmred = simConfig$formred,
 mask = simConfig$mask,
 data=simConfig$dat[1:50,])

results = runSim(simdirs$simdir, method='synthetic',
       simfunc = simFuncCoefs, mask = simConfig$mask,
simfuncArgs = list(
  lmfull= simConfig$form,
  lmred = simConfig$formred,
  mask = simConfig$mask), ncores = computeConfig$ncores)
# plotting function for below sections
plots = function(rdata){
  load(rdata)
simdirs$results = results# lapply(results, simplify2array)
x =simdirs[simdirs$n==100,]
simdirs[, paste0('obsMaxClust_cft.s', simConfig$cfts.s)] = do.call(rbind, by(simdirs, simdirs$n, function(x) do.call(rbind, lapply(x$results, function(y) unlist(y[['obs']]) ) ) ))

cex=1.5
par(mgp=c(1.7,.7,0), lwd=1.5, lend=2, cex.lab=0.8*cex, cex.axis=0.8*cex, cex.main=1*cex, mfrow=c(1,1), mar=c(2.8,2.8,1.8,.2), bty='l')
layout(mat=matrix(1:(length(simConfig$cfts.s)*length(simConfig$ns)), nrow=length(simConfig$cfts.s)) )
# axes are based on tail quantiles
probs = seq(0.75, 1, length.out=100) #length.out=pmin(simConfig$nsim, simConfig$nboot)

trash = by(simdirs, simdirs$n, function(df){
  for(cftInd in 1:length(simConfig$cfts.s)){
    ylims = range(sapply(df$results, function(x) range(quantile(x$boot[[cftInd]][[1]], probs=probs))))
    colname = paste0('obsMaxClust_cft.s', simConfig$cfts.s[cftInd])
    x = df[,colname]
    xlims = range(quantile(x, probs=probs) )
    xaxlab = c(0.9, 0.95, 0.99, 0.999) 
    xaxt = quantile(x, probs=xaxlab)
    plot(x, ylim=ylims, xlim=xlims, type='n', xlab='Observed cluster quantile', ylab='Estimated cluster quantile', main=paste('n =', df$n[1], 'cft =', simConfig$cfts.s[cftInd]))
    #axis(side=1, at=xaxt, labels=xaxlab)
    abline(v=xaxt, col='orange', lty=2)
    for(ind in 1:simConfig$nsim){
      points(quantile(x, probs=probs), quantile(df$results[[ind]]$boot[[cftInd]][[1]], probs=probs), type='l')
    }
    abline(a=0,b=1, col='blue')
  }
})

trash = by(simdirs, simdirs$n, function(df){
  for(cftInd in 1:length(simConfig$cfts.s)){
    ylims = range(sapply(df$results, function(x) range(quantile(x$boot[[cftInd]][[1]], probs=probs))))
    colname = paste0('obsMaxClust_cft.s', simConfig$cfts.s[cftInd])
    x = df[,colname]
    xlims = range(quantile(x, probs=probs) )
    xaxlab = c(0.9, 0.95, 0.99, 0.999) 
    xaxt = quantile(x, probs=xaxlab)
    y=colMeans(do.call(rbind, lapply(1:nrow(df), function(ind) quantile(df$results[[ind]]$boot[[cftInd]][[1]], probs=xaxlab)<df[ind,colname])) )
    plot(1-xaxlab, y, type='b', xlab='Target type 1 error', ylab='Actual type 1 error', xlim=range(c(y, 1-xaxlab)), ylim=range(c(y, 1-xaxlab)), , main=paste('n =', df$n[1], 'cft =', simConfig$cfts.s[cftInd]))
    abline(a=0,b=1, col='blue')
  }
})
}

Group covariate

plots('~/Dropbox (VUMC)/pbj/pbj_ftest/df2_group_covariate.rdata')

Independent continuous covariates

plots('~/Dropbox (VUMC)/pbj/pbj_ftest/df2_independent_covariates.rdata')

Polynomial continuous covariate

Testing the second and third degree terms of a polynomial covariate.

plots('~/Dropbox (VUMC)/pbj/pbj_ftest/df2_polynomial_covariate.rdata')


simonvandekar/NIsim documentation built on Oct. 12, 2020, 5:06 p.m.