knitr::opts_chunk$set(echo = FALSE, eval=TRUE, message=FALSE, warning=FALSE, fig.width=15, fig.height=9) path = Sys.getenv('PATH') path = Sys.setenv('PATH'=paste(path, '/home/rstudio/.local/bin', sep=':'))
I use the directions here to create an AMI to run Rstudio on.
The Welcome.R
script in the NIsim package has code to setup this machine image with Dropbox access to the files.
#devtools::install_github('simonvandekar/pbj', ref='ftest') #devtools::install_github('simonvandekar/NIsim') ### LIBRARIES ### library(RNifti) library(parallel) library(splines) library(mmand) library(fslr) library(progress) library(abind) library(pbj) library(PDQutils) library(NIsim) ### LOAD IN DATA FROM DROPBOX ### dbimagedir = '~/Dropbox (VUMC)/pbj/data/abide/neuroimaging/cpac/alff_cropped' dbresimagedir = '~/Dropbox (VUMC)/pbj/data/abide/neuroimaging/cpac/alff_cropped_res' dbdatafile = '~/Dropbox (VUMC)/pbj/data/abide/demographic/n1035_phenotypic_20190509.rds' maskfile = '~/Dropbox (VUMC)/pbj/data/abide/neuroimaging/cpac/cropped_n1035_mask.nii.gz' # load in data and get directories dat = readRDS(dbdatafile) dat$imgname = paste(dat$file_id, 'alff.nii.gz', sep='_') dat$files = file.path(dbimagedir, dat$imgname) ### COMPUTING PARAMETERS ### computeConfig = list( # number of cores to use for computing ncores = 32 ) ### SIMULATION PARAMETERS ### simConfig = list( # use robust variance estimator? robust = TRUE, # what transformation to use. Only the first is used tranform = c('t', 'edgeworth', 'none'), # vector of sample sizes to simulate ns = c(200, 400, 800), # number of simulations to run nsim=500, # number of bootstraps nboot = 500, # number of permutations nperm = 0, # cluster forming thresholds cfts.s = c(0.1, 0.25, 0.4), cfts.p = c(0.01, 0.001), # radius for spheres of signal. rs=c(8), #### MODEL FORMULAS FOR SIMULATIONS #### formres = as.formula( paste0(" ~ dx_group + sex + func_mean_fd + ns(age_at_scan, df=10)" )), # need age_at_scan in both models for testing nonlinear functions form = as.formula(paste0(" ~ sex + func_mean_fd + age_at_scan + fake_covariate1 + scale(fake_covariate1^2) + scale(fake_covariate1^3)" )), formred = as.formula(paste0(" ~ sex + func_mean_fd + age_at_scan + fake_covariate1")), # weights for each subject. Can be a character vector W = c("func_mean_fd"), # where to put residuals resdir = dbresimagedir, # where to output results simdir = '~/temp', dat = dat, mask = maskfile, output = '~/Dropbox (VUMC)/pbj/pbj_ftest/covariance_sim_df2_polynomial_covariate.rdata' ) # use betas = 0 for global null # parameters = betas * sd(y)/sd(x). simConfig$betas = rep(0, length(simConfig$rs))
### SETUP THE SIMULATION ANALYSIS ### # subsets dataset to all people who have the variables simConfig$dat = simConfig$dat[apply(!is.na(simConfig$dat[ ,c(all.vars(as.formula(simConfig$formres)), simConfig$W)]), 1, all), ] # Create residualized images if(class(simConfig$formres)=='formula' | is.character(simConfig$formres)){ simConfig$dat$rfiles = file.path(simConfig$resdir, basename(simConfig$dat$files)) if(!all(file.exists(simConfig$dat$rfiles))){ pbj::residualizeImages(files=simConfig$dat$files, dat=simConfig$dat, mask=simConfig$mask, form=simConfig$formres, outfiles=simConfig$dat$rfiles, mc.cores=computeConfig$ncores) } simConfig$dat$files = simConfig$dat$rfiles # clean up. May not be necessary gc() } simdirs = simSetup(simConfig$dat$files, data=simConfig$dat, outdir=simConfig$simdir, nsim=simConfig$nsim, ns=simConfig$ns, mask=simConfig$mask, rs=simConfig$rs, betas=simConfig$betas )
# simfunc should contain a data argument, which is defined within runSim # Other arguments are identical across simulation runs. simFunc = function(lmfull, lmred, mask, data, nboot, cfts){ data$fake_group = factor(ceiling(ppoints(nrow(data))*3 ) ) data$fake_covariate1 = rnorm(nrow(data)) data$fake_covariate2 = rnorm(nrow(data)) statmap = lmPBJ(data$images, form=lmfull, formred=lmred, mask=mask, data=data, transform = 't') #k = mmand::shapeKernel(3, 3, type='box') #stat = lapply(cfts, function(th) max(c(table(c(mmand::components(stat.statMap(statmap) >th^2*statmap$rdf + statmap$df, k))),0), na.rm=TRUE) ) #pbj = pbjSEI(statmap, nboot = nboot, cfts.s = cfts) #pbj = lapply(pbj[grep('cft', names(pbj))], function(x) x[['boots']]) return(list(estimates=statmap$normedcoef, covestimator=statmap$sqrtSigma)) } #debug(lmPBJ) #test = simFunc(simConfig$form, simConfig$formred, simConfig$mask, readRDS(file.path(simdirs$simdir[101], 'data.rds')), simConfig$nboot, simConfig$cfts.s) results = runSim(simdirs$simdir, method='synthetic', simfunc = simFunc, mask = simConfig$mask, simfuncArgs = list( lmfull= simConfig$form, lmred = simConfig$formred, mask = simConfig$mask, nboot=simConfig$nboot, cfts=simConfig$cfts.s), ncores = computeConfig$ncores) dir.create(dirname(simConfig$output), showWarnings = FALSE, recursive = TRUE) # clean up files save.image(file=simConfig$output) #Sys.sleep(5*60) #unlink(list.files(tempdir(), full.names = TRUE)) #gc() #unlink(simdirs) #system('sudo shutdown -h now') # summarize the results # apply(rowMeans(simplify2array(x[!is.na(x)]), dims = 2), 2, quantile)
resultsFixedX = runSim(rep(simdirs$simdir[seq(1, nrow(simdirs), by=simConfig$nsim)], each=simConfig$nsim), method='synthetic', simfunc = simFunc, mask = simConfig$mask, simfuncArgs = list( lmfull= simConfig$form, lmred = simConfig$formred, mask = simConfig$mask), ncores = computeConfig$ncores)
colMeans(do.call(rbind, lapply(results, function(x) c(crossprod(x$covestimator[1,,], x$covestimator[2,,]))))) cov(do.call(rbind, lapply(results, function(x) c(x$estimates))))
load('~/Dropbox (VUMC)/pbj/pbj_ftest/synthsim_transform_images.rdata') simdirs$results = resultsFixedX# lapply(results, simplify2array) x =simdirs[simdirs$n==100,] simdirs$results[ !sapply(simdirs$results, is.numeric) ] = NA #simdirs$results = lapply(simdirs$results, function(x){ x[,'edgeworth'] = ifelse(is.infinite(x[,'edgeworth']), x[, 't'], x[,'edgeworth']); x}) by(simdirs, simdirs$n, function(x) sum(!is.na(x$results))) by(simdirs, simdirs$n, function(x) apply(rowMeans(simplify2array(x$results[!is.na(x$results)]), dims = 2), 2, function(x) quantile(x))) by(simdirs, simdirs$n, function(x) apply(apply(simplify2array(x$results[!is.na(x$results)]), 1:2, var ), 2, function(x) quantile(x)) ) by(simdirs, simdirs$n, function(x) apply(apply(simplify2array(x$results[!is.na(x$results)]), 1:2, function(y) var(y) ), 2, function(x) x) ) by(simdirs, simdirs$n, function(x) apply(apply(simplify2array(x$results[!is.na(x$results)]), 1:2, function(y) sd(y)/sqrt(length(y)) ), 2, function(x) x) )
simConfig$dat$images = simConfig$dat$files test2 = simFuncCoefs(lmfull= simConfig$form, lmred = simConfig$formred, mask = simConfig$mask, data=simConfig$dat[1:50,]) results = runSim(simdirs$simdir, method='synthetic', simfunc = simFuncCoefs, mask = simConfig$mask, simfuncArgs = list( lmfull= simConfig$form, lmred = simConfig$formred, mask = simConfig$mask), ncores = computeConfig$ncores)
# plotting function for below sections plots = function(rdata){ load(rdata) simdirs$results = results# lapply(results, simplify2array) x =simdirs[simdirs$n==100,] simdirs[, paste0('obsMaxClust_cft.s', simConfig$cfts.s)] = do.call(rbind, by(simdirs, simdirs$n, function(x) do.call(rbind, lapply(x$results, function(y) unlist(y[['obs']]) ) ) )) cex=1.5 par(mgp=c(1.7,.7,0), lwd=1.5, lend=2, cex.lab=0.8*cex, cex.axis=0.8*cex, cex.main=1*cex, mfrow=c(1,1), mar=c(2.8,2.8,1.8,.2), bty='l') layout(mat=matrix(1:(length(simConfig$cfts.s)*length(simConfig$ns)), nrow=length(simConfig$cfts.s)) ) # axes are based on tail quantiles probs = seq(0.75, 1, length.out=100) #length.out=pmin(simConfig$nsim, simConfig$nboot) trash = by(simdirs, simdirs$n, function(df){ for(cftInd in 1:length(simConfig$cfts.s)){ ylims = range(sapply(df$results, function(x) range(quantile(x$boot[[cftInd]][[1]], probs=probs)))) colname = paste0('obsMaxClust_cft.s', simConfig$cfts.s[cftInd]) x = df[,colname] xlims = range(quantile(x, probs=probs) ) xaxlab = c(0.9, 0.95, 0.99, 0.999) xaxt = quantile(x, probs=xaxlab) plot(x, ylim=ylims, xlim=xlims, type='n', xlab='Observed cluster quantile', ylab='Estimated cluster quantile', main=paste('n =', df$n[1], 'cft =', simConfig$cfts.s[cftInd])) #axis(side=1, at=xaxt, labels=xaxlab) abline(v=xaxt, col='orange', lty=2) for(ind in 1:simConfig$nsim){ points(quantile(x, probs=probs), quantile(df$results[[ind]]$boot[[cftInd]][[1]], probs=probs), type='l') } abline(a=0,b=1, col='blue') } }) trash = by(simdirs, simdirs$n, function(df){ for(cftInd in 1:length(simConfig$cfts.s)){ ylims = range(sapply(df$results, function(x) range(quantile(x$boot[[cftInd]][[1]], probs=probs)))) colname = paste0('obsMaxClust_cft.s', simConfig$cfts.s[cftInd]) x = df[,colname] xlims = range(quantile(x, probs=probs) ) xaxlab = c(0.9, 0.95, 0.99, 0.999) xaxt = quantile(x, probs=xaxlab) y=colMeans(do.call(rbind, lapply(1:nrow(df), function(ind) quantile(df$results[[ind]]$boot[[cftInd]][[1]], probs=xaxlab)<df[ind,colname])) ) plot(1-xaxlab, y, type='b', xlab='Target type 1 error', ylab='Actual type 1 error', xlim=range(c(y, 1-xaxlab)), ylim=range(c(y, 1-xaxlab)), , main=paste('n =', df$n[1], 'cft =', simConfig$cfts.s[cftInd])) abline(a=0,b=1, col='blue') } }) }
plots('~/Dropbox (VUMC)/pbj/pbj_ftest/df2_group_covariate.rdata')
plots('~/Dropbox (VUMC)/pbj/pbj_ftest/df2_independent_covariates.rdata')
Testing the second and third degree terms of a polynomial covariate.
plots('~/Dropbox (VUMC)/pbj/pbj_ftest/df2_polynomial_covariate.rdata')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.