knitr::opts_chunk$set(echo = TRUE)

Rank 1 test

# simulate with f normal and l an equal mixture of 0s and double-exponential
# if missing, add missing data at half of positions
sim_rank1 =function(n=100,p=200,pve=0.5,missing=FALSE){
  f = rnorm(p)
  half_n = trunc(n/2)
  l = c(rep(0,half_n),(-1)^rbinom(n-half_n,1,0.5)*(rexp(n-half_n)))
  LF = l %*% t(f)
  vLF = var(as.vector(LF))
  Y = LF + sqrt(vLF * ((1-pve)/pve)) * matrix(rnorm(n*p),nrow=n,ncol=p)  
  if(missing){
    miss = matrix(rbinom(n*p,1,0.5),nrow=n)
    Y[miss==1] = NA
  }
  return(list(Y=Y,LF=LF))
}

# runs some simple simulations and computes mean squared error for estimate
# of LF 
test_sims = function(flash_fn=flashr::flash_add_greedy,flash_param = list(),simfn = sim_rank1, ntest=20){
  rmse = rep(0,ntest)
  for(i in 1:ntest){
    set.seed(i)
    d = simfn()
    #print(d$Y)
    f = do.call(flash_fn,modifyList(flash_param,list(data=d$Y)))
    rmse[i] = sqrt(mean((flashr::flash_get_fitted_values(f)-d$LF)^2))
  }
  return(rmse)
}

Run without missing data

res = list()
res[[1]] = test_sims(flash_param = list(Kmax=4))
res[[2]]= test_sims(flash_param=list(Kmax=4,init_fn=flashr::udv_si_svd))
res[[3]] = test_sims(flash_param =list(Kmax=4,ebnm_fn =flashr::ebnm_pn))

boxplot(res)
lapply(res,mean)

And some with missing data

res = list()
res[[1]] = test_sims(flash_param = list(Kmax=4),simfn = function(...){sim_rank1(...,missing=TRUE)})
lapply(res,mean)
sessionInfo()


stephenslab/flashr documentation built on Jan. 31, 2024, 2:07 a.m.