####################
##this is r developing code for running simulation to generate
##the DNA/protein microarray data
## the model (ref: smyth 2004, Bayesian hierarchical model and linear model)
## linear model:
## Y_ijk=alpha_i+beta_j+ gamma_ij+ epsilon_ijk
## see the doc named: "Microarray data simulation.doc"
## feng@BU 09/03/2016
##
## Note: Oct 1st 2016, the code listed here is the old one that before
## writing them up into package functions. We keep them for reference
##
####################################
library(limma)
set.seed(2004);
#define the variables
numTreatment<-2 #number of different beta
numGene<-1000 #number of alpha
sampleSize<-100 ##this is the number of repeats for each group
alpha0_sigma<-2 #variance for alpha prior
beta0_sigma<-3 #variance for beta prior
#v0<-10 # the unscaled factor for gamma given gamma<=>0
gamma0_diff_sigma<-3
#the effects are assumed to be non negative and also fixed effect
#but following a normal distribution
alpha0<-(rnorm(numGene, 0,alpha0_sigma))
beta0<-(rnorm(numTreatment,0,beta0_sigma))
#
p_diff<-0.01
numGene_diff<-floor(numGene*p_diff)
#priors for variance distribution
d0<-5
sSquare0<-2
gamma0<-matrix(rep(0,numGene*numTreatment),nrow=numGene, byrow=T)
##in this following code snip, we randomly distribute the differential gamma into
##different positions across different treatment group
##other part is
for(i in c(1:numTreatment))
{
pos_diff<-sample(numGene, size=numGene_diff, replace=T)
gamma0[pos_diff,i]<-(rnorm(numGene_diff,0,gamma0_diff_sigma))
}
##now ready to generate variance
gamma0_var<-matrix(rchisq(numGene*numTreatment, df=d0),nrow=numGene, byrow=T)
gamma0_var<-1/(d0*sSquare0*sSquare0)*gamma0_var
gamma0_var<-1/gamma0_var
#now we have everything ready, do the observation values
#put them into matrix first
groupInfo<-rep(0,sampleSize*numTreatment)
Y_ijk<-matrix(rep(0,numGene*numTreatment*sampleSize),nrow=numGene,byrow=T)
for(j in c(1:numTreatment)) #for different treatment
{
samplePos<-c(1:sampleSize)+(j-1)*sampleSize;
groupInfo[samplePos]<-j;
for(k in c(1:numGene))
{
#write the meta data first
Y_ijk[k,samplePos]<-alpha0[k]+beta0[j]+gamma0[k,j]
Y_ijk[k,samplePos]<-Y_ijk[k,samplePos]+rnorm(sampleSize,0,sqrt(gamma0_var[k,j]))
}
}
##==========>now testing the function code in the module
library(ARPPA)
nGene<-2000
nTreatment<-numTreatment #number of different beta
sampleSize<-20 ##this is the number of repeats for each group
alpha.mean<-0 #variance for alpha prior
alpha.sigma<-alpha0_sigma
beta.mean<-0
beta.sigma<-beta0_sigma #variance for beta prior
#v0<-10 # the unscaled factor for gamma given gamma<=>0
gammaN0.sigma<-gamma0_diff_sigma
p_diff<-0.01
#priors for variance distribution
d0<-10
s0<-2
#call it
set.seed(2004);
dataExp<-simulateExpression(nGene, nTreatment, sampleSize,
alpha.mean=alpha.mean, alpha.sigma=alpha.sigma,
beta.mean=beta.mean, beta.sigma=beta.sigma,
prob.nonZero=0.01, gamma.sigma=gammaN0.sigma,
epsilon.d0=d0, epsilon.s0=s0
)
##now start fitting the model to estimate the EBayes parameters
#defining a S3 function for calculating the prior variance and df
#based on the observed variance
calculatePriorVariance<-function (x,df)
{
eg<-log(x)-digamma(df/2)+log(df/2)
eg_bar<-mean(eg)
tg_d02<-mean((eg-eg_bar)*(eg-eg_bar)*length(x)/(length(x)-1)-trigamma(df/2))
d0<-trigammaInverse(tg_d02)*2
s02<-exp(eg_bar+digamma(d0/2)-log(d0/2))
prior<-list(d0=d0, s02=s02)
}
scaledChiSq_Pdf<-function(x, d0, s0)
{
x_t<-d0*s0*s0/(x*x)
#x_t<-sqrt(x_t)
1/(2^(d0/2)*gamma(d0/2))*x_t^(d0/2-1)*exp(-1*x_t/2)
}
####testing code
###following the example for R help for eBayes
# See also lmFit examples
# Simula2te gene expression data,
# 6 microarrays and 100 genes with one gene differentially expressed
set.seed(2004); invisible(runif(100))
M <- matrix(rnorm(100000*60,sd=0.3),100000,60)
M[1,] <- M[1,] + 1
fit <- lmFit(M)
# Moderated t-statistic
fit <- eBayes(fit)
x<-fit$sigma*fit$sigma
df<-fit$df.residual
r<-sampleVariancePrior(x,df)
#testing code 2
#testing the data generated by the Y_ijk model above
#first got the variance
Y_ijk<-dataExp$exp
tY<-data.frame(t(Y_ijk))
tY$group<-c(rep(1,sampleSize),rep(2,sampleSize))
varY<-aggregate(tY,by=list(tY$group),FUN=var) #by doing aggregate, the new dataframe is arranged to have first column for "by" criteria.
#that is why in the next step, we need to get rid of the first column and last one, since the last one is the one we added as the group info.
varK<-varY[,c(-1,-nGene-2)]
x<-as.numeric(c(varK[1,],varK[2,]))
df<-rep(sampleSize-1,length(x))
r2<-calculatePriorVariance(x,df)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.