A simulated data set for demonstrating how to use the BUScorrect package
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | ## Not run:
#This data set is simulated according to the following R code
rm(list = ls(all = TRUE))
set.seed(123456)
B <- 3
#total number of batches
K <- 3
#total number of subtypes
G <- 2000
#total number of genes
pi <- matrix(NA, B, K)
# pi[b,k] stands for the proportion of kth subtype in bth batch
pi[1, ] <- c(0.2, 0.3, 0.5)
pi[2, ] <- c(0.4, 0.2, 0.4)
pi[3, ] <- c(0.3, 0.4, 0.3)
#total number of samples in each bacth.
n_vec <- rep(NA, B)
#n_vec[b] represents the total number of samples in batch b.
n_vec <- c(70, 80, 70)
#Data list
example_Data <- list()
#baseline expression level
alpha <- rep(2, G)
#subtype effect
mu <- matrix(NA, G, K)
#subtype effect, mu[g,k] stands for the kth-subtype effect of gene g
mu[ ,1] <- 0
#the first subtype is taken as the baseline subtype
#the subtype effect of subtype 1 is set to zero
mu[ ,2] <- c(rep(2,G/20), rep(0,G/20),rep(0, G-G/20-G/20))
mu[ ,3] <- c(rep(0,G/20), rep(2,G/20),rep(0, G-G/20-G/20))
#batch effect
gamma <- matrix(NA, B, G)
#'location' batch effect of gene g in batch b
gamma[1, ] <- 0
#the first batch is taken as the reference batch without batch effects
#the batch effect of batch 1 is set to zero
gamma[2, ] <- c(rep(3,G/5),rep(2,G/5),rep(1,G/5),
rep(2,G/5),rep(3,G/5))
gamma[3, ] <- c(rep(1,G/5),rep(2,G/5),rep(3,G/5),
rep(2,G/5),rep(1,G/5))
sigma_square <- matrix(NA, B,G)
#sigma_square[b,g] denotes the error variance of gene g in batch b.
sigma_square[1,] <- rep(0.1, G)
sigma_square[2,] <- rep(0.2, G)
sigma_square[3,] <- rep(0.15, G)
Z <- list()
#subtype indicator. Z[b,j] represents the subtype of sample j in batch b
Z[[1]] <- as.integer(c(rep(1,floor(pi[1,1]*n_vec[1])),rep(2,floor(pi[1,2]*n_vec[1])),
rep(3,floor(pi[1,3]*n_vec[1]))))
Z[[2]] <- as.integer(c(rep(1,floor(pi[2,1]*n_vec[2])),rep(2,floor(pi[2,2]*n_vec[2])),
rep(3,floor(pi[2,3]*n_vec[2]))))
Z[[3]] <- as.integer(c(rep(1,floor(pi[3,1]*n_vec[3])),rep(2,floor(pi[3,2]*n_vec[3])),
rep(3,floor(pi[3,3]*n_vec[3]))))
for(b in 1:B){ #generate data
num <- n_vec[b]
example_Data[[b]] <- sapply(1:num, function(j){
tmp <- alpha + mu[ ,Z[[b]][j]] + gamma[b, ] +
rnorm(G, sd = sqrt(sigma_square[b, ]))
tmp
})
}
BUSexample_data <- example_Data
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.