Nothing
generateSimulationSet <- function(simPath, dataPath, nPerK, rounds = 400, nu = 0, pcnv = 1, norm.contam = FALSE, dataPars = NULL){
dataPars <- list(snps.seq = 1000000, snps.cgh = 600000, mu = 70, sigma.reads = 25, sigma0.lrr = 0.15,
sigma0.baf = 0.03, density.sigma = 0.1)
threshold <- 0.04
dirs <- list.dirs(simPath)
dirs <- dirs[-1]
if(length(dirs)>0){
setIDs <- as.numeric(sapply(1:length(dirs),function(j){strsplit(dirs[j], split = 'sims-')[[1]][2]}))
setID <- max(setIDs) + 1
} else {
setID <- 1
}
if (!dir.exists(simPath)){
dir.create(simPath)
}
if (!dir.exists(dataPath)){
dir.create(dataPath)
}
ks <- unlist(lapply(1:length(nPerK), function(x) {rep(nPerK[x], x)}))
psis <- Reduce(rbind, lapply(1:5, function(x) {
mat <- sampleSimplex(nPerK[x], x)
cbind(mat, matrix(0, nrow = nrow(mat), ncol = 5-ncol(mat)))
}))
n <- nrow(psis)
for (j in 1:nrow(psis)) {
go <- FALSE
while (go == FALSE) {
tumor <- try(Tumor(psis[j,], rounds, nu, pcnv, norm.contam), silent = TRUE)
if (!inherits(tumor, 'try-error')) {
data <- generateTumorData(tumor, dataPars$snps.seq, dataPars$snps.cgh, dataPars$mu, dataPars$sigma.reads,
dataPars$sigma0.lrr, dataPars$sigma0.baf, dataPars$density.sigma)
}
if (!inherits(tumor, 'try-error') & !inherits(data, 'try-error')) {
go <- TRUE
}
}
filename.tumor <- paste(simPath, '/', 'sim-', j, '.rda', sep = '')
filename.data <- paste(dataPath, '/', 'dat-', j, '.rda', sep = '')
sim <- list('tumor' = tumor)
save(sim, file = filename.tumor)
dat <- list('dat' = data)
save(dat, file = filename.data)
}
if (nu > 0 & pcnv > 0) {
alt.types <- 'both'
} else if (nu > 0 & pcnv == 0) {
alt.types <- 'mut'
} else if (nu == 0 & pcnv > 0) {
alt.types <- 'cnv'
} else {
alt.types <- 'none'
}
metadata <- list(tumor.params = list(rounds, nu, pcnv, norm.contam), data.params = dataPars,
alt.types = alt.types, date = Sys.Date(), setID = setID)
save(metadata, file = paste(simPath, '/metadata', '.rda', sep = ''))
}
###Now code to generate a set of 300 artificial mixtures:
mix <- function(datList, psi, dataPath, mixPath, index, pos, alter=FALSE) {
breaks <- lapply(1:22, function(j) {
brks <- sort(unique(unlist(sapply(1:length(datList), function(k) {c(datList[[k]][datList[[k]]$chrom==j,]$loc.start,
datList[[k]][datList[[k]]$chrom==j,]$loc.end)}))))
starts <- brks[1:(length(brks) - 1)]
ends <- c(starts[2:length(starts)] - 1, brks[length(brks)])
data.frame(start = starts, end = ends, chr = rep(j, length(starts)))
})
coords <- Reduce(rbind, breaks)
columns <- sample(1:2,length(psi),replace=TRUE)
shifts <- sapply(1:length(datList),function(j) {
lrrs <- unlist(sapply(1:nrow(datList[[j]]), function(k) {rep(datList[[j]]$seg.median[k], datList[[j]]$num.mark[k])}))
dens <- density(na.omit(2*10^lrrs))
dens$x[which.max(dens$y)] - 2
})
d2 <- lapply(1:length(datList), function(j) {
temp <- datList[[j]]
res <- t(sapply(1:nrow(coords),function(k) {
chrdat <- temp[as.character(temp$chrom) == as.character(coords$chr[k]),]
seg <- chrdat[chrdat$loc.end>=coords$start[k] & chrdat$loc.start<=coords$end[k],]
baf <- seg$AvgBAF
tcn <- 2*10^(seg$seg.median)
tcn <- tcn - shifts[j]
if (length(which(tcn<0))>0) {
tcn[which(tcn<0)] <- -tcn[which(tcn<0)]
}
X <- tcn*(1 - baf)
Y <- tcn*baf
if (nrow(seg)>0) {
output <- c(X, Y)
} else {
output <- rep(NA, 2)
}
output
}))
colnames(res) <- c('X', 'Y')
res
})
data.mixed <- as.data.frame(Reduce('+', lapply(1:length(d2), function(k) {psi[k]*d2[[k]]})))
notna <- which(sapply(1:nrow(data.mixed), function(x) {length(which(is.na(data.mixed[x,]))) == 0}))
for (j in 1:length(d2)) {
d2[[j]] <- d2[[j]][notna,]
}
data.mixed <- data.mixed[notna,]
starts <- coords$start[notna]
ends <- coords$end[notna]
chrs <- coords$chr[notna]
data.mixed$chr <- chrs
data.mixed$seg <- 1:length(notna)
data.mixed$LRR <- log10((data.mixed$X + data.mixed$Y)/2)
data.mixed$BAF <- data.mixed$X/(data.mixed$X + data.mixed$Y)
data.mixed$markers <- unlist(sapply(1:22,function(i) {
pos.chr <- pos[pos$Chr==i,]
data.chr <- data.mixed[data.mixed$chr==i,]
sapply(1:nrow(data.chr),function(j) {
length(which(pos.chr >= starts[which(data.mixed$chr==i)[j]] & pos.chr <= ends[which(data.mixed$chr==i)[j]]))
})
}))
data.mixed$start <- starts
data.mixed$end <- ends
data.mixed <- data.mixed[,c(3,4,5,6,1,2,7,8,9)]
clones <- lapply(1:length(d2),function(k) {
df <- data.frame('A'=rep(1,nrow(d2[[1]])),'B'=rep(1,nrow(d2[[1]])))
colnames(df) <- c('A','B')
df$chr <- data.mixed$chr
df$start <- starts
df$end <- ends
df$seg <- 1:length(starts)
df$parent.index <- rep(0, length(starts))
df$markers <- data.mixed$markers
df <- df[,c(3,4,5,1,2,6,7,8)]
list('cn'=df, 'seq'=NULL)
})
for (k in 2:nrow(clones[[1]]$cn)) {
bin1 <- data.mixed$X[k]==data.mixed$X[k-1] & data.mixed$Y[k]==data.mixed$Y[k-1]
bin2 <- clones[[1]]$cn$chr[k]==clones[[1]]$cn$chr[k-1]
if (is.na(bin1)) {
bin1 <- FALSE
}
if (bin1==TRUE & bin2==TRUE) {
for (l in 1:length(clones)) {
clones[[l]]$cn$start[k] <- clones[[l]]$cn$start[k-1]
clones[[l]]$cn$markers[k] <- clones[[l]]$cn$markers[k] + clones[[l]]$cn$markers[k-1]
clones[[l]]$cn[k-1,] <- rep(NA, ncol(clones[[1]]$cn))
}
data.mixed$start[k] <- data.mixed$start[k-1]
data.mixed$markers[k] <- data.mixed$markers[k-1]
data.mixed[k-1,] <- rep(NA, ncol(data.mixed))
}
}
notna <- unique(unlist(lapply(1:length(clones),function(j) {which(!is.na(clones[[j]]$cn$seg))})))
for (k in 1:length(clones)) {
clones[[k]]$cn <- clones[[k]]$cn[notna,]
clones[[k]]$cn$seg <- rownames(clones[[k]]$cn) <- 1:nrow(clones[[k]]$cn)
clones[[k]]$cn
}
data.mixed <- data.mixed[notna,]
dat <- list('cn.data'=data.mixed, 'seq.data'=NULL)
dat$cn.data <- na.omit(dat$cn.data)
dat$cn.data$seg <- rownames(dat$cn.data) <- 1:nrow(dat$cn.data)
if (alter) {
alt.pools <- lapply(1:length(psi[which(psi>0)]),function(j) {
x <- dat$cn.data$X
y <- dat$cn.data$Y
markers <- dat$cn.data$markers
which(markers>1000 & x-shifts[j]/2>.95 & x-shifts[j]/2<1.05 & y-shifts[j]/2>.95 & y-shifts[j]/2<1.05)
})
altN <- sample(1:3,length(psi[which(psi>0)]),prob=c(.6,.3,.1),replace=TRUE)
altered <- lapply(1:length(psi[which(psi>0)]),function(j) {sample(alt.pools[[j]],altN[j])})
change <- lapply(1:length(psi[which(psi>0)]),function(j) {sample(c(-1,1),altN[j],replace=TRUE)})
for (l in 1:length(psi[which(psi>0)])) {
if (length(altered[[l]])>0) {
for (m in 1:length(altered[[l]])) {
allele <- sample(c(1,2),1)
clones[[l]]$cn[altered[[l]][[m]],allele+3] <-
clones[[l]]$cn[altered[[l]][[m]],allele+3] + change[[l]][[m]]
dat$cn.data[altered[[l]][[m]],allele+4] <- dat$cn.data[altered[[l]][[m]],allele+4] +
change[[l]][[m]]*psi[l]
if (dat$cn.data[altered[[l]][[m]],allele+4] < 0) {
dat$cn.data[altered[[l]][[m]],allele+4] <- -dat$cn.data[altered[[l]][[m]],allele+4]
}
}
}
}
}
tumor <- list('clones'=clones, 'psi'=psi, 'altered'=altered,'change'=change)
save(dat, file=paste(dataPath, '/mixdat', '-', index, '.rda', sep=''))
save(tumor, file=paste(mixPath, '/mixsim', '-', index, '.rda', sep=''))
}
generateMixtures <- function(dataPath, mixPath, nPerK, segmentedData, ID_pool, pos) {
threshold <- 0.04
psis <- Reduce(rbind, lapply(1:5, function(x) {
mat <- sampleSimplex(nPerK[x], x)
cbind(mat, matrix(0, nrow = nrow(mat), ncol = 5 - ncol(mat)))
}))
IDs <- lapply(1:nrow(psis), function(j) {
sample(ID_pool, length(which(psis[j,]>0)), replace=FALSE)
})
if (!dir.exists(mixPath)) {
dir.create(mixPath)
}
if (!dir.exists(dataPath)) {
dir.create(dataPath)
}
metadata <- list(IDs = IDs, psis = psis)
save(metadata, file=paste(mixPath, '/metadata.rda', sep=''))
sapply(1:nrow(psis), function(i) {
temp <- lapply(1:length(IDs[[i]]), function(j) {
segmentedData[segmentedData$SamID == IDs[[i]][j],]
})
mix(temp , psis[i,], dataPath, mixPath, i, pos, alter=TRUE)
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.