1 | rpm2timing3(bgfiles, reference = 1, cores = "max")
|
bgfiles |
|
reference |
|
cores |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | ##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (bgfiles, reference = 1, cores = "max")
{
library(parallel)
if (cores == "max") {
cores = detectCores() - 1
}
fractions = c("G1b", "G2", "S1", "S2", "S3", "S4")
cat("checking file count\n")
if (floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6)) {
stop("# of files not divisible by 6. missing a file?")
}
cat("finding files for each cell line\n")
cells <- unique(remove.suffix(remove.prefix(bgfiles[grep("G2",
bgfiles)], "Seq"), "G2"))
print(cells)
numcells <- length(cells)
rb = rainbow(numcells)
cellfiles <- lapply(1:numcells, function(x) bgfiles[grep(cells[x],
bgfiles)])
ubgnames <- paste(cells, "_union.bg", sep = "")
outnames <- paste(cells, "_weightedscore.bg", sep = "")
finalnames <- paste(cells, "_iqr_qnorm.bg", sep = "")
cat("checking files for each cell line\n")
numpercell = unique(unlist(lapply(cellfiles, length)))
if (length(numpercell) != 1 | numpercell[1] != 6) {
print(cellfiles)
stop("each cell line must match only 6 files")
}
checko <- lapply(1:numcells, function(x) {
lapply(1:6, function(y) {
if (grepl(fractions[y], cellfiles[[x]][y]) == FALSE) {
stop(paste("cell files do not match expected fraction for",
cells[x], cellfiles[[x]][y]))
}
})
})
cat("making unionbg\n")
celldata <- mclapply(1:numcells, function(x) {
cd <- read.delim(pipe(paste("bedtools unionbedg -filler NA -i",
paste(cellfiles[[x]], collapse = " "))))
colnames(cd) <- c("chrom", "start", "stop", fractions)
goodrows <- which(rowSums(is.na(cd)) != 6)
numrows <- nrow(cd)
numgoodrows <- length(goodrows)
cat(numrows - numgoodrows, "windows have no data in at least 1 fraction (",
(numrows - numgoodrows)/numrows, "% )\n")
cd[is.na(cd)] <- 0
wa <- (0.917 * cd$G1b) + (0.75 * cd$S1) + (0.583 * cd$S2) +
(0.417 * cd$S3) + (0.25 * cd$S4) + (0 * cd$G2)
cd <- data.frame(V1 = cd$chrom, V2 = cd$start, V3 = cd$stop,
V4 = wa, stringsAsFactors = FALSE)
rm(wa)
return(cd)
print(head(cd))
}, mc.cores = cores)
ref <- unlist(lapply(1:numcells, function(x) celldata[[x]][,
4]))
cat("normalizing data\n")
celldata <- mclapply(1:numcells, function(x) {
cd <- celldata[[x]]
numpoints <- nrow(cd)
curref <- sample(ref, numpoints)
cd[, 4][order(cd[, 4])] <- curref[order(curref)]
cd[, 4] <- ((cd[, 4] - median(cd[, 4], na.rm = TRUE))) *
1.59/IQR(cd[, 4], na.rm = TRUE)
write.tsv(cd, file = finalnames[x])
return(cd)
}, mc.cores = cores)
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.