1 | rpm2timing2(bgfiles, reference = 1, cores = "max", fractionhist = TRUE, scorehist = TRUE)
|
bgfiles |
|
reference |
|
cores |
|
fractionhist |
|
scorehist |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | ##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (bgfiles, reference = 1, cores = "max", fractionhist = TRUE,
scorehist = TRUE)
{
library(parallel)
if (cores == "max") {
cores = detectCores() - 1
}
fractions = c("G1b", "G2", "S1", "S2", "S3", "S4")
cat("checking file count\n")
if (floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6)) {
stop("# of files not divisible by 6. missing a file?")
}
cat("checking file lines\n")
if (length(unique(unlist(lapply(bgfiles, filelines)))) !=
1) {
stop("files have different numbers of lines")
}
cat("finding files for each cell line\n")
cells <- unique(remove.suffix(remove.prefix(bgfiles[grep("G2",
bgfiles)], "Seq"), "G2"))
print(cells)
numcells <- length(cells)
rb = rainbow(numcells)
cellfiles <- lapply(1:numcells, function(x) bgfiles[grep(cells[x],
bgfiles)])
print(cellfiles)
outnames <- paste(cells, "_weightedscore.bg", sep = "")
finalnames <- paste(cells, "_iqr_qnorm.bg", sep = "")
cat("checking files for each cell line\n")
numpercell = unique(unlist(lapply(cellfiles, length)))
if (length(numpercell) != 1 | numpercell[1] != 6) {
print(cellfiles)
stop("each cell line must match only 6 files")
}
checko <- lapply(1:numcells, function(x) {
lapply(1:6, function(y) {
if (grepl(fractions[y], cellfiles[[x]][y]) == FALSE) {
stop(paste("cell files do not match expected fraction for",
cells[x], cellfiles[[x]][y]))
}
})
})
coords <- read.delim(pipe(paste("cut -f 1,2,3", cellfiles[[1]][1])),
header = F)
cat("getting scores\n")
celldata <- mclapply(1:numcells, function(x) {
cd <- as.data.frame(lapply(1:6, function(y) {
as.numeric(readLines(pipe(paste("cut -f 4", cellfiles[[x]][y]))))
}))
colnames(cd) <- fractions
return(cd)
}, mc.cores = cores)
celldata <- as.data.frame(mclapply(1:numcells, function(x) {
cd <- celldata[[x]]
wa <- (0.917 * cd$G1b) + (0.75 * cd$S1) + (0.583 * cd$S2) +
(0.417 * cd$S3) + (0.25 * cd$S4) + (0 * cd$G2)
wa[which(wa == 0)] <- NA
return(wa)
}, mc.cores = cores), stringsAsFactors = FALSE)
colnames(celldata) <- cells
numwindows <- nrow(celldata)
goodblocks <- which(complete.cases(celldata))
cat("discarding", numwindows - length(goodblocks), "windows that have no data in at least 1 sample (",
(numwindows - length(goodblocks))/numwindows, "% )\n")
celldata <- celldata[goodblocks, ]
coords <- coords[goodblocks, ]
reference <- rowMeans(as.data.frame(lapply(celldata, sort)))
celldata <- as.data.frame(mclapply(1:numcells, function(x) {
celldata[, x][order(celldata[, x])] <- reference
return(celldata[, x])
}, mc.cores = cores))
celldata <- ((celldata - median(celldata, na.rm = TRUE))) *
1.59/IQR(unlist(celldata), na.rm = TRUE)
celldata <- mclapply(1:numcells, function(x) {
write.tsv(cbind(coords, celldata[, x], stringsAsFactors = FALSE),
file = finalnames[x])
}, mc.cores = cores)
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.