rpm2timing3:

Usage Arguments Examples

Usage

1
rpm2timing3(bgfiles, reference = 1, cores = "max")

Arguments

bgfiles
reference
cores

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (bgfiles, reference = 1, cores = "max") 
{
    library(parallel)
    if (cores == "max") {
        cores = detectCores() - 1
    }
    fractions = c("G1b", "G2", "S1", "S2", "S3", "S4")
    cat("checking file count\n")
    if (floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6)) {
        stop("# of files not divisible by 6. missing a file?")
    }
    cat("finding files for each cell line\n")
    cells <- unique(remove.suffix(remove.prefix(bgfiles[grep("G2", 
        bgfiles)], "Seq"), "G2"))
    print(cells)
    numcells <- length(cells)
    rb = rainbow(numcells)
    cellfiles <- lapply(1:numcells, function(x) bgfiles[grep(cells[x], 
        bgfiles)])
    ubgnames <- paste(cells, "_union.bg", sep = "")
    outnames <- paste(cells, "_weightedscore.bg", sep = "")
    finalnames <- paste(cells, "_iqr_qnorm.bg", sep = "")
    cat("checking files for each cell line\n")
    numpercell = unique(unlist(lapply(cellfiles, length)))
    if (length(numpercell) != 1 | numpercell[1] != 6) {
        print(cellfiles)
        stop("each cell line must match only 6 files")
    }
    checko <- lapply(1:numcells, function(x) {
        lapply(1:6, function(y) {
            if (grepl(fractions[y], cellfiles[[x]][y]) == FALSE) {
                stop(paste("cell files do not match expected fraction for", 
                  cells[x], cellfiles[[x]][y]))
            }
        })
    })
    cat("making unionbg\n")
    celldata <- mclapply(1:numcells, function(x) {
        cd <- read.delim(pipe(paste("bedtools unionbedg -filler NA -i", 
            paste(cellfiles[[x]], collapse = " "))))
        colnames(cd) <- c("chrom", "start", "stop", fractions)
        goodrows <- which(rowSums(is.na(cd)) != 6)
        numrows <- nrow(cd)
        numgoodrows <- length(goodrows)
        cat(numrows - numgoodrows, "windows have no data in at least 1 fraction (", 
            (numrows - numgoodrows)/numrows, "% )\n")
        cd[is.na(cd)] <- 0
        wa <- (0.917 * cd$G1b) + (0.75 * cd$S1) + (0.583 * cd$S2) + 
            (0.417 * cd$S3) + (0.25 * cd$S4) + (0 * cd$G2)
        cd <- data.frame(V1 = cd$chrom, V2 = cd$start, V3 = cd$stop, 
            V4 = wa, stringsAsFactors = FALSE)
        rm(wa)
        return(cd)
        print(head(cd))
    }, mc.cores = cores)
    ref <- unlist(lapply(1:numcells, function(x) celldata[[x]][, 
        4]))
    cat("normalizing data\n")
    celldata <- mclapply(1:numcells, function(x) {
        cd <- celldata[[x]]
        numpoints <- nrow(cd)
        curref <- sample(ref, numpoints)
        cd[, 4][order(cd[, 4])] <- curref[order(curref)]
        cd[, 4] <- ((cd[, 4] - median(cd[, 4], na.rm = TRUE))) * 
            1.59/IQR(cd[, 4], na.rm = TRUE)
        write.tsv(cd, file = finalnames[x])
        return(cd)
    }, mc.cores = cores)
  }

dvera/arthur documentation built on June 5, 2019, 5:12 a.m.