rpm2timing2:

Usage Arguments Examples

Usage

1
rpm2timing2(bgfiles, reference = 1, cores = "max", fractionhist = TRUE, scorehist = TRUE)

Arguments

bgfiles
reference
cores
fractionhist
scorehist

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (bgfiles, reference = 1, cores = "max", fractionhist = TRUE, 
    scorehist = TRUE) 
{
    library(parallel)
    if (cores == "max") {
        cores = detectCores() - 1
    }
    fractions = c("G1b", "G2", "S1", "S2", "S3", "S4")
    cat("checking file count\n")
    if (floor(length(bgfiles)/6) != ceiling(length(bgfiles)/6)) {
        stop("# of files not divisible by 6. missing a file?")
    }
    cat("checking file lines\n")
    if (length(unique(unlist(lapply(bgfiles, filelines)))) != 
        1) {
        stop("files have different numbers of lines")
    }
    cat("finding files for each cell line\n")
    cells <- unique(remove.suffix(remove.prefix(bgfiles[grep("G2", 
        bgfiles)], "Seq"), "G2"))
    print(cells)
    numcells <- length(cells)
    rb = rainbow(numcells)
    cellfiles <- lapply(1:numcells, function(x) bgfiles[grep(cells[x], 
        bgfiles)])
    print(cellfiles)
    outnames <- paste(cells, "_weightedscore.bg", sep = "")
    finalnames <- paste(cells, "_iqr_qnorm.bg", sep = "")
    cat("checking files for each cell line\n")
    numpercell = unique(unlist(lapply(cellfiles, length)))
    if (length(numpercell) != 1 | numpercell[1] != 6) {
        print(cellfiles)
        stop("each cell line must match only 6 files")
    }
    checko <- lapply(1:numcells, function(x) {
        lapply(1:6, function(y) {
            if (grepl(fractions[y], cellfiles[[x]][y]) == FALSE) {
                stop(paste("cell files do not match expected fraction for", 
                  cells[x], cellfiles[[x]][y]))
            }
        })
    })
    coords <- read.delim(pipe(paste("cut -f 1,2,3", cellfiles[[1]][1])), 
        header = F)
    cat("getting scores\n")
    celldata <- mclapply(1:numcells, function(x) {
        cd <- as.data.frame(lapply(1:6, function(y) {
            as.numeric(readLines(pipe(paste("cut -f 4", cellfiles[[x]][y]))))
        }))
        colnames(cd) <- fractions
        return(cd)
    }, mc.cores = cores)
    celldata <- as.data.frame(mclapply(1:numcells, function(x) {
        cd <- celldata[[x]]
        wa <- (0.917 * cd$G1b) + (0.75 * cd$S1) + (0.583 * cd$S2) + 
            (0.417 * cd$S3) + (0.25 * cd$S4) + (0 * cd$G2)
        wa[which(wa == 0)] <- NA
        return(wa)
    }, mc.cores = cores), stringsAsFactors = FALSE)
    colnames(celldata) <- cells
    numwindows <- nrow(celldata)
    goodblocks <- which(complete.cases(celldata))
    cat("discarding", numwindows - length(goodblocks), "windows that have no data in at least 1 sample (", 
        (numwindows - length(goodblocks))/numwindows, "% )\n")
    celldata <- celldata[goodblocks, ]
    coords <- coords[goodblocks, ]
    reference <- rowMeans(as.data.frame(lapply(celldata, sort)))
    celldata <- as.data.frame(mclapply(1:numcells, function(x) {
        celldata[, x][order(celldata[, x])] <- reference
        return(celldata[, x])
    }, mc.cores = cores))
    celldata <- ((celldata - median(celldata, na.rm = TRUE))) * 
        1.59/IQR(unlist(celldata), na.rm = TRUE)
    celldata <- mclapply(1:numcells, function(x) {
        write.tsv(cbind(coords, celldata[, x], stringsAsFactors = FALSE), 
            file = finalnames[x])
    }, mc.cores = cores)
  }

dvera/arthur documentation built on June 5, 2019, 5:12 a.m.