repliseqwindowsize:

Usage Arguments Examples

Usage

1
repliseqwindowsize(early, late, scalar = "auto", genome = "hg19", stepsize = 1000, windowsizes = 1000 * 2^(0:10), cores = "max")

Arguments

early
late
scalar
genome
stepsize
windowsizes
cores

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (early, late, scalar = "auto", genome = "hg19", stepsize = 1000, 
    windowsizes = 1000 * 2^(0:10), cores = "max") 
{
    library(parallel)
    if (cores == "max") {
        cores = detectCores() - 1
    }
    numwins <- length(windowsizes)
    rb <- rainbow(numwins)
    cat("windowing genome\n")
    windowfiles <- unlist(mclapply(1:numwins, function(x) bed.makewindows("hg19", 
        genome = TRUE, windowsize = windowsizes[x], stepsize = stepsize, 
        outname = paste(early, windowsizes[x], "windows", sep = "")), 
        mc.cores = cores))
    cat("counting windows\n")
    windowcounts <- unlist(mclapply(windowfiles, filelines, mc.cores = cores))
    print(windowcounts)
    cat("calculating windowed coverage for early\n")
    ecovs <- unlist(mclapply(1:numwins, function(x) bed.windowcov(early, 
        windowbed = windowfiles[x], windowsize = windowsizes[x], 
        stepsize = stepsize, genome = genome, scalar = scalar), 
        mc.cores = cores))
    cat("calculating windowed coverage for late\n")
    lcovs <- unlist(mclapply(1:numwins, function(x) bed.windowcov(late, 
        windowbed = windowfiles[x], windowsize = windowsizes[x], 
        stepsize = stepsize, genome = genome, scalar = scalar), 
        mc.cores = cores))
    ecovs <- gsub("bw", "bg", ecovs)
    lcovs <- gsub("bw", "bg", lcovs)
    cat("loading coverages\n")
    cat("loading data\n")
    covs <- mclapply(ecovs, read.tsv, mc.cores = cores)
    covs <- mclapply(1:numwins, function(x) {
        covs[[x]]$V5 <- as.numeric(readLines(pipe(paste("cut -f 4", 
            lcovs[x]))))
        covs[[x]]$V6 <- log2(covs[[x]][, 4]/covs[[x]][, 5])
        covs[[x]]$V6[is.infinite(covs[[x]][, 6])] <- NA
        covs[[x]]
    }, mc.cores = cores)
    cat("counting and plotting zeroes\n")
    earlyzeroes <- (mclapply(1:numwins, function(x) which(covs[[x]][, 
        4] == 0), mc.cores = cores))
    numearlyzeroes <- unlist(lapply(earlyzeroes, length))
    latezeroes <- (mclapply(1:numwins, function(x) which(covs[[x]][, 
        5] == 0), mc.cores = cores))
    numlatezeroes <- unlist(lapply(latezeroes, length))
    bothzeroes <- (mclapply(1:numwins, function(x) which(covs[[x]][, 
        4] == 0 & covs[[x]][, 5] == 0), mc.cores = cores))
    numbothzeroes <- unlist(lapply(bothzeroes, length))
    pdf(file = paste(basename(removeext(early)), "_noscores-vs-windowsize.pdf", 
        sep = ""))
    plot(0, type = "n", xlab = "windowsize (bp)", ylab = "% windows with no reads", 
        xlim = c(0, 128000), ylim = c(0, 50))
    abline(v = windowsizes, col = "grey70")
    points(windowsizes, 100 * numearlyzeroes/windowcounts, col = "blue", 
        lwd = 3)
    lines(windowsizes, 100 * numearlyzeroes/windowcounts, col = "blue", 
        lwd = 3)
    lines(windowsizes, 100 * numlatezeroes/windowcounts, col = "red", 
        lwd = 3)
    points(windowsizes, 100 * numlatezeroes/windowcounts, col = "red", 
        lwd = 3)
    lines(windowsizes, 100 * numbothzeroes/windowcounts, lwd = 3)
    points(windowsizes, 100 * numbothzeroes/windowcounts, lwd = 3)
    legend("topright", legend = c("early", "late", "both"), col = c("blue", 
        "red", "black"), lwd = 3)
    cat("calculating score distributions and plotting\n")
    equants <- unlist(mclapply(1:numwins, function(x) quantile(coverages[[x]][, 
        4], probs = 0.95, na.rm = TRUE), mc.cores = cores))
    equants <- 1000 * equants/windowsizes
    lquants <- unlist(mclapply(1:numwins, function(x) quantile(coverages[[x]][, 
        5], probs = 0.95, na.rm = TRUE), mc.cores = cores))
    lquants <- 1000 * lquants/windowsizes
    maxscore <- max(c(equants, lquants))
    earlydensities <- mclapply(1:numwins, function(x) density(1000 * 
        coverages[[x]][, 4]/windowsizes[x], na.rm = TRUE, from = 0, 
        to = maxscore), mc.cores = cores)
    latedensities <- mclapply(1:numwins, function(x) density(1000 * 
        coverages[[x]][, 5]/windowsizes[x], na.rm = TRUE, from = 0, 
        to = maxscore), mc.cores = cores)
    ratiodensities <- mclapply(1:numwins, function(x) density(coverages[[x]][, 
        6], na.rm = TRUE, from = -8, to = 8), mc.cores = cores)
    plot(0, type = "n", xlab = "RPM", ylab = "frequency of windows (kernel density estimate)", 
        main = "read density histograms for early fraction", 
        xlim = c(0, quantile(unlist(lapply(earlydensities, "[", 
            "x")), probs = 0.9)), ylim = c(0, max(unlist(lapply(earlydensities, 
            "[", "y")))))
    for (i in 1:numwins) {
        lines(earlydensities[[i]][["x"]], earlydensities[[i]][["y"]], 
            lwd = 3, col = rb[i])
    }
    legend("topright", legend = paste(windowsizes, "bp windows"), 
        col = rb, lwd = 3, lty = 1)
    plot(0, type = "n", xlab = "RPM", ylab = "frequency of windows (kernel density estimate)", 
        main = "read density histograms for late fraction", xlim = c(0, 
            quantile(unlist(lapply(earlydensities, "[", "x")), 
                probs = 0.9)), ylim = c(0, max(unlist(lapply(earlydensities, 
            "[", "y")))))
    for (i in 1:numwins) {
        lines(latedensities[[i]][["x"]], latedensities[[i]][["y"]], 
            lwd = 3, col = rb[i])
    }
    legend("topright", legend = paste(windowsizes, "bp windows"), 
        col = rb, lwd = 3, lty = 1)
    plot(0, type = "n", xlab = "RT score , log2(early/late)", 
        ylab = "frequency of windows (kernel density estimate)", 
        main = "RT score histograms", xlim = c(-8, 8), ylim = c(0, 
            max(unlist(lapply(ratiodensities, "[", "y")))))
    for (i in 1:numwins) {
        lines(ratiodensities[[i]][["x"]], ratiodensities[[i]][["y"]], 
            lwd = 3, col = rb[i])
    }
    legend("topright", legend = paste(windowsizes, "bp windows"), 
        col = rb, lwd = 3, lty = 1, cex = 0.75)
    latewherenoearly <- mclapply(1:numwins, function(x) {
        coverages[[x]][, 5][which(coverages[[x]][, 4] == 0)]
    }, mc.cores = cores)
    earlywherenolate <- mclapply(1:numwins, function(x) {
        coverages[[x]][, 4][which(coverages[[x]][, 5] == 0)]
    }, mc.cores = cores)
    dev.off()
  }

dvera/arthur documentation built on June 5, 2019, 5:12 a.m.