coords.2.matrix:

Usage Arguments Examples

Usage

1
coords.2.matrix(coordfile, genomefile, outdir, windowsize = 10000, cores = "max")

Arguments

coordfile
genomefile
outdir
windowsize
cores

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (coordfile, genomefile, outdir, windowsize = 10000, 
    cores = "max") 
{
    library(parallel)
    library(gtools)
    if (cores == "max") {
        cores = detectCores() - 1
    }
    cat("finding chromosomes\n")
    chroms <- mixedsort(readLines(pipe(paste("cut -f 1", coordfile, 
        "| sort | uniq"))))
    chroms <- chroms[which(chroms != "chrM")]
    chroms <- chroms[which(chroms != "chrY")]
    numchroms <- length(chroms)
    chromsizes <- read.tsv(genomefile)
    if (sum(chroms %in% chromsizes[, 1]) != length(chroms)) {
        stop("one or more chromosomes not found in genome file")
    }
    chromsizes <- chromsizes[match(chroms, chromsizes[, 1]), 
        ]
    dir.create(outdir)
    outnames <- paste0(outdir, "/", chroms, "_win", windowsize, 
        ".mat")
    mclapply(1:numchroms, function(r) {
        cat("getting", chroms[r], "data\n")
        numchromwins <- floor(chromsizes[r, 2]/windowsize)
        brks <- (0:numchromwins) * windowsize
        coords <- read.delim(pipe(paste0("awk '($1==\"", chroms[r], 
            "\" && $3==\"", chroms[r], "\")' ", coordfile, " | cut -f 2,4")), 
            stringsAsFactors = F, header = F)
        coords <- coords[which(coords[, 1] <= brks[length(brks)] & 
            coords[, 2] <= brks[length(brks)]), ]
        a = coords[, 1]
        b = coords[, 2]
        cat("binning x-axis data\n")
        xbinind <- cut(a, brks, labels = F)
        cat("binning y-axis data\n")
        h <- mclapply(1:numchromwins, function(x) {
            hist(b[which(xbinind == x)], breaks = brks, plot = F)
        }, mc.cores = cores)
        cat("creating interaction matrix\n")
        binmat <- data.matrix(as.data.frame(lapply(h, "[[", 2)))
        cat("saving binmat\n")
        write.tsv(binmat, file = outnames[r])
    }, mc.cores = cores, mc.preschedule = F)
  }

dvera/arthur documentation built on June 5, 2019, 5:12 a.m.