motif.list.p: a function to calculate rank correlation p-values for a set...

Usage Arguments Examples

View source: R/motif.list.p.R

Usage

1
motif.list.p(seqlist, motiflist, cores = 1, mode = "bb", order = 1, exact = TRUE, overlap = TRUE)

Arguments

seqlist

A list of ranked sequences in which the motif rank correlation is calculated. The sequence list should be a list object containing 'seq' objects or character strings.

motiflist

A character vector of motifs, or a list of 'pattern' objects.

cores

Number of cores to use in parallel.

mode

One of "bb" (default), "mw", "rw" or "mhg", depending on the correlation evaluation method.

order

1 or 2 for mono-nucleotide or di-nucleotide dependency of sequence specific p-value calculation.

exact

Logical, passed on to motif.p; not used when mode is "mhg".

overlap

Logical, are motifs allowed to overlap in the sequences.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (seqlist, motiflist, cores = 1, mode = "bb", order = 1, 
    exact = TRUE, overlap = TRUE) 
{
    # Compute one p-value per motif in `motiflist` against the sequences in
    # `seqlist`. The motif list is processed in batches through mclapply()
    # (using `cores` processes) and progress is printed with cat().
    #
    # Arguments:
    #   seqlist    list of 'seq' objects, or character strings (converted
    #              with seq.list.con() unless mode is "mhg").
    #   motiflist  character vector of motifs (converted with pat.con()
    #              unless mode is "mhg"), or a list of 'pattern' objects.
    #   cores      passed to mclapply() as mc.cores.
    #   mode       one of "bb", "mw", "rw", "mhg". "mhg" evaluates each motif
    #              with mhg.motif.p(); the other modes use motif.p().
    #   order, exact, overlap
    #              forwarded unchanged to motif.p(); unused in "mhg" mode.
    #
    # Returns: a vector with one entry per motif, named by the motif strings.
    ptm <- proc.time()
    if (!(mode %in% c("bb", "mw", "rw", "mhg"))) {
        stop("mode should be one of 'bb'(default), 'mw' 'rw' or 'mhg'")
    }

    # Progress checkpoints, in percent of the motif list completed.
    few.steps <- c(30, 60, 100)
    many.steps <- c(1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100)

    # Split 1..n into consecutive index batches, one per checkpoint. A
    # checkpoint that would cover no new element gets an empty index vector;
    # a descending range such as 2:1 would otherwise revisit elements or
    # index past the end of the list.
    batches <- function(n, steps) {
        out <- vector("list", length(steps))
        end <- 0
        for (k in seq_along(steps)) {
            start <- end + 1
            end <- round(steps[k]/100 * n, 0)
            idx <- if (end >= start) start:end else integer(0)
            out[[k]] <- list(pct = steps[k], idx = idx)
        }
        out
    }

    if (mode == "mhg") {
        length.ml <- length(motiflist)
        cat("preparing ", length.ml, " motifs", "\n")
        cat(paste("start ", Sys.time()), "\n")
        # List elements may be pattern objects or already plain strings.
        if (is.list(motiflist)) 
            motiflist <- unlist(mclapply(motiflist, function(x) {
                if (is.character(x)) x else x$pattern
            }, mc.cores = cores))
        cat(paste("\n", "end ", Sys.time()), "\n\n")
        cat("preparing ", length(seqlist), " sequences", "\n")
        if (is.list(seqlist)) 
            seqlist <- unlist(mclapply(seqlist, function(x) {
                if (is.character(x)) x else x$sequence
            }, mc.cores = cores))
        cat(paste("\n", "end ", Sys.time()), "\n\n")
        cat("calculating probabilities...", "\n")
        cat(paste("start ", Sys.time()), "\n")
        cat("progress... ")
        vec2 <- rep(NA, length.ml)
        steps <- if (length.ml < 100) few.steps else many.steps
        for (b in batches(length.ml, steps)) {
            if (length(b$idx) > 0) {
                # The enclosing function has no `...` formal, so nothing
                # extra can be forwarded to mhg.motif.p() here.
                vec2[b$idx] <- unlist(mclapply(motiflist[b$idx], 
                  function(x) mhg.motif.p(seqlist = seqlist, motif = x), 
                  mc.cores = cores))
            }
            cat(b$pct, "%.. ", sep = "")
        }
        names(vec2) <- motiflist
        cat(paste("\n", "finished ", Sys.time()), "  ,  total time (s): ", 
            proc.time()[3] - ptm[3], "\n")
        return(vec2)
    }

    # Sequences: convert plain strings, then require 'seq' objects.
    if (length(seqlist) > 0 && is.character(seqlist[[1]])) {
        cat("preparing sequences", "\n")
        seqlist <- seq.list.con(seqlist, cores = cores)
    }
    if (length(seqlist) == 0 || !inherits(seqlist[[1]], "seq")) {
        stop("sequences should be character vector or 'seq' object")
    }

    # Motifs: motiflist3 holds the 'pattern' objects, motiflist the strings
    # used to name the result.
    length.ml <- length(motiflist)
    motiflist3 <- NULL
    if (is.character(motiflist)) {
        cat("preparing ", length.ml, " motifs", "\n")
        cat(paste("start ", Sys.time()), "\n")
        cat("progress... ")
        motiflist3 <- vector("list", length.ml)
        steps <- if (length.ml < 100) few.steps else many.steps
        for (b in batches(length.ml, steps)) {
            if (length(b$idx) > 0) {
                motiflist3[b$idx] <- mclapply(motiflist[b$idx], 
                  function(motif) pat.con(motif), mc.cores = cores)
            }
            cat(b$pct, "%.. ", sep = "")
        }
    }
    else if (length.ml > 0 && inherits(motiflist[[1]], "pattern")) {
        motiflist3 <- motiflist
        motiflist <- unlist(mclapply(motiflist3, function(x) x$pattern, 
            mc.cores = cores))
    }
    cat(paste("\n", "end ", Sys.time()), "\n\n")
    if (is.null(motiflist3) || (length.ml > 0 && !inherits(motiflist3[[1]], 
        "pattern"))) {
        stop("motif should be character vector or list of 'pattern' objects")
    }

    cat("calculating probabilities...", "\n")
    cat(paste("start ", Sys.time()), "\n")
    cat("progress... ")
    vec2 <- rep(NA, length.ml)
    # With no more motifs than cores, a single batch keeps every core busy;
    # otherwise report progress at the usual checkpoints. A list of exactly
    # 100 motifs uses the fine-grained checkpoints.
    if (length.ml <= cores) {
        steps <- 100
    }
    else if (length.ml < 100) {
        steps <- few.steps
    }
    else {
        steps <- many.steps
    }
    for (b in batches(length.ml, steps)) {
        if (length(b$idx) > 0) {
            vec2[b$idx] <- unlist(mclapply(motiflist3[b$idx], 
                function(x) motif.p(seqlist, x, mode = mode, order = order, 
                  exact = exact, overlap = overlap), mc.cores = cores))
        }
        cat(b$pct, "%.. ", sep = "")
    }
    names(vec2) <- motiflist
    cat(paste("\n", "finished ", Sys.time()), "  ,  total time (s): ", 
        proc.time()[3] - ptm[3], "\n")
    vec2
}

muhligs/Regmex documentation built on Sept. 5, 2020, 1:11 a.m.