# Load Biostrings; presumably the source of the sequence helpers used further
# down (consensusMatrix(), width()) -- confirm.
library(Biostrings)
# A very large scipen penalty makes R prefer fixed over scientific notation
# when numbers are printed.
options(scipen=999)
# NOTE(review): report_dat is not created in this file; it must already exist
# in the environment that evaluates this report -- confirm against the caller.
em_dat <- report_dat$em_dat
# attach() puts the elements of em_dat on the search path.
# NOTE(review): prefix, suffix, motif_length and motif_dat are never assigned
# in this file, so they presumably resolve through this attach() -- confirm.
# There is no matching detach() in this file.
attach(em_dat)

Motif Searches

# Current date-time of this run. The call is a bare top-level expression, so
# its value is only shown where top-level results are auto-printed.
Sys.time()

Input parameters for the searches

# Echo the three search parameters, one print() call each.
# NOTE(review): none of these names is assigned in this file; presumably they
# come from the attached em_dat -- confirm.
print(prefix)
print(suffix)
print(motif_length)

Matched vs Unmatched sequences

# Number of sequences in each category, and their combined total.
n_matched <- length(motif_dat$matched_seq)
n_unmatched <- length(motif_dat$unmatched_seq)
n_total <- n_matched + n_unmatched

# One-row summary: raw counts plus each category's share of the total.
# Each share is rounded to 4 decimals as a fraction and then scaled to a
# percentage.
match_summary <- data.frame(
  seq_matched_count = n_matched,
  seq_matched_perc = 100 * round(n_matched / n_total, 4),
  seq_unmatched_count = n_unmatched,
  seq_unmatched_perc = 100 * round(n_unmatched / n_total, 4)
)
kable(match_summary)

Matched Sequences

Summary of sequences

# Display the matched sequence set using its print method.
print(motif_dat$matched_seq)

The letter frequencies

Counts of the letters in the matched sequences.

# Total occurrences of each letter over all matched sequences: sum every row
# of the consensus matrix (rows are named by letter; the names are reused as
# the `letter` column below).
x <- rowSums(consensusMatrix(motif_dat$matched_seq))
# Keep only letters that actually occur.
x <- x[x != 0]
# seq_along() instead of 1:length(x): when no letters remain, 1:0 would give
# c(1, 0) and data.frame() would reject the row names.
kable(data.frame(letter = names(x),
                 count = x,
                 count_per_seq = round(x / length(motif_dat$matched_seq), 2),
                 row.names = seq_along(x)))

Normalized counts of the letters in the matched sequences.

# Share of each letter among all letters counted in `x`, rounded to 4
# decimals for the table.
x_freq <- round(x / sum(x), 4)
# The column keeps its original name `count` (it holds proportions) so the
# rendered table header is unchanged.
# seq_along() instead of 1:length(x_freq): with no letters, 1:0 would give
# c(1, 0) and data.frame() would reject the row names.
kable(data.frame(letter = names(x_freq),
                 count = x_freq,
                 row.names = seq_along(x_freq)))
# The plot uses the unrounded proportions. barplot() cannot draw an empty
# vector, so the plot is skipped with a note instead of aborting the report.
if (length(x) > 0) {
  barplot(x / sum(x),
          main = 'Normalized counts of the letters\nin the matched sequences')
} else {
  message("No letters found in the matched sequences: bar plot skipped.")
}

The sequence lengths

# Distribution of the lengths of the matched sequences.
# hist() cannot bin an empty vector, so when there are no matched sequences
# the plot is skipped with a note instead of aborting the report.
if (length(motif_dat$matched_seq) > 0) {
  hist(width(motif_dat$matched_seq),
       main = "Histogram of the lengths of the matched sequences",
       xlab = "Sequence Length",
       ylab = "Number of Sequences")
} else {
  message("No matched sequences: length histogram skipped.")
}

Unmatched Sequences

Summary of sequences

# Display the unmatched sequence set using its print method.
print(motif_dat$unmatched_seq)

The letter frequencies

Counts of the letters in the unmatched sequences.

# Total occurrences of each letter over all unmatched sequences: sum every
# row of the consensus matrix (rows are named by letter; the names are reused
# as the `letter` column below).
x <- rowSums(consensusMatrix(motif_dat$unmatched_seq))
# Keep only letters that actually occur.
x <- x[x != 0]
# seq_along() instead of 1:length(x): when no letters remain, 1:0 would give
# c(1, 0) and data.frame() would reject the row names.
kable(data.frame(letter = names(x),
                 count = x,
                 count_per_seq = round(x / length(motif_dat$unmatched_seq), 2),
                 row.names = seq_along(x)))

Normalized counts of the letters in the unmatched sequences.

# Share of each letter among all letters counted in `x`, rounded to 4
# decimals for the table.
x_freq <- round(x / sum(x), 4)
# The column keeps its original name `count` (it holds proportions) so the
# rendered table header is unchanged.
# seq_along() instead of 1:length(x_freq): with no letters, 1:0 would give
# c(1, 0) and data.frame() would reject the row names.
kable(data.frame(letter = names(x_freq),
                 count = x_freq,
                 row.names = seq_along(x_freq)))
# The plot uses the unrounded proportions. barplot() cannot draw an empty
# vector, so the plot is skipped with a note instead of aborting the report.
if (length(x) > 0) {
  barplot(x / sum(x),
          main = 'Normalized counts of the letters\nin the unmatched sequences')
} else {
  message("No letters found in the unmatched sequences: bar plot skipped.")
}

The sequence lengths

# Distribution of the lengths of the unmatched sequences.
# hist() cannot bin an empty vector, so when there are no unmatched sequences
# the plot is skipped with a note instead of aborting the report.
if (length(motif_dat$unmatched_seq) > 0) {
  hist(width(motif_dat$unmatched_seq),
       main = "Histogram of the lengths of the unmatched sequences",
       xlab = "Sequence Length",
       ylab = "Number of Sequences")
} else {
  message("No unmatched sequences: length histogram skipped.")
}


philliplab/MotifBinner documentation built on Sept. 2, 2020, 11:41 a.m.