#' grepReads greps for cDNA or gDNA reads cause they are both grouped togehter in the assembly input
#'
#' @param reads the ShortRead class object
#' @param type the pattern to look for
#' @export
#' @keywords internal
grepReads = function(reads, type="cDNA")
{
whichIsMRNA = ShortRead::id(reads) %>% as.character %>% grepl(type, .)
r = reads[whichIsMRNA] %>% sread
names(r) = ShortRead::id(reads[whichIsMRNA])
r
}
#' finds the location of the MDR on the contig
#'
#' finds the location of the MDR on the contig
#'
#' @param contigid the id of the contig
#' @param seq character string of the contig from the MSA
#' @param s starting loc
#' @param e ending loc
#' @export
mapContig = function(contigid, seq, s, e)
{
mdr = seq %>% as.character %>% substr(s, e) %>% gsub("-", "", .)
full = seq %>% gsub("-", "", .)
matchStart = regexpr(mdr, full) %>% as.integer
matchLength = regexpr(mdr, full) %>% attr("match.length")
matchEnd = matchStart + matchLength
data.frame(
id = contigid,
matchStart = matchStart,
matchEnd = matchEnd
)
}
#' Returns contig ranges for those captured within the MDR
#'
#' Header for contigs
#' contig00026 ref|YP_002288046.1| (ntRev) ## spanning:123 msaStart:653 msaEND:944 max10BPwindow:205.4 contigStart: 221 contigEnd: 421
#' Give GRanges output with seqname ko|contig00001
#'
#' @param ko the KO of interest
#' @param passDir path to pAss outputs
#' @importFrom GenomicRanges GRanges
#' @importFrom ShortRead readFasta
#' @export
#' @examples
#' \dontrun{
#' locsMDR = mdrRanges('K00927')
#' }
mdrRanges = function(ko, passDir)
{
mdrPath = sprintf("%s/pAss11/%s.fna", passDir, ko)
msaPath = sprintf("%s/pAss03/%s.msa", passDir, ko)
info = ShortRead::id(readFasta(mdrPath)) %>% head(n=1) %>% as.character
msa.starting = info %>% regexec("msaStart:(\\d+)",.) %>% regmatches(info, .) %>% unlist %>% tail(n=1) %>% as.integer
msa.ending = info %>% regexec("msaEND:(\\d+)",.) %>% regmatches(info, .) %>% unlist %>% tail(n=1) %>% as.integer
msa = readFasta(msaPath)
locs = mapply(mapContig, contigid = ShortRead::id(msa) %>% substr(1, 11), seq= lapply(sread(msa), function(x) x),
MoreArgs = list(s = msa.starting, e = msa.ending),
SIMPLIFY=FALSE
) %>% do.call(rbind,.) %>% tbl_df
mdrContigs = ShortRead::id(readFasta(mdrPath)) %>% as.character %>% substr(1, 11)
filter(locs, id %in% mdrContigs) %$% GRanges(seqnames = paste(ko, id, sep="|"), ranges = IRanges(matchStart, end=matchEnd))
}
#' blatting
#'
#'
#'
#' @param ko the ko of interest
#' @param type the
#' @param newblerInput path to newbler dir eg. /root in /root/K0000X/input/K0000X.1.fq
#' @param blatoutput path to store blat output in tabular blast format
#' @export
#'
#' @importFrom ShortRead readFastq
#' @importFrom GenomicRanges GRanges
#' @examples
#' \dontrun{
#'
#' }
blatting = function(ko, type, newblerInput)
{
combined = lapply(c(1,2), function(readNum){
read = sprintf("%s/%s/input/%s.%s.fq", newblerInput, ko, ko, readNum)
if(file.exists(read))
read.fq = read %>% readFastq %>% grepReads(type)
}) %>% do.call(c,.)
contigs = sprintf("%s/%s/454AllContigs.fna", newblerInput, ko)
blast8 = combined %>% map(contigs) #map is a metamapsDB function
blatRanges = GRanges(seqnames=blast8$subject, ranges=IRanges(blast8$newStart, blast8$newEnd), read=blast8$query)
}
#' overlaps
#' @param ko the ko of interest
#' @importFrom GenomicRanges countOverlaps
#' @importFrom dplyr tbl_df
#' @export
#' @examples
#' \dontrun{
#' mapReads2MDR('K00927', passDir="./out", newblerInput="./newbler2")
#' }
mapReads2MDR = function(ko, passDir, newblerInput)
{
ko = gsub("^ko:", "", ko)
cDNA = blatting(ko, type="cDNA", newblerInput)
gDNA = blatting(ko, type="gDNA", newblerInput)
komdrRanges = mdrRanges(ko, passDir)
data.frame(
name = as.character(seqnames(komdrRanges)),
count.gDNA = countOverlaps(komdrRanges, gDNA),
count.cDNA = countOverlaps(komdrRanges, cDNA),
ko = ko
) %>% tbl_df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.