R/LoadBlasrBam.R

Defines functions LoadBlasrBam

# Load SAM/BAM files generated by Blasr alignment
LoadBlasrBam <- function(fn, redo=FALSE) {
  require(awsomics);
  require(Rsamtools);
  require('GenomicAlignments');
  
  f0 <- TrimPath(fn);
  pth <- sub(paste0(f0, '$'), '', fn);
  f1 <- sub('.sam$', '', f0, ignore.case = TRUE);
  
  flds <- c(f1 = 'qname', f2 = 'flag', f3 = 'rname', f4 = 'pos', f5 = 'mapq', f6 = 'cigar', 
            f14 = 'qe', f15 = 'qs', f17 = 'as'); 
  
  f2 <- sapply(names(flds), function(nm) {
    f <- paste0(pth, '/', f1, '_', flds[nm], '.txt');
    f <- sub('//', '/', f);
    if (!file.exists(f) | redo) {
      sh <- paste0('grep -v "^@" ', fn, ' | cut -', nm, ' > ', f); 
      system(sh);
    };
    f; 
  }); 
  
  f3 <- sub('sam$', 'bam', fn, ignore.case = TRUE);
  if (!file.exists(f3) | redo) asBam(fn, overwrite=TRUE, indexDestination=FALSE);
  
  bam <- readGAlignments(f3);
  
  gr <- GRanges(as.vector(seqnames(bam)), IRanges(start(bam), end(bam)), strand=as.vector(strand(bam)));
  
  flds <- flds[flds!='pos' & flds!='rname' & flds!='cigar'];
  meta <- lapply(names(flds), function(nm) {
    cat('Processing field:', flds[nm], '\n');
    l <- readLines(f2[nm]);
    
    if (tolower(flds[nm])[1] == 'qname') {
      x <- strsplit(l, '/');
      x <- do.call('rbind', x);
      while (length(unique(x[, 1]))==1) x <- x[, -1];
      
      loc <- strsplit(x[, ncol(x)], '_');
      loc <- do.call('rbind', loc);
      ss <- as.integer(loc[, 1])+1;
      se <- as.integer(loc[, 2]);
      
      hole <- x[, 1];
      qname <- paste(hole, x[, ncol(x)], sep='/');
      if (ncol(x) > 2) for (i in 2:(ncol(x)-1)) hole <- paste(hole, x[, i], sep='/');
      data.frame(qname=qname, hole=hole, ss=ss, se=se, stringsAsFactors = FALSE);
    } else if (tolower(flds[nm])[1] == 'flag') {
      data.frame(flag=as.integer(l), stringsAsFactors = FALSE);
    } else if (tolower(flds[nm])[1] == 'mapq') {
      data.frame(mapq=as.integer(l), stringsAsFactors = FALSE);
    } else if (tolower(flds[nm])[1] == 'qs') {
      data.frame(qs=as.integer(sub('qs:i:', '', l))+1, stringsAsFactors = FALSE);
    } else if (tolower(flds[nm])[1] == 'qe') {
      data.frame(qe=as.integer(sub('qe:i:', '', l)), stringsAsFactors = FALSE);
    } else if (tolower(flds[nm])[1] == 'as') {
      data.frame(as=as.integer(sub('AS:i:', '', l)), stringsAsFactors = FALSE);
    } 
  }); 
  meta <- do.call('cbind', meta);
  meta <- meta[, c('qname', 'hole', 'ss', 'se', 'qs', 'qe', 'as', 'flag', 'mapq')];
  elementMetadata(gr) <- meta;
  names(gr) <- rownames(meta) <- 1:length(gr); 
  
  saveRDS(meta, paste0(pth, '/', f1, '_meta.rds'));
  saveRDS(gr, paste0(pth, '/', f1, '_all.rds'));
}
zhezhangsh/RH documentation built on May 7, 2020, 6:32 p.m.