R/addInvitedSeq.R

Defines functions addInvitedSeq

Documented in addInvitedSeq

#' A function to update barcode summaries with a selected set of DNA sequences (e.g. in case of big rearrangement in barcode sequence).
#'
#' @param pat The barcode backbone pattern for matching.
#' @param bc_data The data returned by the function generateSummaries().
#' @param dir The directory containing the summaries.
#' @param invited_idx The list of invited sequences that missed index matching (A table with matching sequences and sample name. one sequence per line).
#' @param invited_bc The list of invited sequences that missed barcode matching (one sequence per line).
#' @details This function updates the summaries generated by generateSummaries() with a set of provided sequences.
#' @keywords BC32 processing update
#' @export
#' @examples
#' addInvitedSeq(pat, bc_data, getwd(), invited_idx, invited_bc)

addInvitedSeq <- function(pat,
                          bc_data,
                          dir, # directory containing the summaries
                          invited_idx = NULL,
                          invited_bc = NULL) {

  if(!is.null(invited_idx)) invited_idx <- unique(invited_idx)
  if(!is.null(invited_bc))invited_bc <- unique(invited_bc)

  # add invited indexes
  if(!is.null(invited_idx)) {
    for (i in 1:nrow(invited_idx)) {
      seq <- DNAStringSet(invited_idx[i,1])
      name <- invited_idx[i, 2]
      hits <- length(grep(seq, bc_data[[name]]$no_index))
      # extract barcode sequence
      seq <- narrow(seq, 1, 90)
      match <- vmatchPattern(pat, seq, length(strsplit(pat, 'N')[[1]]) + bb_mis)
      seq <- seq[match]
      # add hits in existing table
      table <- read.delim(paste0(dir, '/', name, '_barcode_summary.txt'), stringsAsFactors = F)
      table <- rbind(table, data.frame(seq = seq, n = hits))
      table <- arrange(table, desc(n))
      write.table(table, paste0(dir, '/', name, '_barcode_summary.txt'), sep='\t', row.names = F)
      # write stats
      before <- 100 * as.numeric(bc_data[[name]]$QC_stats[4,2])/as.numeric(bc_data[[name]]$QC_stats[2,2])
      after <- 100 * (as.numeric(bc_data[[name]]$QC_stats[4,2]) + hits)/as.numeric(bc_data[[name]]$QC_stats[2,2])
      write(paste0('Reads matching index increased from ', round(before, 2), ' % to ', round(after, 2), ' %'), paste0(dir, '/', name, '_invited_idx_stats.txt'))
      }
  }

  # add invited barcodes
  if(!is.null(invited_bc)) {
    for (i in 1:length(bc_data)) {
      name <- names(bc_data)[i]
      subject <- bc_data[[i]]$no_bc[,1]
      to_add <- NULL
      for (j in 1:nrow(invited_bc)) {
        seq <- invited_bc[j,1]
        to_add <- rbind(to_add, c(seq, length(grep(seq, subject))))
      }
      to_add <- as.data.frame(to_add)
      colnames(to_add) <- c('seq', 'n')
      to_add$n <- as.numeric(as.character(to_add$n))
      to_add <- filter(to_add, n > 0)
      # merge with existing data
      table <- read.delim(paste0(dir, '/', name, '_barcode_summary.txt'), stringsAsFactors = F)
      table <- rbind(table, to_add)
      table <- arrange(table, desc(n))
      write.table(table, paste0(dir, '/', name, '_barcode_summary.txt'), sep='\t', row.names = F)
      # write stats
      before <- as.numeric(bc_data[[name]]$QC_stats[9,2])
      after <- 100 * ((before / 100) * (as.numeric(bc_data[[name]]$QC_stats[4,2])) + sum(to_add$n)) / as.numeric(bc_data[[name]]$QC_stats[4,2])
      write(paste0('Reads matching barcode increased from ', round(before, 2), ' % to ', round(after, 2), ' %'), paste0(dir, '/', name, '_invited_bc_stats.txt'))

    }
  }

}
vroh/BC32_BarSeq documentation built on Jan. 25, 2021, 9:24 p.m.