R/countReads.R

Defines functions countReads

Documented in countReads

#' Transcript abundance analysis counts and count ratios
#' @param data_dir the data directory.
#' @details Get the transcript read counts and ratios from the "sample_merged_trans_report.txt"
#' and "htsm_rtd_trans_report.txt" from the \code{data_dir}.
countReads <- function(input_dir){
  
  start.time <- Sys.time()
  
  ### Get reads of transcripts 
  file2read <- list.files(path = input_dir,
                          pattern = 'trans_reads.RData',
                          recursive = T,full.names = T)
  if(length(file2read)==0)
    stop('The file "trans_reads.RData" does not exist in the data_dir')
  load(file2read)
  
  #################################################################
  ### Get number of reads for transcripts after merge samples
  file2read <- list.files(path = input_dir,
                          pattern = 'sample_merged_trans_report.txt',
                          recursive = T,full.names = T)
  if(length(file2read)==0)
    stop('The file "sample_merged_trans_report.txt" does not exist in the data_dir')
  
  ### Transcript id after merge; get read counts of transcript from different samples
  sample_merged_trans_report <- readr::read_delim(file2read,col_select = c(1,8))
  sample_merged_trans_report <- tidyr::separate_rows(sample_merged_trans_report,all_source_trans,sep=',')
  sample_merged_trans_report$reads <- trans_reads[sample_merged_trans_report$all_source_trans]
  
  trans_reads_sm <- rowsum(x = sample_merged_trans_report$reads,
                           group = sample_merged_trans_report$transcript_id,
                           reorder = F)
  trans_reads_sm <- unlist(trans_reads_sm[,1])
  
  ### Get number of reads for transcripts after merge redundant in the RTD
  file2read <- list.files(path = input_dir,pattern = 'htsm_rtd_trans_report.txt',
                          recursive = T,full.names = T)
  if(length(file2read)==0)
    stop('The file "htsm_rtd_trans_report.txt')
  htsm_rtd_trans_report <- readr::read_delim(file2read,col_select = c(1,8))
  htsm_rtd_trans_report <- tidyr::separate_rows(htsm_rtd_trans_report,all_source_trans,sep=',')
  htsm_rtd_trans_report$all_source_trans <- gsub('sample_merged_filtered_','',
                                                 htsm_rtd_trans_report$all_source_trans)
  htsm_rtd_trans_report$reads <- trans_reads_sm[htsm_rtd_trans_report$all_source_trans]
    
  ### make data frame
  read_counts <- rowsum(x = htsm_rtd_trans_report$reads,
                        group = htsm_rtd_trans_report$transcript_id,
                        reorder = F)
  
  read_counts <- data.frame(gene_id=gsub('[.].*','',rownames(read_counts)),
                            transcript_id=rownames(read_counts),
                            counts=read_counts[,1],
                            row.names = NULL)
  read_counts$ps <- rowratio(x = read_counts$counts,
                             group = read_counts$gene_id,
                             reorder = F)
  # save(read_counts,file.path(output_dir,'.RData'))
  
  end.time <- Sys.time()
  time.taken <- end.time - start.time
  message(paste('Time for analysis:',round(time.taken,3),attributes(time.taken)$units),'\n')
  return(read_counts)
}
wyguo/RTDBox documentation built on Jan. 31, 2023, 1:19 a.m.