R/view_metabarcodes.R

Defines functions view_metabarcodes

## Requirement : 'DECIPHER'

view_metabarcodes = function(data, manual_data = NULL, data_primer = primer_data, fragment_data = fragment_tab, 
                             nb_sequences = 1, name_col_fragment_data, show_all = F){
  
  primers = unique(data$PRIMERS)
  
  for(j in 1:length(primers)){
    
    subset_data_primer = subset(data, PRIMERS == primers[j])
    
    subset_infos_primer = subset(data_primer, SHORT_NAME == primers[j])
    
    name_primer = primers[j]
    
    cases = table(subset_data_primer$PRIMER_FOUND)
    
    for(i in 1:length(cases)){
      
      random_accession = sample(subset(subset_data_primer, PRIMER_FOUND == names(cases)[i])$ACCESSION, nb_sequences)
      
      if(subset_infos_primer$PRIMER_USED == "2 x 1"){
        
        if(!is.null(manual_data)) aligned_sequences = AlignSeqs(DNAStringSet(c(subset(fragment_data, ACCESSION %in% random_accession)[[name_col_fragment_data]], 
                                                                               subset(manual_data, ACCESSION %in% random_accession & PRIMER_SET == name_primer)$SEQUENCES,
                                                                               subset(subset_data_primer, ACCESSION %in% random_accession)$FRAGMENT,
                                                                               paste(reverseComplement(DNAStringSet(subset_infos_primer$FORWARD))),
                                                                               subset_infos_primer$REVERSE)), verbose = F)
        
        else aligned_sequences = AlignSeqs(DNAStringSet(c(subset(fragment_data, ACCESSION %in% random_accession)[[name_col_fragment_data]], 
                                                          subset(subset_data_primer, ACCESSION %in% random_accession)$FRAGMENT,
                                                          paste(reverseComplement(DNAStringSet(subset_infos_primer$FORWARD))),
                                                          subset_infos_primer$REVERSE)), verbose = F)
        
      }
      
      else {
        
        if(!is.null(manual_data)) aligned_sequences = AlignSeqs(DNAStringSet(c(subset(fragment_data, ACCESSION %in% random_accession)[[name_col_fragment_data]], 
                                                                               subset(manual_data, ACCESSION %in% random_accession & PRIMER_SET == name_primer)$SEQUENCES,
                                                                               subset(subset_data_primer, ACCESSION %in% random_accession)$FRAGMENT,
                                                                               subset_infos_primer$FORWARD,
                                                                               paste(reverseComplement(DNAStringSet(subset_infos_primer$REVERSE))))), verbose = F)
        
        else aligned_sequences = AlignSeqs(DNAStringSet(c(subset(fragment_data, ACCESSION %in% random_accession)[[name_col_fragment_data]], 
                                                          subset(subset_data_primer, ACCESSION %in% random_accession)$FRAGMENT,
                                                          subset_infos_primer$FORWARD,
                                                          paste(reverseComplement(DNAStringSet(subset_infos_primer$REVERSE))))), verbose = F)
        
      }
      
      if(subset_infos_primer$PRIMER_USED == "2 x 1") {
        
        first_pos = which(strsplit(paste(aligned_sequences)[length(aligned_sequences)], "")[[1]] != "-")[1]
        
        last_pos = tail(which(strsplit(paste(aligned_sequences)[length(aligned_sequences) - 1], "")[[1]] != "-"), 1)
        
        subset_aligned_sequences = DNAStringSet(substr(paste(aligned_sequences), first_pos - 20, last_pos + 20))
        
      }
      
      else {
        
        first_pos = which(strsplit(paste(aligned_sequences)[length(aligned_sequences) - 1], "")[[1]] != "-")[1]
        
        last_pos = tail(which(strsplit(paste(aligned_sequences)[length(aligned_sequences)], "")[[1]] != "-"), 1)
        
        subset_aligned_sequences = DNAStringSet(substr(paste(aligned_sequences), first_pos - 20, last_pos + 20))
        
      }
      
      cat("------------------------------------------------------------------------------------------------------------------------------------------------------")
      
      cat("\n")
      
      cat(paste("Sequences with", names(cases)[i], "primer(s) for:", name_primer))
      
      cat("\n")
      
      cat("\n")
      
      print(subset_aligned_sequences)
      
      if(show_all) {
        
        BrowseSeqs(aligned_sequences)
        
        Sys.sleep(1)
        
      }
      
      cat("\n")
      
      cat("\n")
      
    }
    
    cat("------------------------------------------------------------------------------------------------------------------------------------------------------")
    
    cat("\n")
    
    cat("\n")
    
    cat(paste0("PERCENTAGE PRIMERS FOUND: ", round(nrow(subset(subset_data_primer, PRIMER_FOUND == "all")) / nrow(fragment_data) * 100, 2), "%"))
    
    cat("\n")
    
    cat("\n")
    
    cat(paste("ARTICLE LENGTH FOR", name_primer, ":", subset(data_primer, SHORT_NAME == name_primer)$ARTICLE_LENGTH))
    
    cat("\n")
    
    cat("\n")
    
    cat(paste("ZHANG LENGTH FOR", name_primer, ":", subset(data_primer, SHORT_NAME == name_primer)$ZHANG_LENGTH))
    
    cat("\n")
    
    cat("\n")
    
    cat("LENGTH WITH PRIMERS FOR", name_primer, ":")
    
    cat("\n")
    
    print(summary(subset_data_primer$LENGTH_WITH_PRIMERS))
    
    cat("\n")
    
    cat("\n")
    
    cat("LENGTH WITHOUT PRIMERS FOR", name_primer, ":")
    
    cat("\n")
    
    print(summary(subset_data_primer$LENGTH))
    
    cat("\n")
    
    cat("\n")
    
  }
  
}
Eliot-RUIZ/eDNAevaluation documentation built on Dec. 17, 2021, 6:25 p.m.