R/gene_extraction.R

Defines functions gene_extraction

## Requirement: "tibble" & "DECIPHER"

gene_extraction = function(position_data, dna_data){
  
  if(nrow(position_verified) != length(world_dna_verified)) 
    stop("Both position data and dna data must contain exactly the same accession numbers.")
  
  position_data = position_data[order(position_data$ACCESSION),]
  
  dna_data = dna_data[order(names(dna_data))]
  
  fragment_12S = list()
  
  fragment_16S = list()
  
  fragment_COI = list()
  
  fragment_CytB = list()
  
  for(i in 1:length(dna_data)){
    
    if(position_data$START_12S[i] > position_data$END_12S[i]) {
      
      fragment_12S_temporary = paste0(substr(dna_data[i], position_data$START_12S[i], width(dna_data[i])), 
                                      substr(dna_data[i], 1, position_data$END_12S[i]))
      
      if(position_data$ORIENTATION_12S[i] == "complement") fragment_12S_temporary = paste(reverseComplement(DNAStringSet(fragment_12S_temporary)))
      
      fragment_12S[[i]] = fragment_12S_temporary
      
      names(fragment_12S[[i]]) = names(dna_data[i])
      
    }
    
    else {
      
      fragment_12S_temporary = substr(dna_data[i], position_data$START_12S[i], position_data$END_12S[i])
      
      if(position_data$ORIENTATION_12S[i] == "complement") fragment_12S_temporary = paste(reverseComplement(DNAStringSet(fragment_12S_temporary)))
      
      fragment_12S[[i]] = fragment_12S_temporary
      
      names(fragment_12S[[i]]) = names(dna_data[i])
      
    }
    
    if(position_data$START_16S[i] > position_data$END_16S[i]) {
      
      fragment_16S_temporary = paste0(substr(dna_data[i], position_data$START_16S[i], width(dna_data[i])), 
                                      substr(dna_data[i], 1, position_data$END_16S[i]))
      
      if(position_data$ORIENTATION_16S[i] == "complement") fragment_16S_temporary = paste(reverseComplement(DNAStringSet(fragment_16S_temporary)))
      
      fragment_16S[[i]] = fragment_16S_temporary
      
      names(fragment_16S[[i]]) = names(dna_data[i])
      
    }
    
    else {
      
      fragment_16S_temporary = substr(dna_data[i], position_data$START_16S[i], position_data$END_16S[i])
      
      if(position_data$ORIENTATION_16S[i] == "complement") fragment_16S_temporary = paste(reverseComplement(DNAStringSet(fragment_16S_temporary)))
      
      fragment_16S[[i]] = fragment_16S_temporary
      
      names(fragment_16S[[i]]) = names(dna_data[i])
      
    }
    
    if(position_data$START_COI[i] > position_data$END_COI[i]) {
      
      fragment_COI_temporary = paste0(substr(dna_data[i], position_data$START_COI[i], width(dna_data[i])), 
                                      substr(dna_data[i], 1, position_data$END_COI[i]))
      
      if(position_data$ORIENTATION_COI[i] == "complement") fragment_COI_temporary = paste(reverseComplement(DNAStringSet(fragment_COI_temporary)))
      
      fragment_COI[[i]] = fragment_COI_temporary
      
      names(fragment_COI[[i]]) = names(dna_data[i])
      
    }
    
    else {
      
      fragment_COI_temporary = substr(dna_data[i], position_data$START_COI[i], position_data$END_COI[i])
      
      if(position_data$ORIENTATION_COI[i] == "complement") fragment_COI_temporary = paste(reverseComplement(DNAStringSet(fragment_COI_temporary)))
      
      fragment_COI[[i]] = fragment_COI_temporary
      
      names(fragment_COI[[i]]) = names(dna_data[i])
      
    }
    
    if(position_data$START_CytB[i] > position_data$END_CytB[i]) {
      
      fragment_CytB_temporary = paste0(substr(dna_data[i], position_data$START_CytB[i], width(dna_data[i])), 
                                       substr(dna_data[i], 1, position_data$END_CytB[i]))
      
      if(position_data$ORIENTATION_CytB[i] == "complement") fragment_CytB_temporary = paste(reverseComplement(DNAStringSet(fragment_CytB_temporary)))
      
      fragment_CytB[[i]] = fragment_CytB_temporary
      
      names(fragment_CytB[[i]]) = names(dna_data[i])
      
    }
    
    else {
      
      fragment_CytB_temporary = substr(dna_data[i], position_data$START_CytB[i], position_data$END_CytB[i])
      
      if(position_data$ORIENTATION_CytB[i] == "complement") fragment_CytB_temporary = paste(reverseComplement(DNAStringSet(fragment_CytB_temporary)))
      
      fragment_CytB[[i]] = fragment_CytB_temporary
      
      names(fragment_CytB[[i]]) = names(dna_data[i])
      
    }
    
  }
  
  extracted_genes = tibble(data.frame(ACCESSION = names(unlist(fragment_12S)), FRAGMENT_12S = unlist(fragment_12S), 
                                      FRAGMENT_16S = unlist(fragment_16S), FRAGMENT_COI = unlist(fragment_COI), FRAGMENT_CytB = unlist(fragment_CytB)))
  
  return(extracted_genes)
  
}
Eliot-RUIZ/eDNAevaluation documentation built on Dec. 17, 2021, 6:25 p.m.