# R/csv_2_gen_2_sql.R
#
# Defines function: csv_2_gen_2_sql

## Requirement: "DECIPHER"
## (Seqs2DB, IdentifyByRank, BrowseDB and SearchDB are called unqualified, as
## are dbConnect, dbDisconnect and SQLite, so the packages providing them must
## be attached before this function is called.)

#' Convert a taxonomy CSV into a GenBank-style file and load it into SQLite
#'
#' Reads `<file_name>.csv`, writes one seven-line GenBank-like record per row
#' to `<file_name>.gen` (LOCUS, ACCESSION, ORGANISM, lineage, ORIGIN, sequence,
#' `//`), imports that file into the SQLite database `db_name` with
#' `Seqs2DB()`, assigns identifiers with `IdentifyByRank()`, and returns the
#' result of `SearchDB()`.
#'
#' @param file_name Path of the input file WITHOUT the ".csv" extension. The
#'   same stem is reused for the generated ".gen" file.
#' @param db_name Database name handed to `dbConnect(SQLite(), ...)` and to
#'   the DECIPHER functions.
#' @param csv_modified If `TRUE` the CSV is read with `;` as separator,
#'   otherwise with `,`.
#' @param identifier_level Forwarded as `level` to `IdentifyByRank()`.
#' @param seq_name Either "identifier" or "accession"; forwarded as `nameBy`
#'   to `SearchDB()`.
#' @param show_db If `TRUE`, `BrowseDB(db_name)` is called before returning.
#' @param overwrite Forwarded as `replaceTbl` to `Seqs2DB()`.
#' @return The value returned by `SearchDB(db_name, nameBy = seq_name)`.
csv_2_gen_2_sql <- function(file_name, db_name, csv_modified = FALSE,
                            identifier_level = 0, seq_name = "identifier",
                            show_db = FALSE, overwrite = TRUE) {

  if (!(seq_name %in% c("accession", "identifier"))) {
    stop('The argument seq_name must be in "identifier" or "accession".')
  }

  csv_path <- paste0(file_name, ".csv")
  gen_path <- paste0(file_name, ".gen")

  # stringsAsFactors = FALSE keeps the columns as character on R < 4.0 too;
  # a factor passed to rbind() below would be written as integer codes.
  data <- read.csv(csv_path, header = TRUE,
                   sep = if (csv_modified) ";" else ",",
                   stringsAsFactors = FALSE)

  required_cols <- c("ACCESSION", "SEQUENCES", "SPECIES", "GENUS", "FAMILY",
                     "ORDER", "CLASS", "PHYLUM", "KINGDOM", "SUPERKINGDOM")
  if (length(setdiff(required_cols, colnames(data))) != 0) {
    stop('The data must contain such column: "ACCESSION", "SEQUENCES", "SPECIES", "GENUS", "FAMILY", "ORDER", "CLASS", "PHYLUM", "KINGDOM" & "SUPERKINGDOM".')
  }

  n_records <- nrow(data)
  lineage <- paste(data$GENUS, data$FAMILY, data$ORDER, data$CLASS,
                   data$PHYLUM, data$KINGDOM, data$SUPERKINGDOM, sep = ";")

  # rbind() builds a 7 x n matrix (one column per record); c() flattens it
  # column-wise, which interleaves the seven lines of each record in order.
  data_gb <- c(rbind(rep("LOCUS", n_records),
                     paste0("ACCESSION   ", data$ACCESSION),
                     paste0("ORGANISM    ", data$SPECIES),
                     paste0("            ", lineage),
                     rep("ORIGIN", n_records),
                     as.character(data$SEQUENCES),
                     rep("//", n_records)))

  write.table(data_gb, file = gen_path, row.names = FALSE, col.names = FALSE,
              quote = FALSE)

  # Keep a handle on the connection so it is closed on every exit path.
  # The original opened it inline and later disconnected a *different*,
  # freshly opened connection, leaving this one open.
  con <- dbConnect(SQLite(), db_name)
  on.exit(dbDisconnect(con), add = TRUE)

  Seqs2DB(gen_path, "GenBank", con, identifier = "", processors = NULL,
          fields = c(accession = "ACCESSION", rank = "ORGANISM"),
          replaceTbl = overwrite)

  IdentifyByRank(db_name, level = identifier_level, add2tbl = TRUE)

  if (show_db) {
    BrowseDB(db_name)
  }

  SearchDB(db_name, nameBy = seq_name)
}
# Eliot-RUIZ/eDNAevaluation documentation built on Dec. 17, 2021, 6:25 p.m.