## Requirement: "tibble" & "stringr"
gene_position = function(position_gb){
get_gene_position = function(data, search_pattern, type, name){
orientation = list()
position = lapply(data, function(x) grep(search_pattern, x$V1))
index_gene = lapply(data, function(x) grep(type, x$V1))
for(i in 1:length(index_gene)){ index_gene[[i]] = tail(index_gene[[i]][index_gene[[i]] < tail(position[[i]], 1)], 1)}
for(i in 1:length(data)){ position[[i]] = data[[i]][index_gene[[i]],] }
for(j in 1:length(data)){
if(length(grep("complement", position[[j]])) != 0) orientation[j] = "complement"
else orientation[j] = "normal"
}
position = lapply(position, function(x) gsub("join\\(|,|complement\\(|\\)|>|<", "", x))
position = unlist(lapply(position, function(x) ifelse(length(x) == 0, NA, x)))
start_position = as.numeric(word(word(position, -1), 1, sep = fixed("..")))
end_position = as.numeric(word(word(position, -1), -1, sep = fixed("..")))
output = tibble(data.frame(start_position, end_position, unlist(orientation)))
colnames(output) = paste(c("START", "END", "ORIENTATION"), name, sep = "_")
return(output)
}
colnames(position_gb) = "V1"
position_splited = split(position_gb, findInterval(1:nrow(position_gb), which(grepl("LOCUS", position_gb$V1)) + 1))[-1]
position_12S = suppressWarnings(get_gene_position(position_splited, '12S ribosomal RNA"|12S ribosomal RNA subunit"|12 ribosomal RNA"|12S rivbosomal RNA"|small subunit ribosomal RNA"|s-RNA"|s-rRNA"|rrnS"|12S rRNA"|12SrRNA"|12S"|small ribosomal RNA|12 rRNA"', "rRNA |gene |mRNA ", "12S"))
position_16S = suppressWarnings(get_gene_position(position_splited, '16S ribosomal RNA"|16S ribosomal RNA subunit"|16S rivbosomal RNA"|16 ribosomal RNA"|16S ribosamal RNA"|large subunit ribosomal RNA"|l-RNA"|l-rRNA"|rrnL"|16S rRNA"|16S"|large ribosomal RNA|l6S ribosomal RNA"|16 rRNA"', "rRNA |gene |mRNA ", "16S"))
position_COI = suppressWarnings(get_gene_position(position_splited, 'oxidase subunit I"|ocidase subunit I"|oxidase subunit 1"|oxidase subunit-I"|oxidase subunit-1"|cox1"|CoxI"|oxidase I"|oxidase 1"|oxidase I;|COX1"|COI"|oxidase subunit idase subunit I"|oxidase subunits I"|oxidase subunits 1"|oxydase subunit 1"|oxydase subunit I"', "CDS |gene ", "COI"))
position_CytB = suppressWarnings(get_gene_position(position_splited, 'ytochrome b"|ytochorome b"|ytochrome-b"|ytochrome-B"|ytohrome b"|ytchorome b"|CYTB"|ytochome b"|cytB"|cytochrome b;|Cyt b"|cytb"|Cytb"|Cyt b"', "CDS |gene ", "CytB"))
accession = unlist(lapply(position_splited, function(x) x[which(grepl("VERSION", x$V1)), ]))
accession = tibble(data.frame(ACCESSION = word(word(accession, -2, sep = fixed(".")), -1)))
final_df = tibble(cbind(accession, position_12S, position_16S, position_COI, position_CytB))
return(list(COMPLETE_POSITIONS = final_df[rowSums(is.na(final_df)) == 0, ],
MISSING_POSITIONS = final_df[rowSums(is.na(final_df)) %in% c(1:7), ]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.