Nothing
#' Write a gff3 file from a tidy table
#'
#' @importFrom stats na.omit
#' @param feats tidy feat table
#' @param file name of output file
#' @param seqs a tidy sequence table to generate optional `##sequence-region`
#' directives in the header
#' @param type if no type column exists, use this as the default type
#' @param source if no source column exists, use this as the default source
#' @param score if no score column exists, use this as the default score
#' @param strand if no strand column exists, use this as the default strand
#' @param phase if no phase column exists, use this as the default phase
#' @param id_var the name of the column to use as the GFF3 `ID` tag
#' @param parent_var the name of the column to use as GFF3 `Parent` tag
#' @param ignore_attr attributes not to be included in GFF3 tag list. Defaults
#' to internals: `introns, geom_id`
#' @param head additional information to add to the header section
#' @return No return value, writes to file
#' @examples
#' filename <- tempfile(fileext = ".gff")
#' write_gff3(emale_genes, filename, emale_seqs, id_var = "feat_id")
#' @export
write_gff3 <- function(
feats, file, seqs = NULL, type = NULL, source = ".", score = ".", strand = ".", phase = ".",
id_var = "feat_id", parent_var = "parent_ids", head = "##gff-version 3", ignore_attr = c("introns", "geom_id")) {
if (!is.null(seqs)) {
if (!all(has_name(seqs, c("start", "end")))) {
if (!has_name(seqs, "length")) {
rlang::abort("eigher start/end or length required")
}
seqs <- mutate(seqs, start = 1, end = length)
}
seqs <- mutate(seqs, directive = "##sequence-region", end = .data$end - .data$start + 1, start = 1) %>%
select("directive", "seq_id", "start", "end") %>%
unite("seq_reg", 1:4, sep = " ")
}
require_vars(feats, c("seq_id", "start", "end"))
if (!has_name(feats, "type")) {
if (is.null(type)) rlang::abort("type required")
feats$type <- type
}
if (!has_name(feats, "score")) feats$score <- score
if (!has_name(feats, "phase")) feats$phase <- phase
if (!has_name(feats, "source")) feats$source <- source
if (!has_name(feats, "strand")) {
feats$strand <- strand
} else {
feats$strand <- strand_chr(feats$strand, na = ".")
}
# arrange so that predefined gff attributes come first and in fixed order
if (id_var %in% names(feats)) {
names(feats)[names(feats) == id_var] <- "ID"
id_tag <- "ID"
} else {
id_tag <- NULL
rlang::warn(paste("No ID variable detected. An ID tag is not required, but recommended.",
"Use `id_var` to specify an ID column with a different name",
sep = "\n"
))
}
if (parent_var %in% names(feats)) {
names(feats)[names(feats) == parent_var] <- "Parent"
}
gff3_attr <- c("ID", "Parent", "Name", "Alias", "Target", "Gap", "Derives_from", "Note", "Ontology_term")
gff3_cols <- c("seq_id", "source", "type", "start", "end", "score", "strand", "phase")
# compute starts/ends for CDS and cDNA_match from introns
feats <- unchop_cds(feats)
feats <- select(feats, -all_of(ignore_attr))
feats <- feats %>% mutate(
across(all_of(gff3_cols), ~ replace_na(.x, ".")),
across(where(is_list), function(l) purrr::map_chr(l, stringr::str_c, collapse = ","))
)
attr <- setdiff(names(feats), c(gff3_cols, id_tag))
if (!is.null(id_tag) || length(attr)) {
# convert attributes tags to title case, gff convention
attr_predef <- attr[na.omit(match(gff3_attr, str_to_title(attr)))]
attr_predef_ii <- names(feats) %in% attr_predef
names(feats)[attr_predef_ii] <- str_to_title(names(feats)[attr_predef_ii])
attr_custom <- setdiff(attr, attr_predef)
attr <- c(id_tag, str_to_title(attr_predef), attr_custom)
for (att in attr) {
feats[[att]] <- ifelse(
is.na(feats[[att]]) | !length(feats[[att]]),
NA, paste0(att, "=", feats[[att]])
)
}
feats <- unite(feats, "attr", all_of(attr), sep = ";", na.rm = TRUE)
body <- feats[, c(gff3_cols, "attr")]
} else {
body <- feats[, gff3_cols]
}
write(head, file)
if (!is.null(seqs)) readr::write_tsv(seqs, file, append = T, col_names = F, quote = "none", escape = "none")
readr::write_tsv(body, file, append = T, col_names = F, quote = "none", escape = "none")
}
unchop_cds <- function(x) {
# expand collapsed CDS from start/end/introns to starts/ends list cols
coords <- purrr::pmap_df(x, function(type, start, end, introns, ...) {
if (!type %in% c("CDS", "cDNA_match")) {
tibble(start = list(start), end = list(end))
} else {
se <- c(start, start + introns - c(2, 0), end)
tibble(
start = list(se[c(TRUE, FALSE)]),
end = list(se[c(FALSE, TRUE)])
)
}
})
mutate(x, coords) %>% tidyr::unchop(c(start, end))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.