#' @title Generate ortholog tables by gene locus and splice varaint
#' @description Given an input dNdS table generated by \code{\link{dNdS}} and
#' annotation files for the query and subject species in \code{gtf} or \code{gff}
#' file format, this function selects the best BLAST hit to represent either a gene locus (e.g. the splice variant of the gene locus with lowest e-value) or the best BLAST hit for a splice varaint.
#' @param dnds_output_folder file path to a \code{dNdS_tbl} generated with \code{\link{dNdS}} and
#' stored conform with \code{\link{read.dnds.tbl}}.
#' @param annotation_file_query file path to the annotation file of the query species in \code{gtf} or \code{gff} file format.
#' @param annotation_file_subject file path to the annotation file of the subject species in \code{gtf} or \code{gff} file format.
#' @param output_folder file path to a folder in which orthologs tables shall be stored.
#' @param output_type type of ortholog table that shall be printed out (or stored in a variable).
#' Available options are:
#' \itemize{
#' \item \code{output_type = "gene_locus"} (default): find for each gene locus a representative splice variant that
#' maximizes the sequence homology (in terms of smallest e-value and longest splice variant in case of same evalue)
#' to the subject gene locus and its representative splice variant. The output table contains only once representative splice variant per gene locus.
#' \item \code{output_type = "splice_variant"}: for each homologous gene locus determine for each splice variant their
#' respective splice variant homolog. he output table contains several splice variants and their homologous splice variants per gene locus.
#' }
#' @param format a vector of length 2 storing the annotation file formats of the query annotation file and subject annotation file: either \code{gtf} or \code{gff} format. E.g. \code{format = c("gtf","gtf")}.
#' @author Hajk-Georg Drost
#' @export
internal_import_dnds_across_multiple_species <-
function(dnds_output_folder,
annotation_file_query,
annotation_file_subject,
output_folder = getwd(),
output_type = "gene_locus",
format) {
if (!file.exists(dnds_output_folder))
stop("Please provide a valid path to the dnds_output_folder.", call. = FALSE)
if (!file.exists(annotation_file_query))
stop("Please provide a valid path to the annotation_file_query.", call. = FALSE)
if (!file.exists(annotation_file_subject))
stop("Please provide a valid path to the annotation_file_subject.", call. = FALSE)
if (!file.exists(output_folder))
stop("Please provide a valid path to the output_folder.", call. = FALSE)
if (!is.element(output_type, c("gene_locus", "splice_variant")))
stop("Please specify an output_type that is supported by this function.", call. = FALSE)
message("Generating orthologs table for ", basename(annotation_file_query), " vs ", basename(annotation_file_subject), " ...")
qry_species_name <-
unlist(stringr::str_split(basename(annotation_file_query), "[.]"))[1]
sbj_species_name <-
unlist(stringr::str_split(basename(annotation_file_subject), "[.]"))[1]
select_orthologs_by_gene_locus_output_name <-
paste0("orthologs_by_gene_",
qry_species_name,
"_vs_",
sbj_species_name,
".csv")
select_orthologs_by_splice_variant_output_name <-
paste0(
"orthologs_by_splice_variant_",
qry_species_name,
"_vs_",
sbj_species_name,
".csv"
)
# import dNdS file
dNdS_tbl <- import_dnds_tbl(dnds_output_folder)
# check if annotation file is corrupt and remove outliers
annotation_file_query <-
check_annotation_file_completeness(annotation_file_query, remove_annotation_outliers = TRUE)
annotation_file_subject <- check_annotation_file_completeness(annotation_file_subject, remove_annotation_outliers = TRUE)
# select orthologs by gene_locus
select_orthologs_by_gene_locus <- select_orthologs(
dnds_tbl = dnds_output_folder,
annotation_file_query = annotation_file_query,
annotation_file_subject = annotation_file_subject,
collapse_by = "gene_locus",
format = format
)
# select orthologs by splice_variant
select_orthologs_splice_variant <- select_orthologs(
dnds_tbl = dNdS_tbl,
annotation_file_query = annotation_file_query,
annotation_file_subject = annotation_file_subject,
collapse_by = "splice_variant",
format = format
)
if (!file.exists(output_folder))
dir.create(output_folder)
by_gene_output_folder <- file.path(output_folder, paste0("orthologs_by_gene_locus_qry_", stringr::str_to_lower(qry_species_name)))
by_splice_variant_output_folder <- file.path(output_folder, paste0("orthologs_by_splice_variant_qry_", stringr::str_to_lower(qry_species_name)))
message("\n")
message(
"Store ortholog by gene locus table at '",
file.path(by_gene_output_folder, select_orthologs_by_gene_locus_output_name),
"'."
)
if (!dir.exists(by_gene_output_folder))
dir.create(by_gene_output_folder)
if (!dir.exists(by_splice_variant_output_folder))
dir.create(by_splice_variant_output_folder)
readr::write_delim(
select_orthologs_by_gene_locus,
file.path(by_gene_output_folder, select_orthologs_by_gene_locus_output_name),
col_names = TRUE,
delim = ";"
)
message("\n")
message(
"Store ortholog by splice variant table at '",
file.path(by_splice_variant_output_folder, select_orthologs_by_splice_variant_output_name),
"'."
)
readr::write_delim(
select_orthologs_splice_variant,
file.path(by_splice_variant_output_folder, select_orthologs_by_splice_variant_output_name),
col_names = TRUE,
delim = ";"
)
if (output_type == "gene_locus")
return(select_orthologs_by_gene_locus)
if (output_type == "splice_variant")
return(select_orthologs_splice_variant)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.