# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
# vi: set ts=2 noet:
#' This is the full pipeline taking path to a fasta sequence or a
#' seqinr AA file and generating a newick tree
#'
#' @export
sequences_to_tree <- function(
sequences,
output_path,
run_id,
user_agent_arg,
sequence_name_to_tree_name = function(x){tibble::data_frame(tree_name=x)},
force=FALSE,
verbose=TRUE
){
output_prefix <- paste(
output_path,
paste(run_id, date_code(), sep="_"),
sep="/")
if(file.exists(output_prefix)){
if(force){
if(verbose){
cat("Output path already exists, cleaning it ... \n")
}
unlink(output_prefix, recursive=T)
} else {
stop(paste0("Output path '", output_prefix, "' already exists, if you would like to overwrite it, please use force=TRUE", sep=""))
}
}
dir.create(output_prefix, recursive=T)
name_map_fname <- paste0(output_prefix, "/name_map.tsv")
clustalo_input_fname <- paste0(output_prefix, "/clustalo_input.fasta")
clustalo_output_fname <- paste0(output_prefix, "/clustalo_output.fasta")
phyml_input_fname <- paste0(output_prefix, "/phyml_input.phylip")
phyml_output_tree_fname <- paste0(phyml_input_fname, "_phyml_tree_", run_id, ".txt")
output_tree_fname <- paste0(output_prefix, "/", run_id, ".newick")
run_script_fname <- paste0(output_prefix, "/run.sh")
if(class(sequences) == "character"){
# assume sequences is actually a path to a (possibly zipped) fasta file
sequences_input_fname <- sequences
sequences_output_fname <- paste0(
output_prefix, "/",
sequences_input_fname %>%
stringr::str_split("/") %>%
unlist %>%
tail(1))
file.copy(sequences_input_fname, sequences_output_fname)
if(verbose){
cat("Reading in sequences from '", sequences_input_fname, "' ...\n", sep="")
}
sequences <- seqinr::read.fasta(sequences_input_fname, "AA")
} else if(
# assume sequences is actually something read in from seqinr::read.fasta
class(sequences) == "list" &&
length(sequences) > 0 &&
class(sequences[[1]]) == "SeqFastaAA"){
sequences_output_fname <- paste0(output_prefix, "/sequences.fa")
seqinr::write.fasta(
sequences=sequences,
names=names(sequences),
file.out=sequences_output_fname)
} else {
stop(paste0("Unable to handle input sequences of class ", class(sequences), ".\n", sep=""))
}
if(verbose){
cat("Generating tree for ", length(sequences), " uniprot sequences ... \n", sep="")
}
if(verbose){
cat("Generating phylip and tree names for sequences ... \n")
}
name_map <- sequences %>%
seqinr::getName() %>%
sequence_name_to_tree_name() %>%
dplyr::mutate(phylip_name = sequences %>% length %>% generate_phylip_names)
readr::write_tsv(name_map, name_map_fname)
cat(
"#!/bin/sh\n\n",
"### Autogenerated from Bethany::sequences_to_tree ####\n",
"### date: ", date_code(), "\n",
"### run_id: ", run_id, "\n\n",
sep="",
file=run_script_fname)
if(verbose){
cat("Writing out fasta for input to Clustal Omega: '", clustalo_input_fname, "' ...\n", sep="")
}
seqinr:::write.fasta(
sequences = sequences,
names = name_map$phylip_name %>% plyr::llply(identity), # it wants a list
file.out= clustalo_input_fname)
if(verbose){
cat("Generating alignment with Clustal Omega: '", clustalo_input_fname, "' -> '", clustalo_output_fname, "' ... \n", sep="")
cat("#########################\n")
}
clustalo(clustalo_input_fname, clustalo_output_fname, run_script_fname)
if(verbose){
cat("#########################\n")
cat("Done generating alignment with Clustal Omega\n\n")
}
if(verbose){
cat("Converting Clustal Omeaga output from fasta to phylip4 format: '", phyml_input_fname, "' ...\n", sep="")
cat("#########################\n")
}
readseq(
clustalo_output_fname,
phyml_input_fname,
run_script_fname = run_script_fname,
informat="fasta",
format="Phylip4")
if(verbose){
cat("#########################\n")
cat("Done converting alignment to phylip4 format\n\n")
}
if(verbose){
cat("Generating phylogeny with PhyML, run_id: '", run_id, "' ... \n", sep="")
cat("#########################\n")
}
phyml(
phyml_input_fname,
run_id=run_id,
run_script_fname=run_script_fname)
if(verbose){
cat("#########################\n")
cat("Done generating phylogeny\n\n")
}
if(verbose){
cat("Renaming generated tree with for viewing ... \n")
}
tree <- ape::read.tree(phyml_output_tree_fname)
if(tree %>% is.null){
stop(paste0("Failed to generate a phylogeny. Check '", output_prefix, "' for output files.\n"))
}
tree_2 <- tree %>%
rename_tree(name_map)
if(verbose){
cat("Writing out final tree: '", output_tree_fname, "' ... \n", sep="")
}
ape::write.tree(tree_2, output_tree_fname)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.