#' DolloImport3
#'
#' Import data from compare analysis
#'
#' @param tree a phylo object describing the species tree used in the COMPARE analysis
#' @param files a character string giving the path (including the file name) to the output file from the Dollo parsimony step of the COMPARE analysis.
#' @param partitioning
#' @param groups
#' @param part_name
#'
#' @return this function returns a \code{list} object with the following elements: tree,
#' Compare matrices and raw_data. raw_data is a matrix with two columns (1 = gains, 2 = losses)
#' and as many rows as the species tree has nodes + tips. The rows and tree nodes are aligned, so the first rows are the tips in order, followed by the inner nodes
#' @export
#' @import ape readr plyr stringr tidyr
#'
#' @examples
#'
#'
DolloImport3 <- function(tree, files = NULL) {
  # Reads the Dollo parsimony output file, maps its node identifiers onto the
  # nodes of `tree`, builds a two-column gains/losses matrix ordered by tree
  # node number, and hands the result to retrieve_event3().
  if (is.null(files)) {
    stop("You need to give a path with the file name!")
  }

  # Import raw data: keep only the lines mentioning GAIN or LOSSES
  # (all GAIN lines first, then all LOSSES lines).
  raw_file <- read_lines(files)
  rows_needed <- c(
    which(str_detect(raw_file, "GAIN")),
    which(str_detect(raw_file, "LOSSES"))
  )
  raw_df2 <- ldply(str_split(raw_file[rows_needed], "\t"), rbind)
  # Drop the 2nd and 4th tab-separated fields; the three remaining fields are
  # named below.
  raw_df2 <- raw_df2[, -c(2, 4)]
  colnames(raw_df2) <- c("event", "node", "chars")

  # TODO (translated from the original note): the object should first be
  # built here from the tree and the raw file.
  # Match the phylo object nodes and the COMPARE output nodes.
  node_def <- which(str_detect(raw_file, "node\\d"))
  node_df2 <- ldply(str_split(raw_file[node_def], "\t"), rbind)
  colnames(node_df2) <- c("nodes", "species")
  node_df2$nodes <- str_replace(node_df2$nodes, "node", "")
  # Strip the leading space from the species field.
  node_df2$species <- str_sub(node_df2$species, start = 2)

  # Warn about species listed in the Dollo file that are not tips of the tree.
  dollo_species <- unique(unlist(str_split(node_df2$species, " ")))
  wrong_names_d <- dollo_species[!(dollo_species %in% tree$tip.label)]
  if (length(wrong_names_d) > 0) {
    warning(paste0(
      "The following species names don't match!\t",
      "Species in Dollo file:\t",
      paste(wrong_names_d, collapse = ", ")
    ))
  }

  # Translate every COMPARE node into a node number of `tree`: the MRCA of the
  # listed species for multi-species rows, the tip index for single species.
  # NOTE(review): rows containing an unmatched species name are expected to
  # fail here (no tip index to assign) -- confirm whether that is intended.
  node_df2$tree_nodes <- rep(NA, nrow(node_df2))
  for (i in seq_len(nrow(node_df2))) {
    species <- unlist(str_split(node_df2[i, "species"], " "))
    if (length(species) > 1) {
      node_df2[i, "tree_nodes"] <- getMRCA(tree, species)
    } else {
      node_df2[i, "tree_nodes"] <- which(tree$tip.label == species)
    }
  }

  # Create the raw matrix: look up the gain and loss characters of each node
  # (NA when a node has no corresponding line).
  gain_split <- raw_df2[raw_df2$event == "GAIN", ]
  loss_split <- raw_df2[raw_df2$event == "LOSSES", ]
  node_df2$gain_event <- gain_split$chars[match(node_df2$nodes, gain_split$node)]
  node_df2$loss_event <- loss_split$chars[match(node_df2$nodes, loss_split$node)]

  # Order the rows by tree node number and keep only the gain/loss columns,
  # without dimnames.
  node_df2 <- node_df2[order(node_df2$tree_nodes), ]
  raw_matrix <- unname(as.matrix(node_df2[, c("gain_event", "loss_event")]))

  compare_ls <- list(tree = tree, raw_data = raw_matrix)

  # Use the retrieve_event3 function on the assembled object.
  events_ls <- retrieve_event3(compare = compare_ls)
  return(events_ls)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.