R/SuperMatrix.R

Defines functions SuperMatrix

Documented in SuperMatrix

SuperMatrix <- function(missing = "-",
                        prefix = "concatenated",
                        save = T,
                        input = "",
                        format.in = "f",
                        format.out = "f",
                        concatenate = T){
  # get file names
  file.names <- list.files()
  if(input != ""){
    file.names <- grep(input, file.names, value=T)
  }
  # read DNA
  DNA <- list()
  for(i in 1:length(file.names)){
    print(paste("Reading alignment", i))
    DNA[[i]] <- read.dna(file=file.names[i],
                         format = format.in,
                         as.character=T)
  }
  # get all taxa names
  taxa <- vector()
  for(i in 1:length(DNA)){
    counter <- length(taxa) + 1
    for(j in 1:nrow(DNA[[i]])){
      taxa <- c(taxa,
                row.names(DNA[[i]])[!row.names(DNA[[i]]) %in% taxa])
    }
  }

  if(concatenate == T){
    # calculate total alignment length
    total.length <- 0
    for(i in 1:length(DNA)){
      total.length <- total.length + ncol(DNA[[i]])
    }
    # create empty alignment
    seqmatrix <- matrix(as.character(missing),
                        length(taxa),
                        total.length)
    rownames(seqmatrix) <- taxa
    # create partition record holder
    partitions <- as.data.frame(matrix(,length(DNA),3))
    colnames(partitions) <- c("part", "start", "stop")
    #build the supermatrix
    c.col <- 0
    print("Creating supermatrix")
    for(i in 1:length(DNA)){
      print(paste("Processing alignment", i))
      gene <- DNA[[i]]
      print(i)
      for(j in 1:nrow(gene)){
        c.row <- which(rownames(seqmatrix) == rownames(gene)[j])
        seqmatrix[c.row, (c.col + 1):(c.col + ncol(gene))] <- gene[j, ]
      }
      partitions[i,1:3] <- c(file.names[i], c.col + 1, c.col + ncol(gene))
      c.col <- c.col + ncol(gene)
    }
    results <- list()
    results[[1]] <- partitions
    results[[2]] <- seqmatrix
    if(save == T){
      print("saving files")
      write.dna(seqmatrix,
                file = paste(prefix, ".fasta", sep = ""),
                format = format.out)
      write.csv(partitions,
                row.names = F,
                file = paste(prefix, ".partitions.csv", sep = ""))
    }
  }

  if(concatenate == F){
    results <- list()
    for(i in 1:length(DNA)){
      seqmatrix <- matrix(as.character(missing),
                          length(taxa),
                          ncol(DNA[[i]]))
      rownames(seqmatrix) <- taxa
      for(j in 1:nrow(DNA[[i]])){
        hit <- which(row.names(DNA[[i]])[j] == row.names(seqmatrix))
        seqmatrix[hit, ] <- DNA[[i]][j, ]
      }
      results[[i]] <- seqmatrix
      if(save == T){
      write.dna(seqmatrix,
                file = paste(prefix, file.names[i], ".fasta", sep = ""),
                format = format.out)
      }
    }
  # create empty alignment
  }
  return(results)
}
coleoguy/evobir documentation built on July 27, 2023, 12:40 p.m.