R/assign_esu.R

##' Assign/create ESUs based on the sequences
##'
##' @importFrom ape read.dna root nj dist.dna
##' @importFrom chopper mergeSeq
##' @importFrom phylobase tipLabels
##' @import phylobase
##' @export
##' @param path path indicating where the COI sequences are stored
##' @param ... additional arguments to pass to \code{findGroups}
assign_esu <- function(path = "~/Documents/plankton-larvae-data/seqs/COI",
                       ...) {

    lst_seq <- list_sequences(seq_path = path)

    tmp_file <- tempfile()

    ## to create an unique key, write all the sequences to a file and
    ## generate its md5sum...
    seqs <- lapply(lst_seq, function(x) readLines(file.path(path,  x)))
    cat(paste(unlist(seqs), collapse="\n"), file=tmp_file)
    key <- tools::md5sum(tmp_file)[[1]]
    seq_store$set(key, lst_seq)
    ape_alg <- alg_store()$get(key)

    tr <- ape::nj(ape::dist.dna(ape_alg, model = "raw"))

    tr <- ape::root(tr, 1, resolve.root = TRUE)
    tr$edge.length[tr$edge.length < 0] <- 0

    tr <- as(tr, "phylo4")
    grp <- findGroups(tr, ...)
    grp_data <- tdata(grp, "tip")

    grp_lst <- split(rownames(grp_data), grp_data$Groups)

    grp_phylum <- lapply(grp_lst, function(x) get_phylum(x))

    ## Make sure that all members of each group belong to a single
    ## phylum
    is_not_unique_phylum <- vapply(grp_phylum, function(x) {
        length(unique(x)) > 1
    }, logical(1))

    if (length(grp_phylum[is_not_unique_phylum])) {
        msg <- mapply(
            function(x, y) {
            paste(
                "  Samples: ", paste(x, collapse = ", "), "\n",
                "   Phyla: ", paste(y, collapse = ", "), "\n\n"
            )
        }, grp_lst[is_not_unique_phylum], grp_phylum[is_not_unique_phylum])
        stop("Members of the same group should be in the same phylum: \n",
             msg, call. = FALSE)
    }

    ## Assemble data frame for the results
    esu_lst <- mapply(function(x, phy) {
        if (!identical(length(x), length(phy)))
            stop("problem: more than one phylum specified: ", paste(phy, collapse = ", "))
        cbind(voucher_number = x,
              phylum = phy,
              group_esu = rep(get_esu(x, phy), length(x))
              )
    }, grp_lst, grp_phylum)

    res <- do.call("rbind", esu_lst)
    res <- as.data.frame(res, stringsAsFactors = FALSE)

    tipLabels(tr) <- paste(tipLabels(tr),
                           apply(res[match(tipLabels(tr), res$voucher_number),
                                     c("phylum", "group_esu")], 1,
                                 function(x) paste(x[1], x[2], sep="-")),
                           sep="_")

    tr <- as(tr, "phylo")
    data_store()$set("esu_tree", tr)

    res
}

##' @export
get_phylum <- function(ids) {
    res <- get_lab("sample_data")
    res <- res[match(ids, res$voucher_number), "phylum", drop = TRUE]

    if (length(res) < 1)
        stop("missing phylum for ", sQuote(paste(ids, collapse = ", ")))
    else
        res
}

get_esu <- function(ids, phylum) {
    if (is.null(ids))
        stop("ids is NULL")
    sample_esu <- get_lab("sample_esu")

    if (length(unique(phylum)) > 1)
        stop("duplicated phylum")

    phylum <- unique(phylum)

    tt <- sample_esu %>%
        dplyr::filter(voucher_number %in% ids) %>%
        dplyr::select(group_esu) %>%
        .[, 1]

    if (length(tt) > 0) {
        esu_id <- unique(tt)
        if (length(esu_id) > 1) {stop("Already assigned ESU with length greater than 1")}
        to_add <- ids[! ids %in% sample_esu$voucher_number]
        if (length(to_add) > 0)
            uu <- data.frame(
                voucher_number = to_add,
                phylum = rep(unique(phylum), length(to_add)),
                group_esu = rep(esu_id, length(to_add))
            )
        else return(esu_id)
    } else {
        smpl_esu <- get_lab("sample_esu")
        esu_id <- smpl_esu[smpl_esu[["phylum"]] == phylum, "group_esu"]

        if (length(esu_id) == 0) {
            esu_id <- 1
        } else {
            esu_id <- max(esu_id) + 1
        }

        uu <- data.frame(
            voucher_number = ids,
            phylum = rep(phylum, length(ids)),
            group_esu = rep(esu_id, length(ids))
        )
    }
    write.table(uu, file = "~/Documents/plankton-larvae-data/sample_esu.csv",
                    sep = ",", dec = ".", qmethod = "double", row.names = FALSE,
                    col.names = FALSE, append = TRUE)
    esu_id
}

fetch_hook_alignment <- function(key, namespace) {
    alg_dir <- tempdir()
    merg  <- chopper::mergeSeq(seq_store$get(key),
                               output = alg_dir, markers = "COI",
                               seqFolder = "~/Documents/plankton-larvae-data/seqs",
                               checkAmbiguity = FALSE
                               )
    ape_alg <- ape::read.dna(file = attr(merg, "aligned_file")[1], format = "fasta",
                             as.matrix = TRUE)
    ape_alg
}
fmichonneau/labmanager documentation built on May 16, 2019, 1:44 p.m.