R/methods.r

Defines functions old2new_genclone mlg.filter mll.levels mll.custom mll.reset nmll mll make_haplotypes as.genambig genclone2genind as.genclone multiploid_string is.genclone as.snpclone is.clone is.snpclone bootgen2genind

Documented in as.genambig as.genclone as.snpclone bootgen2genind genclone2genind is.clone is.genclone is.snpclone make_haplotypes mlg.filter mll mll.custom mll.levels mll.reset nmll old2new_genclone

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#
# This software was authored by Zhian N. Kamvar and Javier F. Tabima, graduate
# students at Oregon State University; Jonah C. Brooks, undergraduate student at
# Oregon State University; and Dr. Nik Grünwald, an employee of USDA-ARS.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for educational, research and non-profit purposes, without fee,
# and without a written agreement is hereby granted, provided that the statement
# above is incorporated into the material, giving appropriate attribution to the
# authors.
#
# Permission to incorporate this software into commercial products may be
# obtained by contacting USDA ARS and OREGON STATE UNIVERSITY Office for
# Commercialization and Corporate Development.
#
# The software program and documentation are supplied "as is", without any
# accompanying services from the USDA or the University. USDA ARS or the
# University do not warrant that the operation of the program will be
# uninterrupted or error-free. The end-user understands that the program was
# developed for research purposes and is advised not to rely exclusively on the
# program for any reason.
#
# IN NO EVENT SHALL USDA ARS OR OREGON STATE UNIVERSITY BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
# LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
# EVEN IF THE OREGON STATE UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE. USDA ARS OR OREGON STATE UNIVERSITY SPECIFICALLY DISCLAIMS ANY
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY STATUTORY
# WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
# BASIS, AND USDA ARS AND OREGON STATE UNIVERSITY HAVE NO OBLIGATIONS TO PROVIDE
# MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#==============================================================================#

# bootgen methods ---------------------------------------------------------

#==============================================================================#
#' Methods used for the bootgen object.
#'
#' This is not designed for user interaction.
#'
#' @rdname bootgen-methods
#' @param x a \code{"\linkS4class{bootgen}"} object
#' @param i vector of numerics indicating number of individuals desired
#' @param j a vector of numerics corresponding to the loci desired.
#' @param ... unused.
#' @param drop set to \code{FALSE}
#' @keywords internal
#' @author Zhian N. Kamvar
#==============================================================================#
setMethod(
  f = "[",
  signature(x = "bootgen"),
  definition = function(x, i, j, ..., drop = FALSE) {
    if (missing(i)) {
      i <- TRUE
    }
    if (missing(j)) {
      j <- TRUE
    }
    loc <- dim(x)[2]
    if (length(j) > loc | any(j > loc)) {
      stop('subscript out of bounds')
    }

    # Taking Names
    locnall <- x@loc.n.all[j]
    allnames <- x@all.names[j]
    names(allnames) <- names(x@all.names)[seq_along(allnames)]
    names(locnall) <- names(allnames)
    alllist <- slot(x, "alllist")[j]
    indices <- unlist(alllist)
    locnames <- rep(names(allnames), locnall)
    tabnames <- paste(locnames, unlist(allnames), sep = ".")
    res <- slot(x, "tab")[i, indices, drop = drop]
    colnames(res) <- tabnames

    ## Resetting all factors that need to be set.
    slot(x, "tab") <- res
    slot(x, "loc.fac") <- factor(locnames, names(allnames))
    slot(x, "loc.n.all") <- locnall
    slot(x, "all.names") <- allnames
    slot(x, "alllist") <- .Call(
      "expand_indices",
      cumsum(locnall),
      length(alllist),
      PACKAGE = "poppr"
    )
    slot(x, "names") <- slot(x, "names")[i]
    return(x)
  }
)

#==============================================================================#
#' @rdname bootgen-methods
#==============================================================================#
setMethod(
  f = "dim",
  signature(x = "bootgen"),
  definition = function(x) {
    return(c(length(slot(x, "names")), nlevels(slot(x, "loc.fac"))))
  }
)
setGeneric("dist")
#==============================================================================#
# @rdname bootgen-methods
#==============================================================================#
setMethod(
  f = "dist",
  signature(x = "bootgen"),
  definition = function(x) {
    return(dist(tab(x)))
  }
)

#==============================================================================#
#' @rdname bootgen-methods
#==============================================================================#
setMethod(
  f = "$",
  signature(x = "bootgen"),
  definition = function(x, name) {
    return(slot(x, name))
  }
)

#==============================================================================#
#' @rdname bootgen-methods
#' @param .Object a character, "bootgen"
#' @param gen a genind, genclone, or genpop object
#' @param na how missing data should be treated. Default is "mean", averaging
#'   over other values in the matrix. Possible values are listed in
#'   \code{\link[adegenet]{tab}}.
#' @param freq if \code{TRUE}, the matrix will be a genotype frequency matrix.
#'   If \code{FALSE}, the matrix will be allele counts.
#==============================================================================#
setMethod(
  f = "initialize",
  signature = "bootgen",
  definition = function(.Object, gen, na = "mean", freq = TRUE) {
    if (missing(gen)) {
      return(.Object)
    }
    if (is.genind(gen)) {
      objnames <- indNames(gen)
    } else if (is.genpop(gen)) {
      objnames <- popNames(gen)
    } else {
      stop("gen must be a valid genind or genpop object.")
    }
    num_alleles <- slot(gen, "loc.n.all")
    num_loci <- length(num_alleles)
    slot(.Object, "tab") <- tab(gen, NA.method = na, freq = freq)
    slot(.Object, "loc.fac") <- slot(gen, "loc.fac")
    slot(.Object, "loc.n.all") <- num_alleles
    slot(.Object, "all.names") <- slot(gen, "all.names")
    slot(.Object, "alllist") <- .Call(
      "expand_indices",
      cumsum(num_alleles),
      num_loci,
      PACKAGE = "poppr"
    )
    slot(.Object, "names") <- objnames
    slot(.Object, "type") <- slot(gen, "type")
    slot(.Object, "ploidy") <- as.integer(slot(gen, "ploidy"))
    return(.Object)
  }
)

setMethod(
  f = "tab",
  signature = "bootgen",
  definition = function(x, freq = TRUE) {
    # freq is a dummy argument
    return(x@tab)
  }
)

#==============================================================================#
#' @export
#' @rdname coercion-methods
#' @param bg a bootgen object
#' @aliases bootgen2genind,bootgen-method
#' @docType methods
#==============================================================================#
bootgen2genind <- function(bg) {
  standardGeneric("bootgen2genind")
}

#' @export
setGeneric("bootgen2genind")

setMethod(
  f = "bootgen2genind",
  signature = "bootgen",
  definition = function(bg) {
    xtab <- tab(bg)
    if (is.numeric(xtab)) {
      xtab[] <- as.integer(xtab * bg@ploidy)
    }
    res <- new("genind", tab = xtab)
    return(res)
  }
)


# bruvomat methods --------------------------------------------------------

#==============================================================================#
#' @rdname bruvomat-methods
#' @param .Object a character, "bruvomat"
#' @param gen \code{"\link[adegenet:genind-class]{genind}"} object
#' @param replen a vector of numbers indicating the repeat length for each
#'   microsatellite locus.
#' @keywords internal
#' @author Zhian N. Kamvar
#==============================================================================#
setMethod(
  f = "initialize",
  signature = "bruvomat",
  definition = function(.Object, gen, replen) {
    if (missing(gen)) {
      return(.Object)
    }
    if (
      missing(replen) ||
        length(replen) < nLoc(gen) ||
        (is.null(names(replen)) && length(replen) != nLoc(gen))
    ) {
      replen <- vapply(
        gen@all.names,
        function(y) guesslengths(as.numeric(y)),
        1
      )
    }
    replen <- match_replen_to_loci(locNames(gen), replen)
    ploid <- max(ploidy(gen))
    popdf <- genind2df(gen, sep = "/", usepop = FALSE)
    mat <- generate_bruvo_mat(
      popdf,
      maxploid = ploid,
      sep = "/",
      mat_type = "numeric"
    )
    mat[is.na(mat)] <- 0
    slot(.Object, "mat") <- mat
    slot(.Object, "replen") <- replen
    slot(.Object, "ploidy") <- ploid
    slot(.Object, "ind.names") <- indNames(gen)
    return(.Object)
  }
)

#==============================================================================#
#' @rdname bruvomat-methods
#' @keywords internal
#==============================================================================#
setMethod(
  f = "dim",
  signature(x = "bruvomat"),
  definition = function(x) {
    return(c(nrow(x@mat), ncol(x@mat) / x@ploidy))
  }
)

#==============================================================================#
#' Methods used for the bruvomat object.
#'
#' This is not designed for user interaction.
#'
#' @rdname bruvomat-methods
#' @param x a \code{"\linkS4class{bruvomat}"} object
#' @param i vector of numerics indicating number of individuals desired
#' @param j a vector of numerics corresponding to the loci desired.
#' @param ... unused.
#' @keywords internal
#' @param drop set to \code{FALSE}
#==============================================================================#
setMethod(
  f = "[",
  signature(x = "bruvomat"),
  definition = function(x, i, j, ..., drop = FALSE) {
    if (missing(i)) {
      i <- TRUE
    }
    if (missing(j)) {
      j <- TRUE
    }
    x@replen <- x@replen[j]
    x@ind.names <- x@ind.names[i]
    cols <- rep(seq(ncol(x)), each = x@ploidy)
    if (length(j) == 1 && is.logical(j)) {
      replacement <- j
    } else if (is.logical(j)) {
      replacement <- rep(j, each = x@ploidy)
    } else {
      replacement <- vapply(j, function(ind) which(cols == ind), 1:x@ploidy)
    }
    x@mat <- x@mat[i, as.vector(replacement), drop = FALSE]
    return(x)
  }
)


# snpclone methods --------------------------------------------------------

#==============================================================================#
#' @export
#' @rdname is.clone
#' @examples
#' (sc <- as.snpclone(glSim(100, 1e3, ploid=2, parallel = FALSE),
#'                    parallel = FALSE, n.cores = 1L))
#' is.snpclone(sc)
#==============================================================================#
is.snpclone <- function(x) {
  res <- (is(x, "snpclone"))
  return(res)
}

#==============================================================================#
#' @export
#' @rdname is.clone
#' @examples
#' is.clone(sc)
#==============================================================================#
is.clone <- function(x) {
  res <- (is(x, "snpclone") || is(x, "genclone"))
  return(res)
}

#==============================================================================#
#' Methods used for the snpclone object
#'
#' Default methods for subsetting snpclone objects.
#'
#' @rdname snpclone-method
#' @param x a snpclone object
#' @param i vector of numerics indicating number of individuals desired
#' @param j a vector of numerics corresponding to the loci desired.
#' @param ... passed on to the \code{\link[adegenet:genlight-class]{genlight}} object.
#' @param mlg.reset logical. Defaults to \code{FALSE}. If \code{TRUE}, the mlg
#'   vector will be reset
#' @param drop set to \code{FALSE}
#' @author Zhian N. Kamvar
#==============================================================================#
setMethod(
  f = "[",
  signature(x = "snpclone", i = "ANY", j = "ANY", drop = "ANY"),
  definition = function(x, i, j, ..., mlg.reset = FALSE, drop = FALSE) {
    if (missing(i)) {
      i <- TRUE
    }

    # handle MLG indices
    ismlgclass <- inherits(x@mlg, "MLG")
    # Tue Sep 27 12:08:37 2016 ------------------------------
    # Methods for subsetting genind object by sample name currently don't exist
    i <- handle_mlg_index(i, x)
    # mlg        <- if (ismlgclass) x@mlg[i, all = TRUE] else x@mlg[i]

    # Subset data; replace MLGs
    x <- callNextMethod(x = x, i = i, j = j, ..., drop = drop)
    if (!mlg.reset) {
      if (inherits(x@mlg, "MLG")) {
        x@mlg <- x@mlg[i, all = TRUE]
      } else {
        x@mlg <- x@mlg[i]
      }
    } else {
      if (!inherits(x@mlg, "MLG")) {
        x@mlg <- mlg.vector(x, reset = TRUE)
      } else {
        x@mlg <- new("MLG", mlg.vector(x, reset = TRUE))
      }
    }

    # x@mlg <- mlg
    return(x)
  }
)

#==============================================================================#
#' @rdname snpclone-method
#' @param .Object a character, "snpclone"
#' @param mlg a vector where each element assigns the multilocus genotype of
#' that individual in the data set.
#' @param mlgclass a logical value specifying whether or not to translate the
#' mlg object into an MLG class object.
#' @author Zhian N. Kamvar
#' @keywords internal
#==============================================================================#
setMethod(
  f = "initialize",
  signature("snpclone"),
  definition = function(.Object, ..., mlg, mlgclass = TRUE) {
    .Object <- callNextMethod(.Object, ..., mlg)
    if (missing(mlg)) {
      mlg <- mlg.vector(.Object)
    }
    if (mlgclass) {
      mlg <- new("MLG", mlg)
      distname(mlg) <- "bitwise.dist"
    }
    slot(.Object, "mlg") <- mlg
    return(.Object)
  }
)

#==============================================================================#
#' @rdname snpclone-method
#' @param object a snpclone object
#==============================================================================#
setMethod(
  f = "show",
  signature("snpclone"),
  definition = function(object) {
    y <- utils::capture.output(callNextMethod())
    y <- gsub("GENLIGHT", "SNPCLONE", y)
    y <- gsub("/", "|", y)
    genspot <- grepl("@gen", y)
    ismlg <- inherits(object@mlg, "MLG")

    mlgtype <- if (ismlg) paste0(visible(object@mlg), " ") else ""
    # mlgtype  <- ifelse(ismlg, paste0(the_type, " "), "")
    mlgtype <- paste0(mlgtype, "multilocus genotypes")

    msg <- paste("   @mlg:", length(unique(object@mlg[])), mlgtype)
    if (mlgtype == "contracted multilocus genotypes") {
      thresh <- round(cutoff(object@mlg)["contracted"], 3)
      algo <- strsplit(distalgo(object@mlg), "_")[[1]][1]
      dist <- distname(object@mlg)
      if (!is.character(dist)) {
        dist <- paste0(utils::capture.output(dist), collapse = "")
      }
      msg <- paste0(
        msg,
        "\n         ",
        "(",
        thresh,
        ") [t], (",
        dist,
        ") [d], (",
        algo,
        ") [a]"
      )
    }
    y[genspot] <- paste(y[genspot], msg, sep = "\n")
    cat(y, sep = "\n")
  }
)


#==============================================================================#
#' Create a snpclone object from a genlight object.
#'
#' Wrapper for snpclone initializer.
#'
#' @export
#' @rdname snpclone-coercion-methods
#' @aliases as.snpclone,genlight-method
#' @param x a \code{\link[adegenet:genlight-class]{genlight}} or \code{\linkS4class{snpclone}}
#'   object
#' @param ... arguments to be passed on to the genlight constructor. These are
#'   not used if x is not missing.
#' @param parallel should the parallel package be used to construct the object?
#' @param n.cores how many cores should be utilized? See documentation for
#'   \code{\link[adegenet:genlight-class]{genlight}} for details.
#' @param mlg a vector of multilocus genotypes or an object of class MLG for the
#'   new snpclone object.
#' @param mlgclass if \code{TRUE} (default), the multilocus genotypes will be
#'   represented as an \code{\linkS4class{MLG}} object.
#'
#' @docType methods
#'
#' @author Zhian N. Kamvar
#' @examples
#' (x <- as.snpclone(glSim(100, 1e3, ploid=2)))
#' \dontrun{
#' # Without parallel processing
#' system.time(x <- as.snpclone(glSim(1000, 1e5, ploid=2)))
#'
#' # With parallel processing... doesn't really save you much time.
#' system.time(x <- as.snpclone(glSim(1000, 1e5, ploid=2, parallel = TRUE),
#'                              parallel = TRUE))
#' }
#'
#==============================================================================#
as.snpclone <- function(
  x,
  ...,
  parallel = FALSE,
  n.cores = NULL,
  mlg,
  mlgclass = TRUE
) {
  standardGeneric("as.snpclone")
}

#' @export
setGeneric("as.snpclone")


setMethod(
  f = "as.snpclone",
  signature(x = "genlight"),
  definition = function(
    x,
    ...,
    parallel = FALSE,
    n.cores = NULL,
    mlg,
    mlgclass = TRUE
  ) {
    if (!missing(x) && is(x, "genlight")) {
      if (missing(mlg)) {
        mlg <- mlg.vector(x)
      }
      res <- new(
        "snpclone",
        gen = x@gen,
        n.loc = nLoc(x),
        ind.names = indNames(x),
        loc.names = locNames(x),
        loc.all = alleles(x),
        chromosome = chr(x),
        position = position(x),
        strata = strata(x),
        hierarchy = x@hierarchy,
        ploidy = ploidy(x),
        pop = pop(x),
        other = other(x),
        parallel = parallel,
        n.cores = n.cores,
        mlg = mlg,
        mlgclass = mlgclass
      )
    } else {
      res <- new("snpclone", ..., mlg = mlg, mlgclass = mlgclass)
    }
    return(res)
  }
)

# genclone methods --------------------------------------------------------

#==============================================================================#
#' Check for validity of a genclone or snpclone object
#'
#' @note a \linkS4class{genclone} object will always be a valid
#' \link[adegenet:genind-class]{genind} object and a \linkS4class{snpclone} object will always
#' be a valid \link[adegenet:genlight-class]{genlight} object.
#'
#' @export
#' @rdname is.clone
#' @param x a genclone or snpclone object
#' @author Zhian N. Kamvar
#' @examples
#' data(nancycats)
#' nanclone <- as.genclone(nancycats)
#' is.genclone(nanclone)
#==============================================================================#
is.genclone <- function(x) {
  res <- (is(x, "genclone"))
  return(res)
}
#==============================================================================#
#' @rdname genclone-method
#' @param .Object a character, "genclone"
#' @param mlg a vector where each element assigns the multilocus genotype of
#'   that individual in the data set.
#' @param mlgclass a logical value specifying whether or not to translate the
#'   mlg object into an MLG class object.
#' @keywords internal
#==============================================================================#
setMethod(
  f = "initialize",
  signature("genclone"),
  definition = function(.Object, ..., mlg, mlgclass = TRUE) {
    .Object <- callNextMethod(.Object, ..., mlg = mlg)
    if (missing(mlg)) {
      mlg <- mlg.vector(.Object)
    }
    if (mlgclass) {
      mlg <- new("MLG", mlg)
    }
    slot(.Object, "mlg") <- mlg
    return(.Object)
  }
)

#==============================================================================#
#' Methods used for the genclone object
#'
#' Default methods for subsetting genclone objects.
#'
#' @rdname genclone-method
#' @param x a genclone object
#' @param i vector of numerics indicating number of individuals desired
#' @param j a vector of numerics corresponding to the loci desired.
#' @param ... passed on to the \code{\link[adegenet:genind-class]{genind}} object.
#' @param drop set to \code{FALSE}
#' @param mlg.reset logical. Defaults to \code{FALSE}. If \code{TRUE}, the mlg
#'   vector will be reset
#' @author Zhian N. Kamvar
#==============================================================================#
setMethod(
  f = "[",
  signature(x = "genclone", i = "ANY", j = "ANY", drop = "ANY"),
  definition = function(x, i, j, ..., mlg.reset = FALSE, drop = FALSE) {
    if (missing(i)) {
      i <- TRUE
    }
    newi <- i
    # Handling populations. This takes precedence over sample subsetting.
    dots <- list(...)
    if (any(names(dots) == "pop")) {
      newi <- handle_pops_index(dots[["pop"]], x)
    }
    mlgi <- handle_mlg_index(newi, x)

    x <- callNextMethod(x = x, i = i, j = j, ..., drop = drop)

    if (!mlg.reset) {
      if (inherits(x@mlg, "MLG")) {
        x@mlg <- x@mlg[mlgi, all = TRUE]
      } else {
        x@mlg <- x@mlg[mlgi]
      }
    } else {
      if (!inherits(x@mlg, "MLG")) {
        x@mlg <- mlg.vector(x, reset = TRUE)
      } else {
        x@mlg <- new("MLG", mlg.vector(x, reset = TRUE))
      }
    }
    return(x)
  }
)

#' create a string of ploidy counts
#'
#' @param x a gen object
#' @param ploid a string of ploidy prefixes.
#'
#' @return a string containing ploidy levels with counts for all samples
#' @noRd
#'
multiploid_string <- function(x, ploid) {
  ploid <- paste(ploid, paste0("(", table(x@ploidy), ")"))
  ploid1 <- paste0(ploid[-length(ploid)], collapse = ", ")
  sep <- ifelse(length(ploid) > 2, ", ", " ")
  ploid <- paste0(ploid1, sep, "and ", ploid[length(ploid)])
}
#==============================================================================#
#' @rdname genclone-method
#' @param object a genclone object
#==============================================================================#
setMethod(
  f = "show",
  signature("genclone"),
  definition = function(object) {
    ploid <- c(
      "ha",
      "di",
      "tri",
      "tetra",
      "penta",
      "hexa",
      "hepta",
      "octa",
      "nona",
      "deca",
      "hendeca",
      "dodeca"
    )
    ploid <- paste0(unique(ploid[sort(object@ploidy)]), "ploid")
    ploid <- if (length(ploid) > 1) multiploid_string(object, ploid) else ploid
    nind <- nInd(object)
    type <- ifelse(object@type == "PA", "dominant", "codominant")
    nmlg <- length(unique(object@mlg))
    nloc <- nLoc(object)
    npop <- ifelse(is.null(object@pop), 0, nPop(object))
    strata <- length(object@strata)
    chars <- nchar(c(nmlg, nind, nloc, strata, 1, npop))
    ltab <- max(chars) - chars
    ltab <- vapply(ltab, function(x) substr("       ", 1, x + 1), character(1))
    pops <- popNames(object)
    poplen <- length(pops)
    the_type <- ifelse(is(object@mlg, "MLG"), visible(object@mlg), "nope")
    mlgtype <- ifelse(is(object@mlg, "MLG"), paste0(the_type, " "), "")
    mlgtype <- paste0(mlgtype, "multilocus genotypes")
    if (the_type == "contracted") {
      thresh <- round(cutoff(object@mlg)["contracted"], 3)
      dist <- distname(object@mlg)
      algo <- strsplit(distalgo(object@mlg), "_")[[1]][1]
      if (!is.character(dist)) {
        dist <- paste(utils::capture.output(dist), collapse = "")
      }
      contab <- max(chars)
      contab <- substr("             ", 1, contab + 4)
      mlgthresh <- paste0(
        "\n",
        contab,
        "(",
        thresh,
        ") [t], (",
        dist,
        ") [d], (",
        algo,
        ") [a]"
      )
      mlgtype <- paste0(mlgtype, mlgthresh)
    }
    if (poplen > 7) {
      pops <- c(pops[1:3], "...", pops[(poplen - 2):poplen])
    }
    stratanames <- names(object@strata)
    stratalen <- length(stratanames)
    if (stratalen > 7) {
      stratanames <- c(
        stratanames[1:3],
        "...",
        stratanames[(stratalen - 2):stratalen]
      )
    }
    cat("\nThis is a genclone object\n")
    cat("-------------------------\n")
    cat(
      "Genotype information:\n\n",
      ltab[1],
      nmlg,
      mlgtype,
      "\n",
      ltab[2],
      nind,
      ploid,
      "individuals\n",
      ltab[3],
      nloc,
      type,
      "loci\n\n"
    )
    popstrata <- ifelse(strata > 1, "strata -", "stratum -")
    if (strata == 0) {
      popstrata <- "strata."
    }
    popdef <- ifelse(npop > 0, "defined -", "defined.")
    cat("Population information:\n\n")
    cat(
      "",
      ltab[4],
      strata,
      popstrata,
      paste(stratanames, collapse = ", "),
      fill = TRUE
    )
    if (!is.null(object@hierarchy)) {
      cat(
        "",
        ltab[5],
        "1",
        "hierarchy -",
        paste(object@hierarchy, collapse = ""),
        fill = TRUE
      )
    }
    cat(
      "",
      ltab[6],
      npop,
      "populations",
      popdef,
      paste(pops, collapse = ", "),
      fill = TRUE
    )
  }
)

setGeneric("print")
#==============================================================================#
#' @rdname genclone-method
#' @export
#' @param x a genclone object
#' @param fullnames \code{logical}. If \code{TRUE}, then the full names of the
#'   populations will be printed. If \code{FALSE}, then only the first and last
#'   three population names are displayed.
#==============================================================================#
setMethod(
  f = "print",
  signature("genclone"),
  definition = function(x, ..., fullnames = TRUE) {
    nmlg <- length(unique(x@mlg))
    ploid <- c(
      "ha",
      "di",
      "tri",
      "tetra",
      "penta",
      "hexa",
      "hepta",
      "octa",
      "nona",
      "deca",
      "hendeca",
      "dodeca"
    )
    ploid <- paste0(unique(ploid[sort(x@ploidy)]), "ploid")
    ploid <- if (length(ploid) > 1) multiploid_string(x, ploid) else ploid
    nind <- nInd(x)
    type <- ifelse(x@type == "PA", "dominant", "codominant")
    nloc <- nLoc(x)
    npop <- ifelse(is.null(x@pop), 0, nPop(x))
    strata <- length(x@strata)

    chars <- nchar(c(nmlg, nind, nloc, strata, 1, npop))
    ltab <- max(chars) - chars
    ltab <- vapply(ltab, function(x) substr("       ", 1, x + 1), character(1))
    pops <- popNames(x)
    stratanames <- names(x@strata)
    the_type <- ifelse(is(x@mlg, "MLG"), visible(x@mlg), "nope")
    mlgtype <- ifelse(is(x@mlg, "MLG"), paste0(the_type, " "), "")
    mlgtype <- paste0(mlgtype, "multilocus genotypes")
    if (the_type == "contracted") {
      thresh <- round(cutoff(x@mlg)["contracted"], 3)
      dist <- distname(x@mlg)
      algo <- strsplit(distalgo(x@mlg), "_")[[1]][1]
      if (!is.character(dist)) {
        dist <- paste(utils::capture.output(dist), collapse = "")
      }
      contab <- max(chars)
      contab <- substr("             ", 1, contab + 4)
      mlgthresh <- paste0(
        "\n",
        contab,
        "(",
        thresh,
        ") [t], (",
        dist,
        ") [d], (",
        algo,
        ") [a]"
      )
      mlgtype <- paste0(mlgtype, mlgthresh)
    }
    if (!fullnames) {
      poplen <- length(pops)
      stratalen <- length(stratanames)
      if (poplen > 7) {
        pops <- c(pops[1:3], "...", pops[(poplen - 2):poplen])
      }
      if (stratalen > 7) {
        stratanames <- c(
          stratanames[1:3],
          "...",
          stratanames[(stratalen - 2):stratalen]
        )
      }
    }
    cat("\nThis is a genclone object\n")
    cat("-------------------------\n")
    cat(
      "Genotype information:\n\n",
      ltab[1],
      nmlg,
      mlgtype,
      "\n",
      ltab[2],
      nind,
      ploid,
      "individuals\n",
      ltab[3],
      nloc,
      type,
      "loci\n\n",
      fill = TRUE
    )
    popstrata <- ifelse(strata > 1, "strata -", "stratum -")
    if (strata == 0) {
      popstrata <- "strata."
    }
    popdef <- ifelse(npop > 0, "defined -", "defined.")
    cat("Population information:\n\n")
    cat("", ltab[4], strata, popstrata, stratanames, fill = TRUE)
    if (!is.null(x@hierarchy)) {
      cat(
        "",
        ltab[5],
        "1",
        "hierarchy -",
        paste(x@hierarchy, collapse = ""),
        fill = TRUE
      )
    }
    cat("", ltab[6], npop, "populations", popdef, pops, fill = TRUE)
  }
)

#==============================================================================#
#' Switch between genind and genclone objects.
#'
#' as.genclone will create a genclone object from a genind object OR anything
#' that can be passed to the genind initializer.
#'
#' genclone2genind will remove the mlg slot from the genclone object, creating a
#' genind object.
#'
#' as.genambig will convert a genind or genclone object to a polysat genambig
#' class.
#'
#' @export
#' @rdname coercion-methods
#' @aliases as.genclone,genind-method
#' @param x a \code{\link[adegenet:genind-class]{genind}} or \code{\linkS4class{genclone}}
#'   object
#' @param ... arguments passed on to the \code{\link[adegenet:genind-class]{genind}} constructor
#' @param mlg an optional vector of multilocus genotypes as integers
#' @param mlgclass should the mlg slot be of class MLG?
#' @docType methods
#'
#' @seealso \code{\link[adegenet]{splitStrata}}, \code{\linkS4class{genclone}},
#'   \code{\link{read.genalex}}
#'   \code{\link{aboot}}
#' @author Zhian N. Kamvar
#' @examples
#' data(Aeut)
#' Aeut
#'
#' # Conversion to genclone --------------------------------------------------
#' Aeut.gc <- as.genclone(Aeut)
#' Aeut.gc
#'
#' # Conversion to genind ----------------------------------------------------
#' Aeut.gi <- genclone2genind(Aeut.gc)
#' Aeut.gi
#'
#' # Conversion to polysat's "genambig" class --------------------------------
#' if (require("polysat")) {
#'   data(Pinf)
#'   Pinf.gb <- as.genambig(Pinf)
#'   summary(Pinf.gb)
#' }
#'
#' data(nancycats)
#'
#' # Conversion to bootgen for random sampling of loci -----------------------
#' nan.bg  <- new("bootgen", nancycats[pop = 9])
#' nan.bg
#'
#' # Conversion back to genind -----------------------------------------------
#' nan.gid <- bootgen2genind(nan.bg)
#' nan.gid
#'
#==============================================================================#
as.genclone <- function(x, ..., mlg, mlgclass = TRUE) {
  standardGeneric("as.genclone")
}

#' @export
setGeneric("as.genclone")


setMethod(
  f = "as.genclone",
  signature(x = "genind"),
  definition = function(x, ..., mlg, mlgclass = TRUE) {
    theCall <- match.call()
    if (!missing(x) && is.genind(x)) {
      theOther <- x@other
      if (missing(mlg)) {
        mlg <- mlg.vector(x)
      }
      res <- new(
        "genclone",
        tab = tab(x),
        ploidy = ploidy(x),
        pop = pop(x),
        type = x@type,
        prevcall = theCall,
        strata = x@strata,
        hierarchy = x@hierarchy,
        mlg = mlg,
        mlgclass = mlgclass
      )
      res@other <- theOther
    } else {
      res <- new("genclone", ..., mlg = mlg, mlgclass = mlgclass)
    }
    return(res)
  }
)
#==============================================================================#
#' @export
#' @rdname coercion-methods
#' @aliases genclone2genind,genclone-method
#' @docType methods
#==============================================================================#
genclone2genind <- function(x) {
  standardGeneric("genclone2genind")
}

#' @export
setGeneric("genclone2genind")

setMethod(
  f = "genclone2genind",
  signature(x = "genclone"),
  definition = function(x) {
    attributes(x) <- attributes(x)[slotNames(new("genind"))]
    class(x) <- "genind"
    x@call <- match.call()
    return(x)
  }
)

#' @export
#' @rdname coercion-methods
#' @aliases as.genambig,genind-method
#' @docType methods
as.genambig <- function(x) {
  standardGeneric("as.genambig")
}

#' @export
setMethod(
  f = "as.genambig",
  signature(x = "genind"),
  definition = function(x) {
    suppressWarnings({
      gen <- recode_polyploids(x, newploidy = max(x@ploidy, na.rm = TRUE))
    })
    gendf <- genind2df(gen, sep = "/", usepop = FALSE)
    gendf <- lapply(gendf, strsplit, "/")
    gendf <- lapply(gendf, lapply, as.numeric)
    ambig <- new("genambig", samples = indNames(gen), loci = locNames(gen))
    for (i in names(gendf)) {
      res <- lapply(gendf[[i]], function(x) {
        ifelse(is.na(x), polysat::Missing(ambig), x)
      })
      polysat::Genotypes(ambig, loci = i) <- res
    }
    polysat::Ploidies(ambig) <- info_table(x, type = "ploidy")
    if (!is.null(pop(x))) {
      polysat::PopInfo(ambig) <- as.integer(pop(x))
      polysat::PopNames(ambig) <- popNames(x)
    }
    return(ambig)
  }
)

#==============================================================================#
# Seploc method for genclone objects.
# Note that this is not the most efficient, but it is difficult to work around
# the method for genind objects as they are forced to be genind objects later.
#==============================================================================#
setMethod(
  f = "seploc",
  signature(x = "genclone"),
  definition = function(x, truenames = TRUE, res.type = c("genind", "matrix")) {
    ARGS <- c("genind", "matrix")
    res.type <- match.arg(res.type, ARGS)
    if (res.type == "matrix") {
      splitsville <- split(colnames(x@tab), locFac(x))
      listx <- lapply(splitsville, function(i) x@tab[, i, drop = FALSE])
    } else {
      listx <- lapply(locNames(x), function(i) x[loc = i])
    }
    names(listx) <- locNames(x)
    return(listx)
  }
)

#' Split samples from a genind object into pseudo-haplotypes
#'
#'
#' @param gid a [genind][adegenet::genind] or [genlight][adegenet::genlight] object.
#'
#' @return a haploid genind object with an extra [strata][adegenet::strata]
#'   column called "Individual".
#' @export
#' @md
#'
#' @note The [other slot][adegenet::other] will not be copied over to the new
#'   genind object.
#'
#' @seealso [poppr.amova()] [pegas::amova()] [as.genambig()]
#'
#' @details
#' Certain analyses, such as [amova][poppr.amova] work best if within-sample
#' variance (error) can be estimated. Practically, this is performed by
#' splitting the genotypes across all loci to create multiple haplotypes. This
#' way, the within-sample distance can be calculated and incorporated into the
#' model. Please note that the haplotypes generated are based on the order of
#' the unphased alleles in the genind object and do not represent true
#' haplotypes.
#'
#' Haploid data will be returned un-touched.
#'
#' @rdname make_haplotypes-method
#' @docType methods
#' @aliases make_haplotypes,genclone-method
#'   make_haplotypes,snpclone-method
#'   make_haplotypes,genind-method
#'   make_haplotypes,genlight-method
#'   make_haplotypes,ANY-method
#' @examples
#' # Diploid data is doubled -------------------------------------------------
#'
#' data(nancycats)
#' nan9 <- nancycats[pop = 9]
#' nan9hap <- make_haplotypes(nan9)
#' nan9              # 9 individuals from population 9
#' nan9hap           # 18 haplotypes
#' strata(nan9hap)   # strata gains a new column: Individual
#' indNames(nan9hap) # individuals are renamed sequentially
#'
#'
#' # Mix ploidy data can be split, but should be treated with caution --------
#' #
#' # For example, the Pinf data set contains 86 tetraploid individuals,
#' # but there appear to only be diploids and triploid genotypes. When
#' # we convert to haplotypes, those with all missing data are dropped.
#' data(Pinf)
#' Pinf
#' pmiss <- info_table(Pinf, type = "ploidy", plot = TRUE)
#'
#' # No samples appear to be triploid across all loci. This will cause
#' # several haplotypes to have a lot of missing data.
#' p_haps <- make_haplotypes(Pinf)
#' p_haps
#' head(genind2df(p_haps), n = 20)
make_haplotypes <- function(gid) standardGeneric("make_haplotypes")

#' @export
setGeneric("make_haplotypes")

setMethod(
  f = "make_haplotypes",
  signature(gid = "ANY"),
  definition = function(gid) {
    stop("This function can only take genind or genlight objects")
  }
)

setMethod(
  f = "make_haplotypes",
  signature(gid = "genind"),
  definition = function(gid) {
    make_haplotypes_genind(gid)
  }
)

setMethod(
  f = "make_haplotypes",
  signature(gid = "genlight"),
  definition = function(gid) {
    make_haplotypes_genlight(gid)
  }
)

# multilocus lineage methods ----------------------------------------------

#==============================================================================#
#' Access and manipulate multilocus lineages.
#'
#' The following methods allow the user to access and manipulate multilocus
#' lineages in genclone or snpclone objects.
#'
#' @export
#' @param x a \linkS4class{genclone} or \linkS4class{snpclone} object.
#' @param type a character specifying "original", "contracted", or "custom"
#'   defining they type of mlgs to return. Defaults to what is set in the
#'   object.
#' @param value a character specifying which mlg type is visible in the object.
#'   See details.
#'
#' @return an object of the same type as x.
#'
#' @details \linkS4class{genclone} and \linkS4class{snpclone} objects have a
#'   slot for an internal class of object called \linkS4class{MLG}. This class
#'   allows the storage of flexible mll definitions: \itemize{ \item "original"
#'   - naive mlgs defined by string comparison. This is default. \item
#'   "contracted" - mlgs defined by a genetic distance threshold. \item "custom"
#'   - user-defined MLGs }
#'
#' @rdname mll-method
#' @aliases mll,genclone-method
#'   mll,snpclone-method
#'   mll,genind-method
#'   mll,genlight-method
#' @docType methods
#' @author Zhian N. Kamvar
#' @seealso \code{\link{mll.custom}} \code{\link{mlg.table}}
#' @examples
#'
#' data(partial_clone)
#' pc <- as.genclone(partial_clone)
#' mll(pc)
#' mll(pc) <- "custom"
#' mll(pc)
#' mll.levels(pc) <- LETTERS
#' mll(pc)
#==============================================================================#
mll <- function(x, type = NULL) standardGeneric("mll")

#' @export
setGeneric("mll")

setMethod(
  f = "mll",
  signature(x = "genind"),
  definition = function(x, type = NULL) {
    mll.gen.internal(x, type)
  }
)

setMethod(
  f = "mll",
  signature(x = "genlight"),
  definition = function(x, type = NULL) {
    mll.gen.internal(x, type)
  }
)

setMethod(
  f = "mll",
  signature(x = "genclone"),
  definition = function(x, type = NULL) {
    mll.internal(x, type, match.call())
  }
)

setMethod(
  f = "mll",
  signature(x = "snpclone"),
  definition = function(x, type = NULL) {
    mll.internal(x, type, match.call())
  }
)

#==============================================================================#
#' @export
#' @rdname mll-method
#' @aliases nmll,genclone-method
#'   nmll,snpclone-method
#'   nmll,genind-method
#'   nmll,genlight-method
#' @docType methods
#==============================================================================#
nmll <- function(x, type = NULL) standardGeneric("nmll")

#' @export
setGeneric("nmll")

setMethod(
  f = "nmll",
  signature(x = "genclone"),
  definition = function(x, type = NULL) {
    length(unique(mll(x, type)))
  }
)

setMethod(
  f = "nmll",
  signature(x = "snpclone"),
  definition = function(x, type = NULL) {
    length(unique(mll(x, type)))
  }
)

setMethod(
  f = "nmll",
  signature(x = "genind"),
  definition = function(x, type = NULL) {
    mlg(x, quiet = TRUE)
  }
)

setMethod(
  f = "nmll",
  signature(x = "genlight"),
  definition = function(x, type = NULL) {
    mlg(x, quiet = TRUE)
  }
)

#==============================================================================#
#' @export
#' @rdname mll-method
#' @aliases mll<-,genclone-method
#'   mll<-,snpclone-method
#' @docType methods
#==============================================================================#
"mll<-" <- function(x, value) standardGeneric("mll<-")

#' @export
setGeneric("mll<-")

setMethod(
  f = "mll<-",
  signature(x = "genclone"),
  definition = function(x, value) {
    TYPES <- c("original", "expanded", "contracted", "custom")
    value <- match.arg(value, TYPES)
    if (!"MLG" %in% class(x@mlg)) {
      x@mlg <- new("MLG", x@mlg)
    }
    visible(x@mlg) <- value
    return(x)
  }
)

setMethod(
  f = "mll<-",
  signature(x = "snpclone"),
  definition = function(x, value) {
    TYPES <- c("original", "expanded", "contracted", "custom")
    value <- match.arg(value, TYPES)
    if (!"MLG" %in% class(x@mlg)) {
      x@mlg <- new("MLG", x@mlg)
      distname(x@mlg) <- "bitwise.dist"
    }
    visible(x@mlg) <- value
    return(x)
  }
)

#==============================================================================#
#' Reset multilocus lineages
#'
#' This function will allow you to reset multilocus lineages for your data set.
#'
#' @export
#' @param x a \linkS4class{genclone} or \linkS4class{snpclone} object.
#' @param value a character vector that specifies which levels you wish to be
#'   reset.
#'
#' @note This method has no assignment method. If "original" is not contained in
#'   "value", it is assumed that the "original" definition will be used to reset
#'   the MLGs.
#' @return an object of the same type as x
#' @rdname mll.reset-method
#' @aliases mll.reset,genclone-method
#'   mll.reset,snpclone-method
#' @docType methods
#' @author Zhian N. Kamvar
#' @seealso \code{\link{mll}} \code{\link{mlg.table}} \code{\link{mll.custom}}
#' @examples
#'
#' # This data set was a subset of a larger data set, so the multilocus
#' # genotypes are not all sequential
#' data(Pinf)
#' mll(Pinf) <- "original"
#' mll(Pinf)
#'
#' # If we use mll.reset, then it will become sequential
#' Pinf.new <- mll.reset(Pinf, TRUE) # reset all
#' mll(Pinf.new)
#'
#' \dontrun{
#'
#' # It is possible to reset only specific mll definitions. For example, let's
#' # say that we wanted to filter our multilocus genotypes by nei's distance
#' mlg.filter(Pinf, dist = nei.dist, missing = "mean") <- 0.02
#'
#' # And we wanted to set those as custom genotypes,
#' mll.custom(Pinf) <- mll(Pinf, "contracted")
#' mll.levels(Pinf) <- .genlab("MLG", nmll(Pinf, "custom"))
#'
#' # We could reset just the original and the filtered if we wanted to and keep
#' # the custom as it were.
#'
#' Pinf.new <- mll.reset(Pinf, c("original", "contracted"))
#'
#' mll(Pinf.new, "original")
#' mll(Pinf.new, "contracted")
#' mll(Pinf.new, "custom")
#'
#' # If "original" is not one of the values, then that is used as a baseline.
#' Pinf.orig <- mll.reset(Pinf, "contracted")
#' mll(Pinf.orig, "contracted")
#' mll(Pinf.new, "contracted")
#' }
#'
#==============================================================================#
mll.reset <- function(x, value) standardGeneric("mll.reset")

#' @export
setGeneric("mll.reset")

setMethod(
  f = "mll.reset",
  signature(x = "genclone"),
  definition = function(x, value) {
    mll.reset.internal(x, value)
  }
)

setMethod(
  f = "mll.reset",
  signature(x = "snpclone"),
  definition = function(x, value) {
    mll.reset.internal(x, value)
  }
)
#==============================================================================#
#' Define custom multilocus lineages
#'
#' This function will allow you to define custom multilocus lineages for your
#' data set.
#'
#' @export
#' @param x a \linkS4class{genclone} or \linkS4class{snpclone} object.
#' @param set logical. If \code{TRUE} (default), the visible mlls will be set to
#' 'custom'.
#' @param value a vector that defines the multilocus lineages for your data.
#' This can be a vector of ANYTHING that can be turned into a factor.
#'
#' @return an object of the same type as x
#' @rdname mll.custom
#' @aliases mll.custom,genclone-method
#'   mll.custom,snpclone-method
#' @docType methods
#' @author Zhian N. Kamvar
#' @seealso \code{\link{mll}} \code{\link{mlg.table}}
#' @examples
#' data(partial_clone)
#' pc <- as.genclone(partial_clone)
#' mll.custom(pc) <- LETTERS[mll(pc)]
#' mll(pc)
#'
#' # Let's say we had a mistake and the A mlg was actually B.
#' mll.levels(pc)[mll.levels(pc) == "A"] <- "B"
#' mll(pc)
#'
#' # Set the MLL back to the original definition.
#' mll(pc) <- "original"
#' mll(pc)
#==============================================================================#
mll.custom <- function(x, set = TRUE, value) standardGeneric("mll.custom")

#' @export
setGeneric("mll.custom")

setMethod(
  f = "mll.custom",
  signature(x = "genclone"),
  definition = function(x, set = TRUE, value) {
    mll.custom.internal(x, set, value)
  }
)

setMethod(
  f = "mll.custom",
  signature(x = "snpclone"),
  definition = function(x, set = TRUE, value) {
    mll.custom.internal(x, set, value)
  }
)

#==============================================================================#
#' @export
#' @rdname mll.custom
#' @aliases mll.custom<-,genclone-method
#'   mll.custom<-,snpclone-method
#' @docType methods
#==============================================================================#
"mll.custom<-" <- function(x, set = TRUE, value) standardGeneric("mll.custom<-")

#' @export
setGeneric("mll.custom<-")

setMethod(
  f = "mll.custom<-",
  signature(x = "genclone"),
  definition = function(x, set = TRUE, value) {
    mll.custom.internal(x, set, value)
  }
)

setMethod(
  f = "mll.custom<-",
  signature(x = "snpclone"),
  definition = function(x, set = TRUE, value) {
    mll.custom.internal(x, set, value)
  }
)

#==============================================================================#
#' @export
#' @rdname mll.custom
#' @aliases mll.levels,genclone-method
#'   mll.levels,snpclone-method
#' @docType methods
#==============================================================================#
mll.levels <- function(x, set = TRUE, value) standardGeneric("mll.levels")

#' @export
setGeneric("mll.levels")

setMethod(
  f = "mll.levels",
  signature(x = "genclone"),
  definition = function(x, set = TRUE, value) {
    mll.levels.internal(x, set, value)
  }
)

setMethod(
  f = "mll.levels",
  signature(x = "snpclone"),
  definition = function(x, set = TRUE, value) {
    mll.levels.internal(x, set, value)
  }
)


#==============================================================================#
#' @export
#' @rdname mll.custom
#' @aliases mll.levels<-,genclone-method
#'   mll.levels<-,snpclone-method
#' @docType methods
#==============================================================================#
"mll.levels<-" <- function(x, set = TRUE, value) standardGeneric("mll.levels<-")

#' @export
setGeneric("mll.levels<-")

setMethod(
  f = "mll.levels<-",
  signature(x = "genclone"),
  definition = function(x, set = TRUE, value) {
    mll.levels.internal(x, set, value)
  }
)

setMethod(
  f = "mll.levels<-",
  signature(x = "snpclone"),
  definition = function(x, set = TRUE, value) {
    mll.levels.internal(x, set, value)
  }
)

#==============================================================================#
#' MLG definitions based on genetic distance
#'
#' Multilocus genotypes are initially defined by naive string matching, but this
#' definition does not take into account missing data or genotyping error,
#' casting these as unique genotypes. Defining multilocus genotypes by genetic
#' distance allows you to incorporate genotypes that have missing data o
#' genotyping error into their parent clusters.
#'
#' @param pop a \code{\linkS4class{genclone}}, \code{\linkS4class{snpclone}}, or
#'   \code{\link[adegenet:genind-class]{genind}} object.
#' @param threshold a number indicating the minimum distance two MLGs must be
#'   separated by to be considered different. Defaults to 0, which will reflect
#'   the original (naive) MLG definition.
#' @param missing any method to be used by \code{\link{missingno}}: "mean",
#'   "zero", "loci", "genotype", or "asis" (default).
#' @param memory whether this function should remember the last distance matrix
#'   it generated. TRUE will attempt to reuse the last distance matrix if the
#'   other parameters are the same. (default) FALSE will ignore any stored
#'   matrices and not store any it generates.
#' @param algorithm determines the type of clustering to be done.
#' \describe{
#'   \item{"farthest_neighbor"}{\emph{ (default) }merges clusters based on the
#'   maximum distance between points in either cluster. This is the strictest of
#'   the three.}
#'   \item{"nearest_neighbor"}{ merges clusters based on the minimum distance
#'   between points in either cluster. This is the loosest of the three.}
#'   \item{"average_neighbor"}{ merges clusters based on the average distance
#'   between every pair of points between clusters.}
#' }
#' @param distance a character or function defining the distance to be applied
#'   to pop. Defaults to \code{\link{diss.dist}} for genclone objects and
#'   \code{\link{bitwise.dist}} for snpclone objects. A matrix or table
#'   containing distances between individuals (such as the output of
#'   \code{\link{rogers.dist}}) is also accepted for this parameter.
#' @param threads (unused) Previously, this was the maximum number of parallel
#'  threads to be used within this function. Default is 1 indicating that this
#'  function will run serially. Any other number will result in a warning.
#' @param stats a character vector specifying which statistics should be
#'   returned (details below). Choices are "MLG", "THRESHOLDS", "DISTANCES",
#'   "SIZES", or "ALL". If choosing "ALL" or more than one, a named list will be
#'   returned.
#' @param ... any parameters to be passed off to the distance method.
#'
#' @details This function will take in any distance matrix or function and
#' collapse multilocus genotypes below a given threshold. If you use this
#' function as the assignment method (mlg.filter(myData, distance = myDist) <-
#' 0.5), the distance function or matrix will be remembered by the object. This
#' means that if you define your own distance matrix or function, you must keep
#' it in memory to further utilize mlg.filter.
#'
#' @return Default, a vector of collapsed multilocus genotypes. Otherwise, any
#'   combination of the following:
#' \subsection{MLGs}{
#'   a numeric vector defining the multilocus genotype cluster of each
#'   individual in the dataset. Each genotype cluster is separated from every
#'   other genotype cluster by at least the defined threshold value, as
#'   calculated by the selected algorithm.
#' }
#' \subsection{THRESHOLDS}{
#'   A numeric vector representing the thresholds \strong{beyond} which clusters
#'   of multilocus genotypes were collapsed.
#' }
#' \subsection{DISTANCES}{
#'   A square matrix representing the distances between each cluster.
#' }
#' \subsection{SIZES}{
#'  The sizes of the multilocus genotype clusters in order.
#' }
#'
#' @note \code{mlg.vector} makes use of \code{mlg.vector} grouping prior to
#'   applying the given threshold. Genotype numbers returned by
#'   \code{mlg.vector} represent the lowest numbered genotype (as returned by
#'   \code{mlg.vector}) in in each new multilocus genotype. Therefore
#'   \strong{\code{mlg.filter} and \code{mlg.vector} return the same vector when
#'   threshold is set to 0 or less}.
#' @seealso \code{\link{filter_stats}},
#'   \code{\link{cutoff_predictor}},
#'   \code{\link{mll}},
#'   \code{\link{genclone}},
#'   \code{\link{snpclone}},
#'   \code{\link{diss.dist}},
#'   \code{\link{bruvo.dist}}
#' @export
#' @rdname mlg.filter
#' @aliases mlg.filter,genclone-method
#'   mlg.filter,snpclone-method
#'   mlg.filter,genind-method
#'   mlg.filter,genlight-method
#' @docType methods
#' @examples
#'
#' data(partial_clone)
#' pc <- as.genclone(partial_clone, threads = 1L) # convert to genclone object
#'
#' # Basic Use ---------------------------------------------------------------
#'
#'
#' # Show MLGs at threshold 0.05
#' mlg.filter(pc, threshold = 0.05, distance = "nei.dist", threads = 1L)
#' pc # 26 mlgs
#'
#' # Set MLGs at threshold 0.05
#' mlg.filter(pc, distance = "nei.dist", threads = 1L) <- 0.05
#' pc # 25 mlgs
#'
#' \dontrun{
#'
#' # The distance definition is persistant
#' mlg.filter(pc) <- 0.1
#' pc # 24 mlgs
#'
#' # But you can still change the definition
#' mlg.filter(pc, distance = "diss.dist", percent = TRUE) <- 0.1
#' pc
#'
#' # Choosing a threshold ----------------------------------------------------
#'
#'
#' # Thresholds for collapsing multilocus genotypes should not be arbitrary. It
#' # is important to consider what threshold is suitable. One method of choosing
#' # a threshold is to find a gap in the distance distribution that represents
#' # clonal groups. You can look at this by analyzing the distribution of all
#' # possible thresholds with the function "cutoff_predictor".
#'
#' # For this example, we'll use Bruvo's distance to predict the cutoff for
#' # P. infestans.
#'
#' data(Pinf)
#' Pinf
#' # Repeat lengths are necessary for Bruvo's distance
#' (pinfreps <- fix_replen(Pinf, c(2, 2, 6, 2, 2, 2, 2, 2, 3, 3, 2)))
#'
#' # Now we can collect information of the thresholds. We can set threshold = 1
#' # because we know that this will capture the maximum possible distance:
#' (thresholds <- mlg.filter(Pinf, distance = bruvo.dist, stats = "THRESHOLDS",
#'                           replen = pinfreps, threshold = 1))
#' # We can use these thresholds to find an appropriate cutoff
#' (pcut <- cutoff_predictor(thresholds))
#' mlg.filter(Pinf, distance = bruvo.dist, replen = pinfreps) <- pcut
#' Pinf
#'
#' # This can also be visualized with the "filter_stats" function.
#'
#' # Special case: threshold = 0 ---------------------------------------------
#'
#'
#' # It's important to remember that a threshold of 0 is equal to the original
#' # MLG definition. This example will show a data set that contains genotypes
#' # with missing data that share all alleles with other genotypes except for
#' # the missing one.
#'
#' data(monpop)
#' monpop # 264 mlg
#' mlg.filter(monpop) <- 0
#' nmll(monpop) # 264 mlg
#'
#' # In order to merge these genotypes with missing data, we should set the
#' # threshold to be slightly higher than 0. We will use the smallest fraction
#' # the computer can store.
#'
#' mlg.filter(monpop) <- .Machine$double.eps ^ 0.5
#' nmll(monpop) # 236 mlg
#'
#' # Custom distance ---------------------------------------------------------
#'
#' # Custom genetic distances can be used either in functions from other
#' # packages or user-defined functions
#'
#' data(Pinf)
#' Pinf
#' mlg.filter(Pinf, distance = function(x) dist(tab(x))) <- 3
#' Pinf
#' mlg.filter(Pinf) <- 4
#' Pinf
#'
#' # genlight / snpclone objects ---------------------------------------------
#'
#'
#' set.seed(999)
#' gc <- as.snpclone(glSim(100, 0, n.snp.struc = 1e3, ploidy = 2))
#' gc # 100 mlgs
#' mlg.filter(gc) <- 0.25
#' gc # 82 mlgs
#'
#' }
#==============================================================================#
mlg.filter <- function(
  pop,
  threshold = 0.0,
  missing = "asis",
  memory = FALSE,
  algorithm = "farthest_neighbor",
  distance = "diss.dist",
  threads = 1L,
  stats = "MLGs",
  ...
) {
  standardGeneric("mlg.filter")
}

#' @export
setGeneric("mlg.filter")

setMethod(
  f = "mlg.filter",
  signature(pop = "genind"),
  definition = function(
    pop,
    threshold = 0.0,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "diss.dist",
    threads = 1L,
    stats = "MLGs",
    ...
  ) {
    the_call <- match.call()
    mlg.filter.internal(
      pop,
      threshold,
      missing,
      memory,
      algorithm,
      distance,
      denv = parent.frame(),
      threads,
      stats,
      the_call,
      ...
    )
  }
)

setMethod(
  f = "mlg.filter",
  signature(pop = "genlight"),
  definition = function(
    pop,
    threshold = 0.0,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "bitwise.dist",
    threads = 1,
    stats = "MLGs",
    ...
  ) {
    the_call <- match.call()
    mlg.filter.internal(
      pop,
      threshold,
      missing,
      memory,
      algorithm,
      distance,
      denv = parent.frame(),
      threads,
      stats,
      the_call,
      ...
    )
  }
)

setMethod(
  f = "mlg.filter",
  signature(pop = "genclone"),
  definition = function(
    pop,
    threshold = 0.0,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "diss.dist",
    threads = 1L,
    stats = "MLGs",
    ...
  ) {
    the_call <- match.call()
    mlg.filter.internal(
      pop,
      threshold,
      missing,
      memory,
      algorithm,
      distance,
      denv = parent.frame(),
      threads,
      stats,
      the_call,
      ...
    )
  }
)

setMethod(
  f = "mlg.filter",
  signature(pop = "snpclone"),
  definition = function(
    pop,
    threshold = 0.0,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "bitwise.dist",
    threads = 1L,
    stats = "MLGs",
    ...
  ) {
    the_call <- match.call()
    mlg.filter.internal(
      pop,
      threshold,
      missing,
      memory,
      algorithm,
      distance,
      denv = parent.frame(),
      threads,
      stats,
      the_call,
      ...
    )
  }
)

#==============================================================================#
#' @export
#' @rdname mlg.filter
#' @param value the threshold at which genotypes should be collapsed.
#' @aliases mlg.filter<-,genclone-method
#'   mlg.filter<-,genind-method
#'   mlg.filter<-,snpclone-method
#'   mlg.filter<-,genlight-method
#' @docType methods
#==============================================================================#
"mlg.filter<-" <- function(
  pop,
  missing = "asis",
  memory = FALSE,
  algorithm = "farthest_neighbor",
  distance = "diss.dist",
  threads = 1L,
  ...,
  value
) {
  standardGeneric("mlg.filter<-")
}

#' @export
setGeneric("mlg.filter<-")


setMethod(
  f = "mlg.filter<-",
  signature(pop = "genind"),
  definition = function(
    pop,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "diss.dist",
    threads = 1L,
    ...,
    value
  ) {
    if (!is.genclone(pop)) {
      the_warning <- paste(
        "mlg.filter<- only has an effect on genclone",
        "objects.\n",
        "If you want to utilize this",
        "functionality, please convert to a genclone object.\n",
        "No modifications are taking place."
      )
      warning(the_warning, call. = FALSE)
    }

    return(pop)
  }
)

setMethod(
  f = "mlg.filter<-",
  signature(pop = "genlight"),
  definition = function(
    pop,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "bitwise.dist",
    threads = 1L,
    ...,
    value
  ) {
    if (!is.snpclone(pop)) {
      the_warning <- paste(
        "mlg.filter<- only has an effect on snpclone",
        "objects.\n",
        "If you want to utilize this",
        "functionality, please convert to a snpclone object.\n",
        "No modifications are taking place."
      )
      warning(the_warning, call. = FALSE)
    }

    return(pop)
  }
)


setMethod(
  f = "mlg.filter<-",
  signature(pop = "genclone"),
  definition = function(
    pop,
    missing = "asis",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "diss.dist",
    threads = 1L,
    ...,
    value
  ) {
    pop <- callNextMethod()
    the_call <- match.call()
    callnames <- names(the_call)
    the_dots <- list(...)
    parsed_distance <- distance

    if (!is(pop@mlg, "MLG")) {
      pop@mlg <- new("MLG", pop@mlg)
    }
    # Thu Aug  6 16:39:25 2015 ------------------------------
    # Treatment of the incoming distance is not a trivial task.
    # We want the object to have a memory of what distance function was used so
    # the user does not have to re-specify the distance measure in the function
    # call. Thus, there could be three possabilities whenever the user calls
    # mlg.filter:
    #  1. The user does not specify a distance
    #  2. The distance is specified as text
    #  3. The distance is specified as an actual function.
    #
    if (!"distance" %in% callnames) {
      # This is dealing with the situation where the user does not specify a
      # distance function. In this case, we use the function that was defined in
      # the object itself.
      #
      # If the user supplied dots, then we append them to the current parameters
      # and take the newer ones.

      distance <- distname(pop@mlg)
      d_env <- distenv(pop@mlg)
      the_dots <- c(the_dots, distargs(pop@mlg))
      the_dots <- the_dots[unique(names(the_dots))]
    } else {
      # If the user supplied a distance, then we should store the environment
      # from the call and test that the function is defined in the environment
      # or is a oneliner

      d_env <- parent.frame()
      the_dist <- substitute(distance)
      the_distc <- as.character(the_dist)

      if (length(the_distc) > 1) {
        # this succeeds if the function is a oneliner
        parsed_distance <- the_dist # this will be a function
      } else {
        parsed_distance <- the_distc # this will be a character
      }
    }

    # Trying to evaluate the function/matrix in its environment
    distfun <- try(eval(distance, envir = d_env), silent = TRUE)
    if ("try-error" %in% class(distfun)) {
      stop(
        "cannot evaluate distance function, it might be missing.",
        call. = FALSE
      )
    }

    # This part may be unnecessary, but I believe I was having issues with
    # passing the call to the main function. The reason for this is that
    # dealing with missing data is done in different ways depending on the
    # distance function used. If it's diss.dist, nothing is done, if it's
    # Nei's etc. dist, then the correction is done by converting it to a
    # bootgen object since that will quietly convert missing data; otherwise
    # the function missingno is used.
    the_call[["distance"]] <- distance

    # The distance isn't a function, so it should be a distance matrix/character.
    if (!is.function(distfun)) {
      parsed_distance <- distfun
    }

    # Storing the specified algorithm.
    if (!"algorithm" %in% callnames) {
      # <simpsonsreference>
      # Do you have any of thoses arrows that are, like, double arrows?
      # </simpsonsreference>
      the_call[["algorithm"]] <- distalgo(pop@mlg) -> algorithm
    }
    # The arguments are built up in a list here and then passed using do.call.
    the_args <- list(
      gid = pop,
      threshold = value,
      missing = missing,
      memory = memory,
      algorithm = algorithm,
      distance = parsed_distance,
      denv = d_env,
      threads = threads,
      stats = "MLGs"
    )

    fmlgs <- do.call("mlg.filter.internal", c(the_args, the_dots))

    # here we are resetting the values in the MLG object including the
    # algorithm used and the parameters to the distance
    algos <- c("nearest_neighbor", "average_neighbor", "farthest_neighbor")
    mll(pop) <- "contracted"
    pop@mlg[] <- fmlgs
    cutoff(pop@mlg)["contracted"] <- value
    distname(pop@mlg) <- substitute(distance)
    distenv(pop@mlg) <- d_env
    distargs(pop@mlg) <- the_dots
    distalgo(pop@mlg) <- match.arg(algorithm, algos)
    return(pop)
  }
)


setMethod(
  f = "mlg.filter<-",
  signature(pop = "snpclone"),
  definition = function(
    pop,
    missing = "mean",
    memory = FALSE,
    algorithm = "farthest_neighbor",
    distance = "bitwise.dist",
    threads = 1L,
    ...,
    value
  ) {
    pop <- callNextMethod()
    the_call <- match.call()
    callnames <- names(the_call)
    the_dots <- list(...)
    parsed_distance <- distance

    if (!is(pop@mlg, "MLG")) {
      pop@mlg <- new("MLG", pop@mlg)
    }
    # Thu Aug  6 16:39:25 2015 ------------------------------
    # Treatment of the incoming distance is not a trivial task.
    # We want the object to have a memory of what distance function was used so
    # the user does not have to re-specify the distance measure in the function
    # call. Thus, there could be three possabilities whenever the user calls
    # mlg.filter:
    #  1. The user does not specify a distance
    #  2. The distance is specified as text
    #  3. The distance is specified as an actual function.
    #
    if (!"distance" %in% callnames) {
      # This is dealing with the situation where the user does not specify a
      # distance function. In this case, we use the function that was defined in
      # the object itself.
      #
      # If the user supplied dots, then we append them to the current parameters
      # and take the newer ones.

      distance <- distname(pop@mlg)
      d_env <- distenv(pop@mlg)
      the_dots <- c(the_dots, distargs(pop@mlg))
      the_dots <- the_dots[unique(names(the_dots))]
    } else {
      # If the user supplied a distance, then we should store the environment
      # from the call and test that the function is defined in the environment
      # or is a oneliner

      d_env <- parent.frame()
      the_dist <- substitute(distance)
      the_distc <- as.character(the_dist)

      if (length(the_distc) > 1) {
        # this succeeds if the function is a oneliner
        parsed_distance <- the_dist # this will be a function
      } else {
        parsed_distance <- the_distc # this will be a character
      }
    }

    # Trying to evaluate the function/matrix in its environment
    distfun <- try(eval(distance, envir = d_env), silent = TRUE)
    if ("try-error" %in% class(distfun)) {
      stop(
        "cannot evaluate distance function, it might be missing.",
        call. = FALSE
      )
    }

    # This part may be unnecessary, but I believe I was having issues with
    # passing the call to the main function. The reason for this is that
    # dealing with missing data is done in different ways depending on the
    # distance function used. If it's diss.dist, nothing is done, if it's
    # Nei's etc. dist, then the correction is done by converting it to a
    # bootgen object since that will quietly convert missing data; otherwise
    # the function missingno is used.
    the_call[["distance"]] <- distance

    # The distance isn't a function, so it should be a distance matrix/character.
    if (!is.function(distfun)) {
      parsed_distance <- distfun
    }

    # Storing the specified algorithm.
    if (!"algorithm" %in% callnames) {
      # <simpsonsreference>
      # Do you have any of thoses arrows that are, like, double arrows?
      # </simpsonsreference>
      the_call[["algorithm"]] <- distalgo(pop@mlg) -> algorithm
    }
    # The arguments are built up in a list here and then passed using do.call.
    the_args <- list(
      gid = pop,
      threshold = value,
      missing = missing,
      memory = memory,
      algorithm = algorithm,
      distance = parsed_distance,
      denv = d_env,
      threads = threads,
      stats = "MLGs"
    )

    fmlgs <- do.call("mlg.filter.internal", c(the_args, the_dots))

    # here we are resetting the values in the MLG object including the
    # algorithm used and the parameters to the distance
    algos <- c("nearest_neighbor", "average_neighbor", "farthest_neighbor")
    mll(pop) <- "contracted"
    pop@mlg[] <- fmlgs
    cutoff(pop@mlg)["contracted"] <- value
    distname(pop@mlg) <- substitute(distance)
    distenv(pop@mlg) <- d_env
    distargs(pop@mlg) <- the_dots
    distalgo(pop@mlg) <- match.arg(algorithm, algos)
    return(pop)
  }
)

#==============================================================================#
#' Convert an old genclone object to a new genclone object
#'
#' @param object a genclone object from poppr v. 1.1
#' @param donor a new genclone object from poppr v. 2.0
#'
#' @export
#' @author Zhian N. Kamvar
#==============================================================================#
old2new_genclone <- function(object, donor = new(class(object))) {
  has_strata <- .hasSlot(object, "strata")
  has_loc.n.all <- .hasSlot(object, "loc.n.all")
  if (has_loc.n.all) {
    warning("this object appears to be up to date.")
    return(object)
  }
  if (!has_strata && is(object, "genclone")) {
    object@strata <- object@hierarchy
    object@hierarchy <- NULL
  }
  n <- old2new_genind(object, donor)
  if ("genclone" %in% class(n)) {
    n@mlg <- object@mlg
  }
  return(n)
}

Try the poppr package in your browser

Any scripts or data that you put into this service are public.

poppr documentation built on Aug. 24, 2025, 1:09 a.m.