R/utils.R

Defines functions split_mappoly .mappoly_map_skeleton .mappoly_data_skeleton detect_info_par compare_maps v_2_m get_states_and_emission_single_parent aggregate_matrix get_dosage_type get_ols_map sample_data update_map get_tab_mrks summary_maps merge_datasets check_data_dist_sanity check_data_dose_sanity check_data_sanity plot.mappoly.geno.ord print.mappoly.geno.ord get_genomic_order mrk_chisq_test update_missing gg_color_hue perm_pars perm_tot check_if_rf_is_possible plot_compare_haplotypes plot_GIC compare_haplotypes imf_m imf_h imf_k mf_m mf_h mf_k text_col msg export_data_to_polymapR dist_prob_to_class rev_map get_w_m is.prob.data get_rf_from_mat get_LOD

Documented in aggregate_matrix check_data_dist_sanity check_data_dose_sanity check_data_sanity check_if_rf_is_possible compare_haplotypes compare_maps detect_info_par dist_prob_to_class export_data_to_polymapR get_dosage_type get_genomic_order get_LOD get_ols_map get_rf_from_mat get_states_and_emission_single_parent get_tab_mrks get_w_m gg_color_hue imf_h imf_k imf_m is.prob.data merge_datasets mf_h mf_k mf_m mrk_chisq_test msg perm_pars perm_tot plot_compare_haplotypes plot_GIC plot.mappoly.geno.ord print.mappoly.geno.ord rev_map sample_data split_mappoly summary_maps update_map update_missing v_2_m

#' Extract the LOD Scores in a \code{'mappoly.map'} object
#' @param x an object of class \code{mappoly.map}
#' @param sorted logical. if \code{TRUE}, the LOD Scores are displayed
#'     in a decreasing order
#' @return a numeric vector containing the LOD Scores
#' @keywords internal
#' @export
get_LOD <- function(x, sorted = TRUE) {
  w <- sapply(x$maps, function(x) x$loglike)
  LOD <- w - max(w)
  if (sorted)
    LOD <- sort(LOD, decreasing = TRUE)
  abs(LOD)
}

#' Get recombination fraction from a matrix
#' @keywords internal
get_rf_from_mat <- function(M){
  r <- numeric(nrow(M)-1)
  for(i in 1:(nrow(M)-1)){
    r[i] <- M[i, i+1]
  }
  r
}


#' Check if Object is a Probability Dataset in MAPpoly
#'
#' Determines whether the specified object is a probability dataset
#' by checking for the existence of the 'geno' component within a
#' `"mappoly.data"` object. 
#'
#' @param x An object of class `"mappoly.data"`
#'
#' @return A logical value: `TRUE` if the 'geno' component exists within `x`,
#' indicating it is a valid probability dataset for genetic analysis; `FALSE`
#' otherwise.
#'
#' @keywords internal
#' @export
is.prob.data <- function(x){
  # Verify if 'x' is indeed an object of class 'mappoly.data'
  if (!inherits(x, "mappoly.data")) {
    stop("Input is not an object of class 'mappoly.data'")
  }
  
  # Check for the existence of 'geno' within the 'mappoly.data' object
  exists('geno', where = x)
}


#' Get the number of bivalent configurations
#' @keywords internal
get_w_m <- function(ploidy){
  if(ploidy%%2 != 0) stop("ploidy level should be an even number") 
  if(ploidy <= 0) stop("ploidy level should be greater than zero")
  1/factorial((ploidy/2)) * prod(choose(seq(2, ploidy, 2),2))
}

#' Reverse map
#'
#' Provides the reverse of a given map.
#'
#' @param input.map an object of class \code{mappoly.map}
#' 
#' @examples
#' plot_genome_vs_map(solcap.mds.map[[1]])
#' plot_genome_vs_map(rev_map(solcap.mds.map[[1]]))
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
#'
#'@export
#'
rev_map <- function(input.map)
{
  output.map <- input.map
  output.map$info <- lapply(output.map$info, rev)
  for(i in 1:length(output.map$maps))
  {
    output.map$maps[[i]]$seq.num <- rev(input.map$maps[[i]]$seq.num)
    output.map$maps[[i]]$seq.rf <- rev(input.map$maps[[i]]$seq.rf)
    output.map$maps[[i]]$seq.ph$P <- rev(input.map$maps[[i]]$seq.ph$P)
    output.map$maps[[i]]$seq.ph$Q <- rev(input.map$maps[[i]]$seq.ph$Q)
  }
  return(output.map)
}

#' Returns the class with the highest probability in 
#' a genotype probability distribution
#'
#' @param geno the probabilistic genotypes contained in the object
#'     \code{'mappoly.data'}
#' @param prob.thres probability threshold to select the genotype. 
#'     Values below this genotype are assumed as missing data
#' @return a matrix containing the doses of each genotype and
#'     marker. Markers are disposed in rows and individuals are 
#'     disposed in columns. Missing data are represented by NAs
#' @keywords internal
#' @examples
#' \donttest{
#' geno.dose <- dist_prob_to_class(hexafake.geno.dist$geno)
#' geno.dose$geno.dose[1:10,1:10]
#'}   
#' @importFrom magrittr "%>%"
#' @importFrom reshape2 melt dcast
#' @importFrom dplyr group_by filter arrange
#' @export
dist_prob_to_class <- function(geno, prob.thres = 0.9) {
  a <- reshape2::melt(geno, id.vars = c("mrk", "ind"))
  mrk <- ind <- value <- variable <- NULL # Setting the variables to NULL first
  a$variable <- as.numeric(levels(a$variable))[a$variable]
  b <- a %>%
    dplyr::group_by(mrk, ind) %>%
    dplyr::filter(value > prob.thres) %>%
    dplyr::arrange(mrk, ind, variable)
  z <- reshape2::dcast(data = b[,1:3], formula = mrk ~ ind, value.var = "variable")
  rownames(z) <- z[,"mrk"]
  z <- data.matrix(frame = z[,-1])
  n <- setdiff(unique(geno$mrk), rownames(z))
  if(length(n) > 0)
  {
    ploidy <- matrix(NA, nrow = length(n), ncol = ncol(z), dimnames = list(n, colnames(z)))
    z <- rbind(z,ploidy)
  }
  rm.ind <- setdiff(unique(geno$ind), colnames(z))
  flag <- FALSE
  if(length(rm.ind) > 0){
    flag <- TRUE
    warning("Inividual(s) ", paste(rm.ind, collapse = " "), 
            "\n  did not meet the 'prob.thres' criteria for any of\n  the markers and was (were) removed.")
    geno <- geno %>% dplyr::filter(ind %in% colnames(z))
  }
  z <- z[as.character(unique(geno$mrk)), as.character(unique(geno$ind))]
  list(geno.dose = z, geno = geno, flag = flag)
}

#' Export data to \code{polymapR}
#' 
#' See examples at \url{https://rpubs.com/mmollin/tetra_mappoly_vignette}.
#' 
#' @param data.in an object of class \code{mappoly.data}
#' @return a dosage \code{matrix} 
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
#' @export export_data_to_polymapR
export_data_to_polymapR <- function(data.in)
{
  data.out <- as.matrix(data.frame(P1 = data.in$dosage.p1,     
                                   P2 = data.in$dosage.p2,
                                   data.in$geno.dose))
  data.out[data.out  ==  (data.in$ploidy + 1)] <- NA
  return(data.out)
}

#' Msg function
#' @keywords internal
#' @importFrom cli rule
msg <- function(text, line = 1){
  cli::rule(line = line, right = text) %>%
    text_col() %>%
    message()
}

#' @importFrom rstudioapi isAvailable hasFun getThemeInfo
#' @importFrom crayon white black
text_col <- function(x) {
  # If RStudio not available, messages already printed in black
  if (!rstudioapi::isAvailable()) {
    return(x)
  }
  if (!rstudioapi::hasFun("getThemeInfo")) {
    return(x)
  }
  theme <- rstudioapi::getThemeInfo()
  if (isTRUE(theme$dark)) crayon::white(x) else crayon::black(x)
}

#' Genetic Mapping Functions
#'
#' These functions facilitate the conversion between recombination fractions (r) and genetic distances (d) 
#' using various mapping models. The functions starting with `mf_` convert recombination fractions to genetic distances,
#' while those starting with `imf_` convert genetic distances back into recombination fractions.
#'
#' @name genetic-mapping-functions
#' @aliases mf_k mf_h mf_m imf_k imf_h imf_m
#' @usage mf_k(d)
#' @usage mf_h(d)
#' @usage mf_m(d)
#' @usage imf_k(r)
#' @usage imf_h(r)
#' @usage imf_m(r)
#' @param d Numeric or numeric vector, representing genetic distances in centiMorgans (cM) for direct functions (mf_k, mf_h, mf_m).
#' @param r Numeric or numeric vector, representing recombination fractions for inverse functions (imf_k, imf_h, imf_m).
#' @details
#' The `mf_` prefixed functions apply different models to convert recombination fractions into genetic distances:
#' \itemize{
#'   \item \code{mf_k}: Kosambi mapping function.
#'   \item \code{mf_h}: Haldane mapping function.
#'   \item \code{mf_m}: Morgan mapping function.
#'}
#' The `imf_` prefixed functions convert genetic distances back into recombination fractions:
#' \itemize{
#'   \item \code{imf_k}: Inverse Kosambi mapping function.
#'   \item \code{imf_h}: Inverse Haldane mapping function.
#'   \item \code{imf_m}: Inverse Morgan mapping function.
#'}
#' @references
#' Kosambi, D.D. (1944). The estimation of map distances from recombination values. Ann Eugen., 12, 172-175.
#' Haldane, J.B.S. (1919). The combination of linkage values, and the calculation of distances between the loci of linked factors. J Genet, 8, 299-309.
#' Morgan, T.H. (1911). Random segregation versus coupling in Mendelian inheritance. Science, 34(873), 384.
#' @keywords genetics
#' @export
mf_k <- function(d) 0.5 * tanh(d / 50)
mf_h <- function(d) 0.5 * (1 - exp(-d / 50))
mf_m <- function(d) sapply(d, function(a) min(a / 100, 0.5))
imf_k <- function(r) {
  r[r >= 0.5] <- 0.5 - 1e-14
  50 * atanh(2 * r)
}
imf_h <- function(r) {
  r[r >= 0.5] <- 0.5 - 1e-14
  -50 * log(1 - 2 * r)
}
imf_m <- function(r) sapply(r, function(a) min(a * 100, 50))

#' Compare two polyploid haplotypes stored in list format
#'
#' @param ploidy ploidy level
#' @param h1 homology group 1
#' @param h2 homology group 2
#' @return a numeric vector of size \code{ploidy} indicating which
#'     homolog in h2 represents the homolog in h1. If there is
#'     no correspondence, i.e. different homolog, it returns NA for
#'     that homolog.
#' @keywords internal
#' @export compare_haplotypes
#' 
compare_haplotypes <- function(ploidy, h1, h2) {
  I1 <- matrix(0, ploidy, length(h1))
  I2 <- matrix(0, ploidy, length(h2))
  for (i in 1:length(h1)) {
    I1[h1[[i]], i] <- 1
    I2[h2[[i]], i] <- 1
  }
  a <- apply(I1, 1, paste, collapse = "")
  b <- apply(I2, 1, paste, collapse = "")
  haplo.ord <- match(a, b)
  list(is.same.haplo = !any(is.na(haplo.ord)), haplo.ord = haplo.ord)
}

#' Genotypic information content 
#' 
#' This function plots the genotypic information content given 
#' an object of class \code{mappoly.homoprob}.
#' 
#' @param hprobs an object of class \code{mappoly.homoprob}
#' 
#' @param P a string containing the name of parent P
#' 
#' @param Q a string containing the name of parent Q
#' 

#' 
#' @examples
#' \donttest{
#'      w <- lapply(solcap.err.map[1:3], calc_genoprob)
#'      h.prob <- calc_homologprob(w)
#'      plot_GIC(h.prob)
#' }
#' 
#' @importFrom dplyr mutate 
#' @importFrom ggplot2 facet_wrap element_text ylim scale_color_discrete

#' @export plot_GIC 
plot_GIC <- function(hprobs, P = "P1", Q = "P2"){
  if(!inherits(hprobs, "mappoly.homoprob"))
    stop(" 'hprobs' should be of class 'mappoly.homoprob'")
  LG <- map.position <- GIC <- p1 <- m1 <- homolog <- marker <- probability <- NULL
  DF <- hprobs$homoprob %>% 
    dplyr::mutate(p1 = probability * (1-probability)) %>%
    group_by(marker, homolog, map.position, LG) %>%
    summarise(m1 = sum(p1)) %>%
    mutate(GIC = 1-(4/hprobs$info$n.ind)*m1 , parent = ifelse(homolog%in%letters[1:hprobs$info$ploidy], P, Q))
  head(as.data.frame(DF))
  
  print(ggplot(DF, aes(map.position, GIC, colour = homolog)) +
          geom_smooth(alpha = .8, se = FALSE) + facet_wrap(~LG, nrow = 3, ncol = 5) +
          facet_grid(parent~LG, scales = "free_x", space = "free_x") +
          geom_hline(yintercept = .8, linetype = "dashed") + ylim(0,1) +
          theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
          scale_color_discrete(name = "Homologs") +
          ylab("Genotypic Information Content") + xlab("Distance (cM)"))
  return(invisible(DF))
}

#' Plot Two Overlapped Haplotypes
#'
#' This function plots two sets of haplotypes for comparison, allowing for visual
#' inspection of homologous allele patterns across two groups or conditions.
#' It is designed to handle and display genetic data for organisms with varying ploidy levels.
#'
#' @param ploidy Integer, specifying the ploidy level of the organism being represented.
#' @param hom.allele.p1 A list where each element represents the alleles for a marker in the first haplotype group, for 'p' parent.
#' @param hom.allele.q1 A list where each element represents the alleles for a marker in the first haplotype group, for 'q' parent.
#' @param hom.allele.p2 Optionally, a list where each element represents the alleles for a marker in the second haplotype group, for 'p' parent.
#' @param hom.allele.q2 Optionally, a list where each element represents the alleles for a marker in the second haplotype group, for 'q' parent.
#'
#' @details The function creates a graphical representation of haplotypes, where each marker's alleles are plotted
#' along a line for each parent ('p' and 'q'). If provided, the second set of haplotypes (for comparison) are overlaid
#' on the same plot. This allows for direct visual comparison of allele presence or absence across the two sets.
#' Different colors are used to distinguish between the first and second sets of haplotypes.
#'
#' The function uses several internal helper functions (`ph_list_to_matrix` and `ph_matrix_to_list`) to manipulate
#' haplotype data. These functions should correctly handle the conversion between list and matrix representations
#' of haplotype data.
#'
#' @return Invisible. The function primarily generates a plot for visual analysis and does not return any data.
#' @export
#' @keywords internal
plot_compare_haplotypes <- function(ploidy, hom.allele.p1, hom.allele.q1, hom.allele.p2 = NULL, hom.allele.q2 = NULL) {
  nmmrk <- names(hom.allele.p1)
  oldpar <- par(mar = c(5.1, 4.1, 4.1, 2.1))
  on.exit(par(oldpar))
  o1 <- order(apply(ph_list_to_matrix(hom.allele.p1, ploidy), 2, paste, collapse = ""), decreasing = TRUE)
  hom.allele.p1 <- ph_matrix_to_list(ph_list_to_matrix(hom.allele.p1, ploidy)[, o1])
  o2 <- order(apply(ph_list_to_matrix(hom.allele.q1, ploidy), 2, paste, collapse = ""), decreasing = TRUE)
  hom.allele.q1 <- ph_matrix_to_list(ph_list_to_matrix(hom.allele.q1, ploidy)[, o2])
  
  if (!is.null(hom.allele.p2)) {
    o3 <- order(apply(ph_list_to_matrix(hom.allele.p2, ploidy), 2, paste, collapse = ""), decreasing = TRUE)
    hom.allele.p2 <- ph_matrix_to_list(ph_list_to_matrix(hom.allele.p2, ploidy)[, o3])
  }
  if (!is.null(hom.allele.q2)) {
    o4 <- order(apply(ph_list_to_matrix(hom.allele.q2, ploidy), 2, paste, collapse = ""), decreasing = TRUE)
    hom.allele.q2 <- ph_matrix_to_list(ph_list_to_matrix(hom.allele.q2, ploidy)[, o4])
  }
  col2 <- 0  #rgb(1,0,.5,0.5)
  col1 <- rgb(1, 0, 0, 0.5)
  col3 <- rgb(0, 0, 1, 0.5)
  n.mrk <- length(hom.allele.p1)
  plot(c(0, 22), c(0, -(ploidy + 15)), type = "n", axes = FALSE, xlab = "", main = "", ylab = "")
  for (i in -(1:ploidy)) {
    lines(c(0, 10), c(i, i), lwd = 1, col = "darkgray", lty = 2)
    lines(c(12, 22), c(i, i), lwd = 1, col = "darkgray", lty = 2)
  }
  pos.p <- cumsum(c(0, rep(1, n.mrk - 1)/sum(rep(1, n.mrk - 1))) * 10)
  for (i in 1:n.mrk) {
    points(x = rep(pos.p[i], ploidy), y = -c(1:ploidy), pch = 15, col = col2, cex = 2)
    if (any(hom.allele.p1[[i]] != 0))
      points(x = rep(pos.p[i], length(hom.allele.p1[[i]])), y = -hom.allele.p1[[i]], col = col1, pch = 15, cex = 2)
    if (any(hom.allele.p2[[i]] != 0))
      if (!is.null(hom.allele.p2))
        points(x = rep(pos.p[i], length(hom.allele.p2[[i]])), y = -hom.allele.p2[[i]], col = col3, pch = 15, cex = 2)
  }
  text(x = pos.p, y = rep(-(ploidy+1), length(pos.p)), labels = nmmrk, cex = .5)
  pos.q <- pos.p + 12
  for (i in 1:n.mrk) {
    points(x = rep(pos.q[i], ploidy), y = -c(1:ploidy), col = col2, pch = 15, cex = 2)
    if (any(hom.allele.q1[[i]] != 0))
      points(x = rep(pos.q[i], length(hom.allele.q1[[i]])), y = -hom.allele.q1[[i]], col = col1, pch = 15, cex = 2)
    if (any(hom.allele.q2[[i]] != 0))
      if (!is.null(hom.allele.q2))
        points(x = rep(pos.q[i], length(hom.allele.q2[[i]])), y = -hom.allele.q2[[i]], col = col3, pch = 15, cex = 2)
  }
  text(x = pos.q, y = rep(-(ploidy+1) , length(pos.q)), labels = nmmrk, cex = .5)
  text(x = 11, y = -(ploidy + 1)/2, labels = "X", cex = 1)
}

#' Check if it is possible to estimate the recombination
#' fraction between neighbor markers using two-point
#' estimation
#' @keywords internal
check_if_rf_is_possible <- function(input.seq){
  dp <- abs(abs(input.seq$seq.dose.p1-(input.seq$ploidy/2))-(input.seq$ploidy/2))
  dq <- abs(abs(input.seq$seq.dose.p2-(input.seq$ploidy/2))-(input.seq$ploidy/2))
  y <- numeric(length(input.seq$seq.num)-1)
  for(i in 1:length(y))
    y[i] <- (dp[i]  ==  0 && dq[i+1]  ==  0) ||
    (dp[i+1]  ==  0 && dq[i]  ==  0) ||
    (dp[i]  ==  0 && dq[i]  ==  0) ||
    (dp[i+1]  ==  0 && dq[i+1]  ==  0)
  y <- as.logical(y)
  !y
}

#' N! combination
#' @keywords internal
perm_tot <- function(v) {
  n <- length(v)
  result <- v
  if (n > 1) {
    M <- perm_tot(v[2:n])
    result <- cbind(v[1], M)
    if (n > 2) {
      for (i in 2:(n - 1)) {
        N <- cbind(M[, 1:(i - 1)], v[1], M[, i:(n - 1)])
        result <- rbind(result, N)
      }
    }
    N <- cbind(M, v[1])
    result <- rbind(result, N)
  }
  return(result)
}

#' N!/2 combination
#' @keywords internal
perm_pars <- function(v) {
  n <- length(v)
  result <- v
  if (n > 2) {
    Mt <- perm_tot(v[2:n])
    result <- cbind(v[1], Mt)
    f <- floor(n/2)
    c <- ceiling(n/2)
    if (n > 3) {
      for (i in 2:f) {
        N <- cbind(Mt[, 1:(i - 1)], v[1], Mt[, i:(n - 1)])
        result <- rbind(result, N)
      }
    }
    if (c > f) {
      Ms <- perm_pars(v[2:n])
      if (n > 3) {
        N <- cbind(Ms[, 1:f], v[1], Ms[, c:(n - 1)])
      } else {
        N <- cbind(Ms[1:f], v[1], Ms[c:(n - 1)])
      }
      result <- rbind(result, N)
    }
  }
  return(result)
}

#' Color pallet ggplot-like
#' @keywords internal
#' @importFrom grDevices hcl col2rgb hsv rgb2hsv
gg_color_hue <- function(n) {
  x <- rgb2hsv(col2rgb("steelblue"))[, 1]
  cols = seq(x[1], x[1] + 1, by = 1/n)
  cols = cols[1:n]
  cols[cols > 1] <- cols[cols > 1] - 1
  return(hsv(cols, x[2], x[3]))
}

#' MAPpoly Color Palettes
#'
#' Provides a set of color palettes designed for use with MAPpoly, 
#' a package for genetic mapping in polyploids. These palettes are 
#' intended to enhance the visual representation of genetic data.
#'
#' The available palettes are:
#' \describe{
#'   \item{\code{mp_pallet1}}{A palette with warm colors ranging from yellow to dark red and brown.}
#'   \item{\code{mp_pallet2}}{A palette with cool colors, including purples, blues, and green.}
#'   \item{\code{mp_pallet3}}{A comprehensive palette that combines colors from both \code{mp_pallet1} and \code{mp_pallet2}, offering a broad range of colors.}
#'}
#'
#' Each palette function returns a function that can generate color vectors of variable length, suitable for mapping or plotting functions in R.
#'
#' @name mappoly-color-palettes
#' @aliases mp_pallet1 mp_pallet2 mp_pallet3
#' @keywords internal
#' @export mp_pallet1 mp_pallet2 mp_pallet3
#' @examples
#' # Generate a palette of 5 colors from mp_pallet1
#' pal1 <- mp_pallet1(5)
#' plot(1:5, pch=19, col=pal1)
#'
#' # Generate a palette of 10 colors from mp_pallet2
#' pal2 <- mp_pallet2(10)
#' plot(1:10, pch=19, col=pal2)
#'
#' # Generate a palette of 15 colors from mp_pallet3
#' pal3 <- mp_pallet3(15)
#' plot(1:15, pch=19, col=pal3)

mp_pallet1 <- colorRampPalette(c("#ffe119", "#f58231","#e6194b","#808000","#9a6324", "#800000"))
mp_pallet2 <- colorRampPalette(c("#911eb4", "#000075","#4363d8","#42d4f4","#469990", "#3cb44b"))
mp_pallet3 <- colorRampPalette(c("#ffe119", "#f58231","#e6194b","#808000","#9a6324", "#800000","#911eb4", "#000075","#4363d8","#42d4f4","#469990", "#3cb44b"))

#' Update missing information
#' @keywords internal
update_missing <- function(input.data, prob.thres = 0.95){
  geno.dose <- dist_prob_to_class(geno = input.data$geno, prob.thres = prob.thres)
  if(geno.dose$flag)
  {
    geno <- geno.dose$geno
    geno.dose <- geno.dose$geno.dose
  } else {
    geno.dose <- geno.dose$geno.dose
  }
  geno.dose[is.na(geno.dose)] <- input.data$ploidy + 1
  input.data$geno.dose <- geno.dose
  input.data$prob.thres <- prob.thres
  return(input.data)
}

#' Chi-square test
#' @keywords internal
mrk_chisq_test <- function(x, ploidy){
  y <- x[-c(1:(ploidy+1))]
  y[y == ploidy+1] <- NA
  y <- table(y, useNA = "always")
  names(y) <- c(names(y)[-length(y)], "NA") 
  seg.exp <- x[0:(ploidy+1)]
  seg.exp <- seg.exp[seg.exp != 0]
  seg.obs <- seg.exp
  seg.obs[names(y)[-length(y)]] <- y[-length(y)]
  pval <- suppressWarnings(stats::chisq.test(x = seg.obs, p = seg.exp[names(seg.obs)])$p.value)
  pval
}

#' Get the genomic position of markers in a sequence
#'
#' This functions gets the genomic position of markers in a sequence and
#' return an ordered data frame with the name and position of each marker
#' @param input.seq a sequence object of class \code{mappoly.sequence}
#' @param verbose if \code{TRUE} (default), the current progress is shown; if
#'     \code{FALSE}, no output is produced
#' @param x 	an object of the class mappoly.geno.ord
#' @param ... 	currently ignored   
#'   
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
#' @examples
#' s1 <- make_seq_mappoly(tetra.solcap, "all")
#' o1 <- get_genomic_order(s1)
#' plot(o1)
#' s.geno.ord <- make_seq_mappoly(o1)
#' @export get_genomic_order
get_genomic_order <- function(input.seq, verbose = TRUE){
  if (!inherits(input.seq, "mappoly.sequence")) {
    stop(deparse(substitute(input.seq)), 
         " is not an object of class 'mappoly.sequence'")
  }
  if(all(is.na(input.seq$genome.pos))){
    if(all(is.na(input.seq$chrom))) 
      stop("No sequence or sequence position information found.")
    else{
      if (verbose) message("Ordering markers based on chromosome information")
      M <- data.frame(seq = input.seq$chrom, row.names = input.seq$seq.mrk.names)
      M.out <- M[order(M[,1]),]
    }
  } else if(all(is.na(input.seq$chrom))){
    if(all(is.na(input.seq$genome.pos))) 
      stop("No sequence or sequence position information found.")
    else{
      if (verbose) message("Ordering markers based on sequence position information")
      M <- data.frame(seq.pos = input.seq$genome.pos, row.names = input.seq$seq.mrk.names)
      M.out <- M[order(as.numeric(M[,1])),]
    }
  } else{
    M <- data.frame(seq = input.seq$chrom, 
                    seq.pos = input.seq$genome.pos, 
                    row.names = input.seq$seq.mrk.names)
    M.out <- M[order(as.numeric(M[,1]), as.numeric(M[,2])),]
  }
  structure(list(data.name = input.seq$data.name, ord = M.out), class = "mappoly.geno.ord")
}

#' @rdname get_genomic_order
#' @export
print.mappoly.geno.ord <- function(x, ...){
  print(head(x$ord))
}

#' @rdname get_genomic_order
#' @export
#' @importFrom ggplot2 ggplot aes geom_point xlab ylab theme
plot.mappoly.geno.ord <- function(x, ...){
  seq <- seq.pos <- NULL
  w <- x$ord
  w$seq <- as.factor(w$seq)
  w$seq = with(w, reorder(seq))
  ggplot2::ggplot(w, 
                  ggplot2::aes(x = seq.pos, y = seq, group = as.factor(seq))) +
    ggplot2::geom_point(ggplot2::aes(color = as.factor(seq)), shape = 108, size = 5, show.legend = FALSE) +
    ggplot2::xlab("Position") + 
    ggplot2::ylab("Chromosome") 
}

#' Data sanity check
#'
#' Checks the consistency of a dataset
#'
#' @param x an object of class \code{mappoly.data}
#' 
#' @return if consistent, returns 0. If not consistent, returns a 
#'         vector with a number of tests, where \code{TRUE} indicates
#'         a failed test.
#' 
#' @examples
#' check_data_sanity(tetra.solcap)
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
#'
#' @references
#'     Mollinari, M., and Garcia, A.  A. F. (2019) Linkage
#'     analysis and haplotype phasing in experimental autopolyploid
#'     populations with high ploidy level using hidden Markov
#'     models, _G3: Genes, Genomes, Genetics_. 
#'     \doi{10.1534/g3.119.400378}
#'     
#' @export check_data_sanity
check_data_sanity <- function(x){
  if(exists('geno', where = x)){
    check_data_dist_sanity(x)
  } else if (exists('geno.dose', where = x)){
    check_data_dose_sanity(x)
  } else
    stop("Inconsistent genotypic information.")
}

#' Checks the consistency of dataset (dosage)
#' @keywords internal
check_data_dose_sanity <- function(x){
  test <- logical(24L)
  names(test) <- 1:24
  
  # ploidy
  test[1] <- x$ploidy%%2 != 0 #is ploidy even?
  test[2] <- any(sapply(x$dosage.p1, function(y) max(y) > x$ploidy | min(y) < 0)) #are dosages in P higher than ploidy?
  test[3] <- any(sapply(x$dosage.p2, function(y) max(y) > x$ploidy | min(y) < 0)) #are dosages in Q higher than ploidy?
  test[4] <- max(x$geno.dose) > x$ploidy + 1 #is there any dose in offspring higher than ploidy?
  test[5] <- min(x$geno.dose) < 0 #is there any negative dose in offspring?
  
  # number of individuals
  test[6] <- x$n.ind < 0 #is the number of individuals greater than zero?
  test[7] <- length(x$ind.names) != x$n.ind #is the number of individual names equal to the number of individuals?
  test[8] <- ncol(x$geno.dose) != x$n.ind #is the number of columns in the dosage matrix equal to the number of individuals?
  
  # number of markers
  test[9] <- x$n.mrk < 0 #is the number of markers greater than zero?
  test[10] <- length(x$mrk.names) != x$n.mrk #is the number of marker names equal to the number of markers?
  test[11] <- length(x$dosage.p1) != x$n.mrk #is the number of marker dosages in P equal to the number of markers?
  test[12] <- length(x$dosage.p2) != x$n.mrk #is the number of marker dosages in Q equal to the number of markers?
  if(length(x$chrom) > 0)
    test[13] <- length(x$chrom) != x$n.mrk #is the number of sequences equal to the number of markers?
  if(length(x$genome.pos) > 0)
    test[14] <- length(x$genome.pos) != x$n.mrk #is the number of sequence positions equal to the number of markers?
  test[15] <- nrow(x$geno.dose) != x$n.mrk #is the number of rows in the dosage matrix equal to the number of markers?
  if(length(x$chisq.pval) > 0)
    test[16] <- length(x$chisq.pval) != x$n.mrk #is the number of chi-square tests equal to the number of markers?
  
  # individual names in the dosage dataset
  test[17] <- !is.character(x$ind.names) # are individual's names characters
  test[18] <- !identical(colnames(x$geno.dose), x$ind.names) # are column names in dosage matrix identical to individual names?
  
  # markers names in the dosage dataset
  test[19] <- !is.character(x$mrk.names)# are marker's names characters
  test[20] <- !identical(rownames(x$geno.dose), x$mrk.names)# are row names in dosage matrix identical to marker names?
  
  # dosage in both parents
  test[21] <- !is.integer(x$dosage.p1) # are dosages in P numeric
  test[22] <- !is.integer(x$dosage.p2) # are dosages in Q numeric 
  test[23] <- !identical(names(x$dosage.p1), x$mrk.names) # are names in P's dosage vector identical to marker names?
  test[24] <- !identical(names(x$dosage.p2), x$mrk.names) # are names in Q's dosage vector identical to marker names?
  
  if(any(test))
    return(test)
  else
    return(0)
}

#' Checks the consistency of dataset (probability distribution)
#' @keywords internal
check_data_dist_sanity <- function(x){
  test <- logical(29L)
  names(test) <- 1:29
  
  # ploidy
  test[1] <- x$ploidy%%2 != 0 #is ploidy even?
  test[2] <- any(sapply(x$dosage.p1, function(y) max(y) > x$ploidy | min(y) < 0)) #are dosages in P higher than ploidy?
  test[3] <- any(sapply(x$dosage.p2, function(y) max(y) > x$ploidy | min(y) < 0)) #are dosages in Q higher than ploidy?
  test[4] <- ncol(x$geno) > x$ploidy + 3 #is the number of columns in the probability data frame correct? (ploidy + 3)
  test[5] <- max(x$geno.dose) > x$ploidy + 1 #is there any dose in offspring higher than ploidy?
  test[6] <- min(x$geno.dose) < 0 #is there any negative dose in offspring?
  
  # number of individuals
  test[7] <- x$n.ind < 0 #is the number of individuals greater than zero?
  test[8] <- length(x$ind.names) != x$n.ind #is the number of individual names equal to the number of individuals?
  test[9] <- length(unique(x$geno$ind)) != x$n.ind #is the number of individuals in the probability data frame equal to the number of individuals?
  test[10] <- ncol(x$geno.dose) != x$n.ind #is the number of columns in the dosage matrix equal to the number of individuals?
  
  # number of markers
  test[11] <- x$n.mrk < 0 #is the number of markers greater than zero?
  test[12] <- length(x$mrk.names) != x$n.mrk #is the number of marker names equal to the number of markers?
  test[13] <- length(x$dosage.p1) != x$n.mrk #is the number of marker dosages in P equal to the number of markers?
  test[14] <- length(x$dosage.p2) != x$n.mrk #is the number of marker dosages in Q equal to the number of markers?
  if(length(x$chrom) > 0)
    test[15] <- length(x$chrom) != x$n.mrk #is the number of sequences equal to the number of markers?
  if(length(x$genome.pos) > 0)
    test[16] <- length(x$genome.pos) != x$n.mrk #is the number of sequence positions equal to the number of markers?
  test[17] <- nrow(x$geno)/x$n.ind != x$n.mrk#is the number of rows in the probability data frame divided by n.ind equal to the number of markers?
  test[18] <- nrow(x$geno.dose) != x$n.mrk#is the number of rows in the dosage matrix equal to the number of markers?
  if(length(x$chisq.pval) > 0)
    test[19] <- length(x$chisq.pval) != x$n.mrk#is the number of chi-square tests equal to the number of markers?
  
  # individual names in the dosage and probability dataset
  test[20] <- !is.character(x$ind.names) # are individual's names characters
  test[21] <- !identical(x$geno$ind, rep(x$ind.names, each = x$n.mrk)) #are individual's names in the probability data frame properly 
  #arranged and consistent with the informed individual's names 
  test[22] <- !identical(colnames(x$geno.dose), x$ind.names)# are column names in dosage matrix identical to individual names?
  
  # marker names in the dosage and probability dataset
  test[23] <- !is.character(x$mrk.names)# are marker's names characters
  test[24] <- !identical(x$geno$mrk, rep(x$mrk.names, x$n.ind))#are marker names in the probability data frame properly 
  #arranged and consistent with the informed marker names 
  test[25] <- !identical(rownames(x$geno.dose), x$mrk.names)# are row names in dosage matrix identical to marker names?
  
  # dosage in both parents
  test[26] <- !is.integer(x$dosage.p1)# are dosages in P numeric
  test[27] <- !is.integer(x$dosage.p2)# are dosages in Q numeric 
  test[28] <- !identical(names(x$dosage.p1), x$mrk.names)# are names in P's dosage vector identical to marker names?
  test[29] <- !identical(names(x$dosage.p2), x$mrk.names)# are names in Q's dosage vector identical to marker names?
  
  if(any(test))
    return(test)
  else
    return(0)
}

#' Merge datasets
#'
#' This function merges two datasets of class \code{mappoly.data}. This can be useful
#' when individuals of a population were genotyped using two or more techniques
#' and have datasets in different files or formats. Please notice that the datasets 
#' should contain the same number of individuals and they must be represented identically 
#' in both datasets  (e.g. \code{Ind_1} in both datasets, not \code{Ind_1}
#' in one dataset and \code{ind_1} or \code{Ind.1} in the other).
#'
#' @param dat.1 the first dataset of class \code{mappoly.data} to be merged
#'
#' @param dat.2 the second dataset of class \code{mappoly.data} to be merged (default = NULL);
#' if \code{dat.2 = NULL}, the function returns \code{dat.1} only
#'
#' @return An object of class \code{mappoly.data} which contains all markers
#' from both datasets. It will be a list with the following components:
#'     \item{ploidy}{ploidy level}
#'     \item{n.ind}{number individuals}
#'     \item{n.mrk}{total number of markers}
#'     \item{ind.names}{the names of the individuals}
#'     \item{mrk.names}{the names of the markers}
#'     \item{dosage.p1}{a vector containing the dosage in
#'       parent P for all \code{n.mrk} markers}
#'     \item{dosage.p2}{a vector containing the dosage in
#'       parent Q for all \code{n.mrk} markers}
#'     \item{chrom}{a vector indicating which sequence each marker
#'       belongs. Zero indicates that the marker was not assigned to any
#'       sequence}
#'     \item{genome.pos}{Physical position of the markers into the
#'       sequence}
#'     \item{seq.ref}{if one or both datasets originated from read_vcf, it keeps
#' reference alleles from sequencing platform, otherwise is NULL}
#'     \item{seq.alt}{if one or both datasets originated from read_vcf, it keeps
#' alternative alleles from sequencing platform, otherwise is NULL}
#'     \item{all.mrk.depth}{if one or both datasets originated from read_vcf, it keeps
#' marker read depths from sequencing, otherwise is NULL}
#'     \item{prob.thres}{(unused field)}
#'     \item{geno.dose}{a matrix containing the dosage for each markers (rows) 
#'       for each individual (columns). Missing data are represented by 
#'       \code{ploidy_level + 1}}
#'     \item{geno}{if both datasets contain genotype distribution information,
#' the final object will contain 'geno'. This is set to NULL otherwise}
#'     \item{nphen}{(0)}
#'     \item{phen}{(NULL)}
#'     \item{chisq.pval}{a vector containing p-values related to the chi-squared 
#'     test of Mendelian segregation performed for all markers in both datasets}
#'     \item{kept}{if elim.redundant = TRUE when reading any dataset, holds all non-redundant markers}
#'     \item{elim.correspondence}{if elim.redundant = TRUE when reading any dataset,
#' holds all non-redundant markers and its equivalence to the redundant ones}
#' 
#' @author Gabriel Gesteira, \email{gdesiqu@ncsu.edu}
#' @examples
#' \donttest{
#' ## Loading a subset of SNPs from chromosomes 3 and 12 of sweetpotato dataset 
#' ## (SNPs anchored to Ipomoea trifida genome)
#' dat <- NULL
#' for(i in c(3, 12)){
#'   cat("Loading chromosome", i, "...\n")
#'     tempfl <- tempfile(pattern = paste0("ch", i), fileext = ".vcf.gz")
#'     x <- "https://github.com/mmollina/MAPpoly_vignettes/raw/master/data/sweet_sample_ch"
#'     address <- paste0(x, i, ".vcf.gz")
#'     download.file(url = address, destfile = tempfl)
#'     dattemp <- read_vcf(file = tempfl, parent.1 = "PARENT1", parent.2 = "PARENT2",
#'                         ploidy = 6, verbose = FALSE)
#'     dat <- merge_datasets(dat, dattemp)
#'   cat("\n")
#' }
#' dat
#' plot(dat)
#'}
#'
#' @references
#'     Mollinari, M., and Garcia, A.  A. F. (2019) Linkage
#'     analysis and haplotype phasing in experimental autopolyploid
#'     populations with high ploidy level using hidden Markov
#'     models, _G3: Genes, Genomes, Genetics_. 
#'     \doi{10.1534/g3.119.400378} 
#'
#' @export merge_datasets
#' @importFrom dplyr bind_rows arrange
merge_datasets = function(dat.1 = NULL, dat.2 = NULL){
  ## Check objects class
  if (is.null(dat.1)){
    if (is.null(dat.2)) return(dat.1)
    if (!inherits(dat.2, "mappoly.data")) {
      stop(deparse(substitute(dat.2)), " is not an object of class 'mappoly.data'")
    } else {
      return(dat.2)
    }
  } 
  if (!inherits(dat.1, "mappoly.data")) {
    stop(deparse(substitute(dat.1)), " is not an object of class 'mappoly.data'")
  }
  if (is.null(dat.2)) return(dat.1)
  if (!inherits(dat.2, "mappoly.data")) {
    stop(deparse(substitute(dat.2)), " is not an object of class 'mappoly.data'")
  }
  ## Check ploidy
  if (dat.1$ploidy != dat.2$ploidy){
    stop("The ploidy levels in the datasets do not match. Please check your files and try again.")
  }
  ## Check individuals
  if (dat.1$n.ind != dat.2$n.ind){
    stop("The datasets have different number of individuals. Please check your files and try again.")
  }
  ## Check individual names
  if (!all(dat.1$ind.names %in% dat.2$ind.names)){
    nmi.1 = dat.1$ind.names[!(dat.1$ind.names %in% dat.2$ind.names)]
    nmi.2 = dat.2$ind.names[!(dat.2$ind.names %in% dat.1$ind.names)]
    cat("Some individuals' names don't match:\n")
    cat("Dataset 1\tDataset 2\n")
    for (i in 1:length(nmi.1)) cat(nmi.1[i], "\t\t", nmi.2[i], "\n")
    stop("Your datasets have the same number of individuals, but some of them are not the same. Please check the list above, fix your files and try again.")
  }
  ## Checking (and fixing) individuals order in geno.dose
  if (!all(colnames(dat.1$geno.dose)  ==  colnames(dat.2$geno.dose))){
    dat.2$geno.dose = dat.2$geno.dose[,colnames(dat.1$geno.dose)]
  }
  ## Merging all items
  dat.1$geno.dose = rbind(dat.1$geno.dose, dat.2$geno.dose)
  dat.1$dosage.p1 = c(dat.1$dosage.p1, dat.2$dosage.p1)
  dat.1$dosage.p2 = c(dat.1$dosage.p2, dat.2$dosage.p2)
  ##dat.1$mrk.names = c(dat.1$mrk.names, dat.2$mrk.names) ## Fixing possible name incompatibilities  
  dat.1$mrk.names = rownames(dat.1$geno.dose) ## Fixing possible name incompatibilities  
  dat.1$n.mrk = dat.1$n.mrk + dat.2$n.mrk
  dat.1$chrom = c(dat.1$chrom, dat.2$chrom)
  dat.1$genome.pos = c(dat.1$genome.pos, dat.2$genome.pos)
  # VCF info
  ## seq.ref and seq.alt
  if (!is.null(dat.1$seq.ref) && !is.null(dat.2$seq.ref)){
    dat.1$seq.ref = c(dat.1$seq.ref,dat.2$seq.ref)
    dat.1$seq.alt = c(dat.1$seq.alt,dat.2$seq.alt)  
  }  else if (is.null(dat.1$seq.ref) && is.null(dat.2$seq.ref)){
    dat.1$seq.ref = dat.1$seq.alt = NULL
  }  else if (!is.null(dat.1$seq.ref) && is.null(dat.2$seq.ref)){
    dat.1$seq.ref = c(dat.1$seq.ref,rep(NA,length(dat.2$n.mrk)))
    dat.1$seq.alt = c(dat.1$seq.alt,rep(NA,length(dat.2$n.mrk)))
  } else if (is.null(dat.1$seq.ref) && !is.null(dat.2$seq.ref)){
    dat.1$seq.ref = c(rep(NA,length(dat.1$n.mrk)),dat.2$seq.ref)
    dat.1$seq.alt = c(rep(NA,length(dat.1$n.mrk)),dat.2$seq.alt)
  }
  ## all.mrk.depth
  if (!is.null(dat.1$all.mrk.depth) && !is.null(dat.2$all.mrk.depth)){
    dat.1$all.mrk.depth = c(dat.1$all.mrk.depth,dat.2$all.mrk.depth)
  }  else if (is.null(dat.1$all.mrk.depth) && is.null(dat.2$all.mrk.depth)){
    dat.1$all.mrk.depth = NULL
  }  else if (!is.null(dat.1$all.mrk.depth) && is.null(dat.2$all.mrk.depth)){
    dat.1$all.mrk.depth = c(dat.1$all.mrk.depth,rep(NA,length(dat.2$n.mrk)))
  } else if (is.null(dat.1$all.mrk.depth) && !is.null(dat.2$all.mrk.depth)){
    dat.1$all.mrk.depth = c(rep(NA,length(dat.1$n.mrk)),dat.2$all.mrk.depth)
  }
  # Chisq info
  if (!is.null(dat.1$chisq.pval) && !is.null(dat.2$chisq.pval)){
    dat.1$chisq.pval = c(dat.1$chisq.pval,dat.2$chisq.pval)
  }  else if (is.null(dat.1$chisq.pval) && is.null(dat.2$chisq.pval)){
    dat.1$chisq.pval = NULL
  }  else if (!is.null(dat.1$chisq.pval) && is.null(dat.2$chisq.pval)){
    dat.1$chisq.pval = c(dat.1$chisq.pval,rep(NA,length(dat.2$n.mrk)))
  } else if (is.null(dat.1$chisq.pval) && !is.null(dat.2$chisq.pval)){
    dat.1$chisq.pval = c(rep(NA,length(dat.1$n.mrk)),dat.2$chisq.pval)
  }
  # Redundant info
  ## Kept info
  if (!is.null(dat.1$kept) && !is.null(dat.2$kept)){
    dat.1$kept = c(dat.1$kept,dat.2$kept)
  }  else if (is.null(dat.1$kept) && is.null(dat.2$kept)){
    dat.1$kept = NULL
  }  else if (!is.null(dat.1$kept) && is.null(dat.2$kept)){
    dat.1$kept = c(dat.1$kept, dat.2$mrk.names)
  } else if (is.null(dat.1$kept) && !is.null(dat.2$kept)){
    dat.1$kept = c(dat.1$mrk.names,dat.2$kept)
  }  
  ## elim.correspondence info
  if (!is.null(dat.1$elim.correspondence) && !is.null(dat.2$elim.correspondence)){
    dat.1$elim.correspondence = rbind(dat.1$elim.correspondence,dat.2$elim.correspondence)
  }  else if (is.null(dat.1$elim.correspondence) && is.null(dat.2$elim.correspondence)){
    dat.1$elim.correspondence = NULL
  }  else if (!is.null(dat.1$elim.correspondence) && is.null(dat.2$elim.correspondence)){
    dat.1$elim.correspondence = dat.1$elim.correspondence
  } else if (is.null(dat.1$elim.correspondence) && !is.null(dat.2$elim.correspondence)){
    dat.1$elim.correspondence = dat.2$elim.correspondence
  }  
  ## geno dist info (just keep if both datasets contain this information)
  if (exists('geno', where = dat.1) && exists('geno', where = dat.2)){
    #dat.1$geno = rbind(dat.1$geno,dat.2$geno)
    ind <- NULL
    dat.1$geno <- dplyr::bind_rows(dat.1$geno, dat.2$geno) %>% arrange(ind)
  } else dat.1$geno = NULL
  if(!is.null(dat.1$chisq.pval) | !any(is.na(dat.1$chisq.pval)))
    dat.1$chisq.pval <- dat.1$chisq.pval[dat.1$mrk.names]
  ## Fixing possible issues with marker names
  names(dat.1$dosage.p1) = names(dat.1$dosage.p2) = names(dat.1$genome.pos) = names(dat.1$chrom) = names(dat.1$chisq.pval) = dat.1$mrk.names
  return(dat.1)
}

#' Summary maps
#'
#' This function generates a brief summary table of a list of \code{mappoly.map} objects
#' @param map.list a list of objects of class \code{mappoly.map}
#' @param verbose if \code{TRUE} (default), the current progress is shown; if
#'     \code{FALSE}, no output is produced
#' @return a data frame containing a brief summary of all maps contained in \code{map.list}
#' @examples
#' tetra.sum <- summary_maps(solcap.err.map)
#' tetra.sum
#' @author Gabriel Gesteira, \email{gdesiqu@ncsu.edu}
#' @export summary_maps
summary_maps = function(map.list, verbose = TRUE){
  ## Check data
  if (any(!sapply(map.list, inherits, "mappoly.map"))) 
    stop(deparse(substitute(map.list)), 
         " is not a list containing 'mappoly.map' objects.")
  md <- unlist(lapply(map.list, function(x) sum(get_tab_mrks(x)) - sum(get_tab_mrks(x)[rbind(c(2,2),c(x$info$ploidy,x$info$ploidy))]) - sum(get_tab_mrks(x)[rbind(c(1,2),c(2,1),c(x$info$ploidy,(x$info$ploidy+1)),c((x$info$ploidy+1),x$info$ploidy))])))
  if(map.list[[1]]$info$ploidy == 2)
    md[] <- 0
  results = data.frame("LG" = as.character(seq(1,length(map.list),1)),
                       "Genomic sequence" = as.character(unlist(lapply(map.list, function(x) paste(unique(x$info$chrom), collapse = "-")))),
                       "Map length (cM)" = unlist(lapply(map.list, function(x) round(sum(c(0, imf_h(x$maps[[1]]$seq.rf))), 2))),
                       "Markers/cM" = round(unlist(lapply(map.list, function(x) x$info$n.mrk/(round(sum(c(0, imf_h(x$maps[[1]]$seq.rf))), 2)))),2),
                       "Simplex" = unlist(lapply(map.list, function(x) sum(get_tab_mrks(x)[rbind(c(1,2),c(2,1),c(x$info$ploidy,(x$info$ploidy+1)),c((x$info$ploidy+1),x$info$ploidy))]))),
                       "Double-simplex" = unlist(lapply(map.list, function(x) sum(get_tab_mrks(x)[rbind(c(2,2),c(x$info$ploidy,x$info$ploidy))]))),
                       "Multiplex" = md,
                        "Total" = unlist(lapply(map.list, function(x) x$info$n.mrk)),
                       "Max gap" = unlist(lapply(map.list, function(x) round(imf_h(max(x$maps[[1]]$seq.rf)),2))),
                       check.names = FALSE, stringsAsFactors = F)
  results = rbind(results, c('Total', NA, sum(as.numeric(results$`Map length (cM)`)), round(mean(as.numeric(results$`Markers/cM`)),2), sum(as.numeric(results$Simplex)), sum(as.numeric(results$`Double-simplex`)), sum(as.numeric(results$Multiplex)), sum(as.numeric(results$Total)), round(mean(as.numeric(results$`Max gap`)),2)))
  if (verbose){
    all.mrks = unlist(lapply(map.list, function(x) return(x$info$mrk.names)))
    if (!any(get(map.list[[1]]$info$data.name, pos = 1)$elim.correspondence$elim %in% all.mrks))
      message("\nYour dataset contains removed (redundant) markers. Once finished the maps, remember to add them back with the function 'update_map'.\n")
  }
  return(results)
}

#' Get table of dosage combinations
#' 
#' Internal function
#' @param x an object of class \code{mappoly.map}
#' @author Gabriel Gesteira, \email{gdesiqu@ncsu.edu}
get_tab_mrks = function(x){
  tab = table(get(x$info$data.name, pos = 1)$dosage.p1[which(get(x$info$data.name, pos = 1)$mrk.names %in% x$info$mrk.names)], get(x$info$data.name, pos = 1)$dosage.p2[which(get(x$info$data.name, pos = 1)$mrk.names %in% x$info$mrk.names)])
  doses = as.character(seq(0,x$info$ploidy,1))
  # checking dimensions
  if (!all(doses %in% colnames(tab))){
    newmat = matrix(NA, nrow(tab), length(doses))
    newmat[,which(doses %in% colnames(tab))] = tab[,which(colnames(tab) %in% doses)]
    newmat[,which(!doses %in% colnames(tab))] = 0
    rownames(newmat) = rownames(tab)
    colnames(newmat) = doses
    tab = newmat
  }
  if (!all(doses %in% rownames(tab))){
    newmat = matrix(NA, length(doses), ncol(tab))
    newmat[which(doses %in% rownames(tab)),] = tab[which(rownames(tab) %in% doses),]
    newmat[which(!doses %in% rownames(tab)),] = 0
    colnames(newmat) = colnames(tab)
    rownames(newmat) = doses
    tab = newmat
  }
  return(tab)
}

#' Update map
#' 
#' This function takes an object of class \code{mappoly.map} and checks for
#' removed redundant markers in the original dataset. Once redundant markers
#' are found, they are re-added to the map in their respective equivalent positions
#' and another HMM round is performed.
#' 
#' @param input.maps a single map or a list of maps of class \code{mappoly.map}
#' @param verbose if TRUE (default), shows information about each update process
#' @return an updated map (or list of maps) of class \code{mappoly.map}, containing the original map(s) plus redundant markers
#' @author Gabriel Gesteira, \email{gdesiqu@ncsu.edu}
#' @examples
#' orig.map <- solcap.err.map
#' up.map <- lapply(solcap.err.map, update_map)
#' summary_maps(orig.map)
#' summary_maps(up.map)
#' @export update_map
#' 
update_map = function(input.maps, verbose = TRUE){
  ## Checking object
  if (inherits(input.maps, "mappoly.map"))
    input.maps = list(input.maps)
  if(!inherits(input.maps, "list"))
    stop(deparse(substitute(input.maps)), 
         " is not an object of class 'mappoly.map' neither a list containing 'mappoly.map' objects.")
  if (any(!sapply(input.maps, inherits, "mappoly.map"))) 
    stop(deparse(substitute(input.maps)), 
         " is not an object of class 'mappoly.map' neither a list containing 'mappoly.map' objects.")
  ## Checking the existence of redundant markers
  if (is.null(get(input.maps[[1]]$info$data.name, pos = 1)$elim.correspondence))
    stop('Your dataset does not contain redundant markers. Please check it and try again.')
  ## Creating list to handle results
  results = list()
  for (i in 1:length(input.maps)){
    if (verbose) cat("Updating map", i , "\n")    
    input.map = input.maps[[i]]
    ## Checking if redundant markers belong to the informed map
    map.kept.mrks = which(as.character(get(input.map$info$data.name, pos = 1)$elim.correspondence$kept) %in% input.map$info$mrk.names)
    if (is.null(map.kept.mrks)){
      if (verbose) cat("There is no redundant marker on map ",i,". Skipping it.\n")
      next
    }
    corresp = get(input.map$info$data.name, pos = 1)$elim.correspondence[which(as.character(get(input.map$info$data.name, pos = 1)$elim.correspondence$kept) %in% input.map$info$mrk.names),]
    ## Check if redundant markers were not already added to the map
    if (any(corresp$elim %in% input.map$info$mrk.names)) {
      if (verbose) cat("Some redundant markers were already added to the map ", i,". These markers will be skipped.\n")
      corresp = corresp[!(corresp$elim %in% input.map$info$mrk.names),]
    }
    ## Updating number of markers
    input.map$info$n.mrk = input.map$info$n.mrk + nrow(corresp)
    ## Adding markers to the sequence
    while (nrow(corresp) > 0){
      pos.kep = match(as.character(corresp$kept), get(input.map$info$data.name, pos = 1)$mrk.names)
      input.map$info$seq.num = append(input.map$info$seq.num, NA, after = which(input.map$info$seq.num  ==  pos.kep[1]))
      input.map$info$chisq.pval = append(input.map$info$chisq.pval, input.map$info$chisq.pval[which(input.map$info$seq.num  ==  pos.kep[1])], after = which(input.map$info$seq.num  ==  pos.kep[1]))
      input.map$info$seq.dose.p1 = append(input.map$info$seq.dose.p1, input.map$info$seq.dose.p1[which(input.map$info$seq.num  ==  pos.kep[1])], after = which(input.map$info$seq.num  ==  pos.kep[1]))
      input.map$info$seq.dose.p2 = append(input.map$info$seq.dose.p2, input.map$info$seq.dose.p2[which(input.map$info$seq.num  ==  pos.kep[1])], after = which(input.map$info$seq.num  ==  pos.kep[1]))
      input.map$info$chrom = as.numeric(append(input.map$info$chrom, as.character(corresp$chrom[1]), after = which(input.map$info$seq.num  ==  pos.kep[1])))
      input.map$info$genome.pos = as.numeric(append(input.map$info$genome.pos, as.character(corresp$genome.pos[1]), after = which(input.map$info$seq.num  ==  pos.kep[1])))
      input.map$info$mrk.names = append(input.map$info$mrk.names, as.character(corresp$elim[1]), after = which(input.map$info$mrk.names  ==  as.character(corresp$kept[1])))
      if (!is.null(input.map$info$seq.ref) && !is.null(input.map$info$seq.alt)){
        input.map$info$seq.ref = append(input.map$info$seq.ref, as.character(corresp$seq.ref[1]), after = which(input.map$info$seq.num  ==  pos.kep[1]))
        input.map$info$seq.alt = append(input.map$info$seq.alt, as.character(corresp$seq.alt[1]), after = which(input.map$info$seq.num  ==  pos.kep[1]))
      }
      for (j in 1:length(input.map$maps)){
        input.map$maps[[j]]$seq.rf = append(input.map$maps[[j]]$seq.rf, 0.000001, after = which(input.map$info$seq.num  ==  pos.kep[1]))
        input.map$maps[[j]]$seq.ph$P = append(input.map$maps[[j]]$seq.ph$P, input.map$maps[[j]]$seq.ph$P[paste0(pos.kep[1])], after = which(input.map$info$seq.num  ==  pos.kep[1]))
        input.map$maps[[j]]$seq.ph$Q = append(input.map$maps[[j]]$seq.ph$Q, input.map$maps[[j]]$seq.ph$Q[paste0(pos.kep[1])], after = which(input.map$info$seq.num  ==  pos.kep[1]))
      }
      corresp = corresp[-1,]
    }
    ## Renaming objects and updating map information
    names(input.map$info$chrom) = names(input.map$info$genome.pos) = names(input.map$info$chisq.pval) = names(input.map$info$seq.dose.p1) = names(input.map$info$seq.dose.p2) = names(input.map$info$seq.num) = input.map$info$mrk.names
    for (j in 1:length(input.map$maps)){
      names(input.map$maps[[j]]$seq.ph$P) = names(input.map$maps[[j]]$seq.ph$Q) = as.character(input.map$info$seq.num)
      input.map$maps[[j]]$seq.num = input.map$info$seq.num
    }
    if (!is.null(input.map$info$seq.ref) && !is.null(input.map$info$seq.alt))
      names(input.map$info$seq.ref) = names(input.map$info$seq.alt) = input.map$info$mrk.names
    results[[i]] = input.map
  }
  if (length(results)  ==  1) return(results[[1]])
  else return(results)
}

#' Random sampling of dataset
#' @param input.data an object  of class \code{mappoly.data}
#' @param n number of individuals or markers to be sampled
#' @param percentage if \code{n == NULL}, the percentage of individuals or markers to be sampled
#' @param type should sample individuals or markers?
#' @param selected.ind a vector containing the name of the individuals to select. Only has effect 
#' if \code{type = "individual"}, \code{n = NULL} and \code{percentage = NULL}
#' @param selected.mrk a vector containing the name of the markers to select. Only has effect 
#' if \code{type = "marker"}, \code{n = NULL} and \code{percentage = NULL}
#' @return an object  of class \code{mappoly.data}
#' @keywords internal
#' @export
#' @importFrom magrittr "%>%"
#' @importFrom dplyr filter
sample_data <- function(input.data, n = NULL, 
                        percentage = NULL, 
                        type = c("individual", "marker"),
                        selected.ind = NULL,
                        selected.mrk = NULL){
  if(!is.null(selected.mrk))
    type <- "marker"
  else type <- match.arg(type)
  if(type  ==  "individual"){
    if(!is.null(n)){
      selected.ind.id <- sort(sample(input.data$n.ind, n))
    } else if(!is.null(percentage)){
      selected.ind.id <- sample(input.data$n.ind, ceiling(input.data$n.ind * percentage/100))
    } else if(!is.null(selected.ind)){
      selected.ind.id <- match(selected.ind, input.data$ind.names)
    } else {
      stop("Inform 'n', 'percentage' or selected.ind.")
    }
    ind <- mrk <- NULL
    selected.ind <- input.data$ind.names[selected.ind.id]
    if(length(selected.ind.id) >= input.data$n.ind) return(input.data)
    if(is.prob.data(input.data))
      input.data$geno <-  input.data$geno %>%
      dplyr::filter(ind%in%selected.ind)
    input.data$geno.dose <- input.data$geno.dose[,selected.ind.id]
    input.data$ind.names <- input.data$ind.names[selected.ind.id]
    input.data$n.ind <- ncol(input.data$geno.dose)
    ##Computing chi-square p.values
    if(!is.null(input.data$chisq.pval)){
      ploidy <- input.data$ploidy
      Ds <- array(NA, dim = c(ploidy+1, ploidy+1, ploidy+1))
      for(i in 0:ploidy)
        for(j in 0:ploidy)
          Ds[i+1,j+1,] <- segreg_poly(ploidy = ploidy, dP = i, dQ = j)
      Dpop <- cbind(input.data$dosage.p1, input.data$dosage.p2)
      M <- t(apply(Dpop, 1, function(x) Ds[x[1]+1, x[2]+1,]))
      dimnames(M) <- list(input.data$mrk.names, c(0:ploidy))
      M <- cbind(M, input.data$geno.dose)
      input.data$chisq.pval <- apply(M, 1, mrk_chisq_test, ploidy = ploidy)
    }
    return(input.data)
  } 
  else if(type  ==  "marker"){
    if(!is.null(n)){
      selected.mrk.id <- sort(sample(input.data$n.mrk, n))
    } else if(!is.null(percentage)){
      selected.mrk.id <- sort(sample(input.data$n.mrk, ceiling(input.data$n.mrk * percentage/100)))
    } else if(!is.null(selected.mrk)){
      selected.mrk.id <- match(selected.mrk, input.data$mrk.names)
    } else{
      stop("Inform 'n', 'percentage' or selected.mrk.")
    }
    selected.mrk <- input.data$mrk.names[selected.mrk.id]
    if(length(selected.mrk.id) >= input.data$n.mrk) return(input.data)
    if(is.prob.data(input.data))
      input.data$geno <-  input.data$geno %>%
      dplyr::filter(mrk%in%selected.mrk)
    input.data$geno.dose <- input.data$geno.dose[selected.mrk.id, , drop = FALSE]
    input.data$n.mrk <- nrow(input.data$geno.dose)
    input.data$mrk.names <- input.data$mrk.names[selected.mrk.id]
    input.data$dosage.p1 <- input.data$dosage.p1[selected.mrk.id]
    input.data$dosage.p2 <- input.data$dosage.p2[selected.mrk.id]
    input.data$chrom <- input.data$chrom[selected.mrk.id]
    input.data$genome.pos <- input.data$genome.pos[selected.mrk.id]
    input.data$seq.ref <- input.data$seq.ref[selected.mrk.id]
    input.data$seq.alt <- input.data$seq.alt[selected.mrk.id]
    input.data$all.mrk.depth <- input.data$all.mrk.depth[selected.mrk.id]
    input.data$kept <- intersect(input.data$mrk.names, input.data$kept)
    input.data$elim.correspondence <- input.data$elim.correspondence[input.data$elim.correspondence$kept%in%input.data$mrk.names, , drop = FALSE]
    input.data$chisq.pval <- input.data$chisq.pval[names(input.data$chisq.pval)%in%input.data$mrk.names]
    if(!is.null(input.data$chisq.pval)) 
      input.data$chisq.pval <- input.data$chisq.pval[names(input.data$chisq.pval)%in%input.data$mrk.names]
    return(input.data)  
  }
  else stop("Inform type")
}


#' Get weighted ordinary least squared map give a sequence and rf matrix
#' @keywords internal
#' @importFrom zoo na.approx
get_ols_map <- function(input.seq, input.mat, weight = TRUE){
  id <- input.seq$seq.mrk.names
  rf <- imf_h(get_rf_from_mat(input.mat$rec.mat[id,id]))
  pos <- c(0, cumsum(ifelse(is.na(rf), 0, rf)) + rf*0)
  names(pos) <- input.seq$seq.mrk.names
  pos <- rev(rev(pos)[!cumprod(is.na(rev(pos)))])
  id <- names(pos)
  z <- zoo::na.approx(pos)  
  names(z) <- id
  y <- as.numeric((imf_h(as.dist(input.mat$rec.mat[id,id]))))
  w <- as.numeric((imf_h(as.dist(input.mat$lod.mat[id,id]))))
  v <- t(combn(id,2))
  x <- numeric(nrow(v))
  names(x) <- names(y) <- apply(v, 1, paste0, collapse = "-")
  for(i in 1:nrow(v))
    x[i] <- z[v[i,2]]-z[v[i,1]]
  if(weight)
    model <- lm(y ~ x-1, weights = w)
  else
    model <- lm(y ~ x-1)
  new <- data.frame(x = z)
  u <- predict(model, new)
  d <- cumsum(imf_h(c(0, mf_h(diff(u)))))
  names(d) <- id
  d
}

#' Get Dosage Type in a Sequence
#'
#' Analyzes a genomic sequence object to categorize markers based on their dosage type.
#' The function calculates the dosage type by comparing the dosage of two parental sequences
#' (p1 and p2) against the ploidy level. It categorizes markers into simplex for parent 1 (simplex.p),
#' simplex for parent 2 (simplex.q), double simplex (ds), and multiplex based on the calculated dosages.
#'
#' @param input.seq An object of class \code{"mappoly.sequence"}:
#' 
#' @return A list with four components categorizing marker names into:
#' \describe{
#'   \item{simplex.p}{Markers with a simplex dosage from parent 1.}
#'   \item{simplex.q}{Markers with a simplex dosage from parent 2.}
#'   \item{double.simplex}{Markers with a double simplex dosage.}
#'   \item{multiplex}{Markers not fitting into the above categories, indicating a multiplex dosage.}
#' }
#' @keywords internal
#' @export
get_dosage_type <- function(input.seq){
  p <- abs(abs(input.seq$seq.dose.p1 - input.seq$ploidy/2) - input.seq$ploidy/2)
  q <- abs(abs(input.seq$seq.dose.p2 - input.seq$ploidy/2) - input.seq$ploidy/2)
  s.p <- p  ==  1 & q  ==  0
  s.q <- p  ==  0 & q  ==  1
  ds <- p  ==  1 & q  ==  1
  list(simplex.p = input.seq$seq.mrk.names[s.p],
       simplex.q = input.seq$seq.mrk.names[s.q], 
       double.simplex = input.seq$seq.mrk.names[ds],
       multiplex = input.seq$seq.mrk.names[!(s.p | s.q | ds)])
}

#' Aggregate matrix cells (lower the resolution by a factor)
#' @keywords internal
aggregate_matrix <- function(M, fact){
  id <- seq(1,ncol(M), by = fact)
  id <- cbind(id, c(id[-1]-1, ncol(M)))
  R <- matrix(NA, nrow(id), nrow(id))
  for(i in 1:(nrow(id)-1)){
    for(j in (i+1):nrow(id)){
      R[j,i] <-  R[i,j] <- mean(M[id[i,1]:id[i,2], id[j,1]:id[j,2]], na.rm = TRUE)
    }
  }
  R
}

#' Get states and emission in one informative parent
#' @keywords internal
get_states_and_emission_single_parent <- function(ploidy, ph, global.err, D, dose.notinf.P){
  n.mrk <- nrow(D)
  n.ind <- ncol(D)
  A <- matrix(0, nrow = choose(ploidy, ploidy/2), ncol = length(ph))
  for(i in 1:length(ph)){
    id1 <- numeric(ploidy)
    id1[ph[[i]]] <- 1
    A[,i] <- apply(combn(id1, ploidy/2), 2, sum) + dose.notinf.P[i]/2
  }
  if(round(global.err, 4) == 0.0){
    e <- h <- vector("list", n.mrk)
    for(j in 1:n.mrk){
      e.temp <- h.temp <- vector("list", n.ind)
      for(i in 1:n.ind){
        h.temp[[i]] <- which(A[,j] == D[j,i]) - 1
          if(length(h.temp[[i]]) == 0)
            h.temp[[i]] <- 1:nrow(A) - 1
        #e.temp[[i]] <- rep(1, length(h.temp[[i]]))/length(h.temp[[i]])
         e.temp[[i]] <- rep(1, length(h.temp[[i]]))
      }
      e[[j]] <- e.temp
      h[[j]] <- h.temp
    }
  } else if(round(global.err, 4) > 0.0){
    e <- h <- vector("list", n.mrk)
    for(j in 1:n.mrk){
      e.temp <- h.temp <- vector("list", n.ind)
      for(i in 1:n.ind){
        h.temp[[i]] <- 1:nrow(A) - 1
        s <- which(A[,j] == D[j,i])
        if(length(s) == 0)
          e.temp[[i]] <- rep(1/nrow(A), nrow(A))
        else{
          e.temp0 <- numeric(nrow(A))
          e.temp0[-s] <- global.err/(nrow(A) - length(s))
          e.temp0[s] <- (1- global.err)/length(s)
          e.temp[[i]] <- e.temp0
        }
      }
      e[[j]] <- e.temp
      h[[j]] <- h.temp
    }
  }
  list(states = h, emission = e)    
}


#' Conversion: vector to matrix
#' @keywords internal
v_2_m <- function(x, n){
  y <- base::matrix(NA, n, n) 
  y[base::lower.tri(y)] <- as.numeric(x)
  y[base::upper.tri(y)] <- t(y)[base::upper.tri(y)]
  y
}

#' Compare a list of maps
#' 
#' Compare lengths, density, maximum gaps and log likelihoods in a list of maps.
#' In order to make the maps comparable, the function uses the intersection of 
#' markers among maps. 
#' 
#' @param ... a list of objects of class \code{mappoly.map}
#' 
#' @return A data frame where the lines correspond to the maps in the order provided in input list list
#' 
#' @export
compare_maps <- function(...){
  l <- list(...)
  id <- lapply(l, function(x) x$info$mrk.names)
  id <- Reduce(intersect, id)
  if(length(id) < 2)
    stop("At least two markers must be present in all maps")
  map.out <- vector("list", length(l))
  for(i in 1:length(l)){
    id.temp <- which(l[[i]]$info$mrk.names%in%id)
    map.temp <- filter_map_at_hmm_thres(get_submap(l[[i]], mrk.pos = id.temp, 
                                                   reestimate.rf = FALSE, verbose = FALSE), 10e-5)
    map.out[[i]] <-loglike_hmm(map.temp)
  }
  a <- summary_maps(map.out, verbose = FALSE)
  a <- cbind(a[-nrow(a), -c(1,5,6,7,8)], sapply(map.out, function(x) x$maps[[1]]$loglike))
  a <- cbind(a, rep("-", nrow(a)))
  a[which.max(a[,5]),6] <- "*"
  colnames(a)[c(5,6)] <- c("loglike", "max_likelog")
  return(as.data.frame(a))
}

#' Detects which parent is informative
#'
#' @param x an object of class \code{mappoly.sequence} or \code{mappoly.map}
#' 
#' @export
detect_info_par<-function(x){
  ## checking for correct object
  if (inherits(x, "mappoly.sequence")) {
   if(all(x$seq.dose.p2 == 0 | x$seq.dose.p2 == x$ploidy))
     return("p1")
  else if(all(x$seq.dose.p1 == 0 | x$seq.dose.p1 == x$ploidy))
    return("p2")
  else
    return("both")
  } else if (inherits(x, "mappoly.map")) {
    if(all(x$info$seq.dose.p2 == 0 | x$info$seq.dose.p2 == x$info$ploidy))
      return("p1")
    else if(all(x$info$seq.dose.p1 == 0 | x$info$seq.dose.p1 == x$info$ploidy))
      return("p2")
    else
      return("both")
  } else{
    stop(deparse(substitute(x)), " is not an object of class 'mappoly.map' or 'mappoly.sequence'")    
  }
}

# Skeleton to test CPP functions
# @export
# test_CPP<-function(m, rf)
#   .Call("rec_number", as.integer(m), as.numeric(rf), PACKAGE = "mappoly")
#   


.mappoly_data_skeleton<-function(ploidy =NULL,
                                 n.ind =NULL,
                                 n.mrk =NULL,
                                 ind.names =NULL,
                                 mrk.names =NULL,
                                 dosage.p1 =NULL,
                                 dosage.p2 =NULL,
                                 chrom =NULL,
                                 genome.pos =NULL,
                                 seq.ref =NULL,
                                 seq.alt =NULL,
                                 all.mrk.depth =NULL,
                                 prob.thres =NULL,
                                 geno.dose =NULL,
                                 nphen =NULL,
                                 phen =NULL,
                                 kept =NULL,
                                 chisq.pval =NULL,
                                 elim.correspondence =NULL)
  structure(list(ploidy = ploidy,
                 n.ind = n.ind,
                 n.mrk = n.mrk,
                 ind.names = ind.names,
                 mrk.names = mrk.names,
                 dosage.p1 = dosage.p1,
                 dosage.p2 = dosage.p2,
                 chrom = chrom,
                 genome.pos = genome.pos,
                 seq.ref = seq.ref,
                 seq.alt = seq.alt,
                 all.mrk.depth = all.mrk.depth,
                 prob.thres = prob.thres,
                 geno.dose = geno.dose,
                 nphen = nphen,
                 phen = phen,
                 kept = kept,
                 chisq.pval = chisq.pval,
                 elim.correspondence = elim.correspondence),
            class = "mappoly.data")


.mappoly_map_skeleton<-function(ploidy=NULL,
                                n.mrk=NULL,
                                seq.num=NULL,
                                mrk.names=NULL,
                                seq.dose.p1=NULL,
                                seq.dose.p2=NULL,
                                chrom=NULL,
                                genome.pos=NULL,
                                seq.ref=NULL,
                                seq.alt=NULL,
                                chisq.pval=NULL,
                                data.name=NULL,
                                ph.thresh =NULL,
                                seq.rf =NULL,
                                seq.ph = list(P =NULL, Q =NULL),
                                loglike = 0)
  structure(list(info = list(ploidy=ploidy,
                             n.mrk=n.mrk,
                             seq.num=seq.num,
                             mrk.names=mrk.names,
                             seq.dose.p1=seq.dose.p1,
                             seq.dose.p2=seq.dose.p2,
                             chrom=chrom,
                             genome.pos=genome.pos,
                             seq.ref=seq.ref,
                             seq.alt=seq.alt,
                             chisq.pval=chisq.pval,
                             data.name=data.name,
                             ph.thresh = ph.thresh),
                 maps = list(list(seq.num = seq.num,
                                  seq.rf = seq.rf,
                                  seq.ph = seq.ph,
                                  loglike = loglike))),
            class = "mappoly.map")
  
  
#' Split map into sub maps given a gap threshold
#' @keywords internal
split_mappoly <- function(input.map,
                          gap.threshold = 5, 
                          size.rem.cluster = 1,
                          phase.config = "best",
                          tol.final = 10e-4,
                          verbose = TRUE){
  if (!inherits(input.map, "mappoly.map")) {
    stop(deparse(substitute(input.map)), " is not an object of class 'mappoly.map'")
  }
  ## choosing the linkage phase configuration
  LOD.conf <- get_LOD(input.map, sorted = FALSE)
  if(phase.config  ==  "best") {
    i.lpc <- which.min(LOD.conf)
  } else if (phase.config > length(LOD.conf)) {
    stop("invalid linkage phase configuration")
  } else i.lpc <- phase.config
  id <- which(imf_h(input.map$maps[[i.lpc]]$seq.rf) > gap.threshold)
  if(length(id) == 0){
    if(verbose) cat("no submaps found\n")
    return(input.map)
  } 
  id <- cbind(c(1, id+1), c(id, input.map$info$n.mrk))
  temp.map <- input.map
  ## Selecting map segments larger then the specified threshold
  segments <- id[apply(id, 1, diff) > size.rem.cluster - 1, , drop = FALSE]
  if(length(segments) == 0) stop("all markers were eliminated\n")
  ## Dividing map in sub-maps
  temp.maps <- vector("list", nrow(segments))
  if (verbose) {
    ns <- nrow(segments)
    if(ns == 1){
      cat("one submap found ...\n")
      map <- get_submap(input.map, c(segments[1, 1]:segments[1, 2]), tol.final = tol.final, verbose = FALSE)
      return(filter_map_at_hmm_thres(map, 10e-4))
    } 
    else cat(ns, "submaps found ...\n")
  }
  for(i in 1:length(temp.maps)){
    temp.id <- c(segments[i, 1]:segments[i, 2])
    if(length(temp.id) > 1)
      temp.maps[[i]] <- get_submap(input.map, temp.id, reestimate.rf = FALSE, verbose = FALSE)
    else
      temp.maps[[i]] <- input.map$info$mrk.names[temp.id]    
  }
  temp.maps
}
mmollina/MAPpoly documentation built on March 9, 2024, 2:52 a.m.