gws:

Documented in mppop.predict mppop_predict2

#' Predict genetic variance and genetic correlations in multi-parent populations
#' using a deterministic model
#'
#' @description
#' This function returns predictions of the genotypic mean, genetic variance,
#' usefulness criterion (superior progeny mean) in a set of multi-parent populations
#' using marker effects and a genetic map. If more than two traits are specified,
#' the function will also return predictions of the genetic correlation in the population
#' and the correlated response to selection.
#'
#' @param G.in See \code{G.in} in \code{\link[PopVar]{pop.predict}}.
#' @param y.in See \code{y.in} in \code{\link[PopVar]{pop.predict}}.
#' @param map.in See \code{map.in} in \code{\link[PopVar]{pop.predict}}.
#' @param crossing.table See \code{crossing.table} in \code{\link[PopVar]{pop.predict}}.
#' @param parents See \code{parents} in \code{\link[PopVar]{pop.predict}}.
#' @param tail.p See \code{tail.p} in \code{\link[PopVar]{pop.predict}}.
#' @param self.gen The number of selfing generations in the potential cross. Can be an integer or \code{Inf} for
#' recombinant inbreds. Note: \code{self.gen = 1} corresponds to an F2 population.
#' @param DH Indicator if doubled-haploids are to be induced after the number of selfing generations indicated by
#' \code{self.gen}. For example, if \code{self.gen = 0} and \code{DH = TRUE}, then doubled-haploids are asssumed
#' to be induced using gametes from F1 plants.
#' @param model See \code{models} in \code{\link[PopVar]{pop.predict}}. Only 1 model is allowed.
#' @param n.core Number of cores for parallelization. Parallelization is supported
#' only on a Linux or Mac OS operating system; if working on a Windows system, the function
#' is executed on a single core.
#'
#' @examples
#'
#' # Load data
#' data("phenos")
#' data("genos")
#' data("map")
#'
#' # Create 10, 4-way parent combinations
#'   crosses <- as.data.frame(
#'     matrix(data = sample(row.names(genos), 40), nrow = 10, byrow = TRUE,
#'            dimnames = list(NULL, paste0("parent", 1:4))),
#'     stringsAsFactors = FALSE)
#'
#' # Format the genotype data
#' G_in <- as.data.frame(cbind( c("", row.names(genos)), rbind(colnames(genos), genos)) )
#'
#' # Run predictions
#' pred_out <- mppop.predict(G.in = G_in, y.in = phenos, map.in = map,
#'                           crossing.table = crosses, self.gen = 6)
#'
#'
#' @importFrom qtl mf.h
#' @importFrom rrBLUP mixed.solve
#' @importFrom parallel mclapply
#'
#' @export
#'
mppop.predict <- function(G.in, y.in, map.in, crossing.table, parents, tail.p = 0.1, self.gen = Inf, DH = FALSE,
                          model = c("rrBLUP", "BayesC"), n.core = 1) {

  ###################
  # Error handling

  ## Check classes
  stopifnot(is.data.frame(G.in))
  stopifnot(is.data.frame(y.in))
  stopifnot(is.data.frame(map.in))

  # Check the number of markers in the map and the G.in objects
  if (ncol(G.in) - 1 != nrow(map.in))
    stop("The number of markers in G.in does not match the markers in map.in")

  ## Make sure there is no missing marker data
  if (any(is.na(G.in))) stop ("There must be no missing marker data.")

  # Self gen cannot be negative
  stopifnot(self.gen >= 0)
  # DH must be logical
  stopifnot(is.logical(DH))

  ## Make sure markers in G.in are in map.in
  markers_Gin <- as.character(unlist(G.in[1,-1, drop = TRUE]))
  markers_mapin <- map.in[,1, drop = TRUE]

  if (any(! markers_Gin %in% markers_mapin)) stop("The marker names in G.in are not all in map.in.")

  # Match arguments
  model <- match.arg(model)

  ###################


  ## Filter map.in to remove markers with unknown position
  map.in[[1]] <- as.character(map.in[[1]])
  map.in[[3]] <- as.numeric(map.in[[3]])
  map.in_use <- subset(map.in, !is.na(map.in[[3]]))
  # Reorder based on chrom and then pos
  map.in_use <- map.in_use[order(map.in_use[[2]], map.in_use[[3]]), , drop = FALSE]

  # Get the names of the markers in the new map
  markers_mapin <- as.character(map.in_use[[1]])

  ## Subset G.in for markers in map.in_use
  G.in_use <- G.in[, c(1, which(markers_Gin %in% markers_mapin) + 1), drop = FALSE]



  # If the crossing table is not missing, check that the parents are in the G.in input
  if (!missing(crossing.table)) {
    parents <- unique(unlist(crossing.table))

    # Make sure there are two or four parents
    npar <- ncol(crossing.table)

    ## If two parents, replicate the parents for the first "cross"
    if (npar == 2) {
      crossing.table <- crossing.table[,rep(seq_len(npar), each = 2), drop = FALSE]

    } else if (npar == 4) {
      # Good!

    } else {
      stop("Specified crosses must be 2-way or 4-way.")

    }

    # Make sure the parent names are not factors
    crossing.table <- as.data.frame(sapply(X = crossing.table, as.character), stringsAsFactors = FALSE)

  } else {
    if (missing(parents))
      stop("If no crossing.table is provided, a list of parents must be supplied.")

    parents <- sort(parents)
    # Create a crossing table with all possible parent combinations
    crossing.table <- as.data.frame(t(combn(x = parents, m = 2)), stringsAsFactors = FALSE)
    names(crossing.table) <- c("parent1", "parent2")

  }

  if (any(!parents %in% G.in_use[,1,drop = T]))
    stop("Parents are not in G.in.")

  ## If self.gen is Inf and DH is T, error
  if (is.infinite(self.gen) & DH) stop("Infinite selfing generations and doubled-haploid production cannot both occur.")

  ## Set the factors of line names in y.in to those in the marker df and those in the y.in
  lines_G.in <- as.character(G.in_use[-1,1, drop = TRUE])
  # The levels of the y.in_use geno factor should be the entry names in G.in
  y.in[[1]] <- factor(x = y.in[[1]], levels = sort(lines_G.in))
  # Subset out NAs
  y.in_use <- subset(y.in, !is.na(y.in[[1]]))
  # Reorder lines
  y.in_use <- y.in_use[order(y.in_use[[1]]),, drop = FALSE]


  ## Number of traits and trait names
  n_traits <- ncol(y.in_use) - 1
  trait_names <- colnames(y.in_use)[-1]


  ## Convert the marker matrix into a useable matrix form
  G.in_pred <- sapply(X = G.in_use[-1, -1], function(x) as.numeric(as.character(x)))
  row.names(G.in_pred) <- as.character(G.in_use[-1,1])
  colnames(G.in_pred) <- as.character(unlist(G.in_use[1,-1]))
  # Reorder markers and lines
  G.in_pred <- G.in_pred[order(row.names(G.in_pred)), markers_mapin]


  ## Create a model.frame from the phenotypes; extract the column name of genotypes/lines
  geno_colname <- colnames(y.in_use)[1]

  # Subset using factors in y.in
  # Model matrix for genotypes
  Zg <- model.matrix(object = as.formula(paste0("~ -1 + ", geno_colname)), data = y.in_use)
  row.names(Zg) <- y.in_use[[1]]
  M <- Zg %*% G.in_pred


  # Make sure the markers are coded correlated
  if (!all(M %in% c(-1, 1))) stop("All markers must be coded as -1 or 1. Heterzygotes are not allowed.")

  ## Create an empty matrix
  marker_names <- markers_mapin

  # Split markers by chromosome
  map.in.chr <- split(map.in_use, map.in_use[,2, drop = FALSE])
  markers_chr <- lapply(map.in.chr, "[[", 1)

  # Calculate separate centimorgan distance matrices per chromosome
  chr_cM <- lapply(X = map.in.chr, FUN = function(x) as.matrix(dist(x[,3,drop = FALSE])))
  # Convert to recombination distance
  chr_c <- lapply(X = chr_cM, FUN = mf.h)


  # Function to calculate recombination covariance between QTL at various generations
  # of selfing
  ck <- function(c, k) if (k == 1) c else ((2*c) / (1 + 2*c)) * ( 1 - (0.5 * (1 - 2*c))^k )

  # Function for calulating disequilibrium between marker effects in progeny
  calc_parD <- function(X, .x, .y) crossprod(X[.x,,drop = FALSE] - X[.y,,drop = FALSE]) / 16



  ## Calculate 2 values for all pairs of markers. Value 1 will be multiplied by
  ## the D_ij values of the initiate cross (P1 x P2)(P3 x P4); and value 2 will be multiplied by
  ## the D_ij values of the second cross (F1(1) x F1(2)).
  # If DH and self.gen = 0, DH's are formed from the F1
  if (DH & self.gen == 0) {
    chr_covar_phi1 <- lapply(X = chr_c, function(c) (1 - (2*c)))
    chr_covar_phi2 <- replicate(n = length(chr_c), 1, simplify = FALSE)

  # DHs formed after k generations of selfing
  } else if (DH & self.gen > 0) {
    chr_covar_phi1 <- lapply(X = chr_c, function(c) (1 - (2*ck(c, self.gen)) + ck(c, self.gen-1) ) * (1 - (2*c)) )
    chr_covar_phi2 <- lapply(X = chr_c, function(c) (1 - (2*ck(c, self.gen))) )

  # RILs after self.gen selfing generations
  } else if (!DH & is.finite(self.gen)) {
    chr_covar_phi1 <- lapply(X = chr_c, function(c) (1 - ck(c, self.gen)) * (1 - (2*c)) )
    chr_covar_phi2 <- lapply(X = chr_c, function(c) ( (1 - (2*ck(c, self.gen))) - ((0.5 * (1 - (2*c)))^self.gen) ) )

  } else if (!DH & is.infinite(self.gen)) {
    stop("RILs after infinite genetations of selfing are not yet supported")

  }


  ## Calculate marker effects for each trait
  if (model == "rrBLUP") {

    ## Apply a function over each trait
    marker_effect_out <- lapply(X = y.in_use[-1], FUN = function(y) {

      ## Solve the mixed model
      fit <- mixed.solve(y = y, Z = M, method = "REML")

      # Return marker effects and the grand mean
      list(effects = as.matrix(fit$u), grand_mean = fit$beta)

    })


    ## Create a complete matrix of marker effects for each trait
    mar_eff_mat <- do.call("cbind", lapply(marker_effect_out, "[[", "effects"))
    mar_eff_mat <- structure(mar_eff_mat, dimnames = list(row.names(mar_eff_mat), names(marker_effect_out)))

    mar_beta_mat <- do.call("cbind", lapply(marker_effect_out, "[[", "grand_mean"))
    mar_beta_mat <- structure(mar_beta_mat, dimnames = list(row.names(mar_beta_mat), names(marker_effect_out)))


  } else if (model == "BayesC") {
    stop("Other models not supported.")

  } else {
    stop("Other models not supported.")
  }


  ## Predicted genotypic value of all genotypes + grand mean
  pgvs <- (M %*% mar_eff_mat) + matrix(mar_beta_mat, ncol = ncol(mar_eff_mat), nrow = nrow(M), byrow = TRUE)
  row.names(pgvs) <- y.in_use[[1]]

  # Determine the k_sp from the the tail.p
  k_sp <- mean(qnorm(p = seq((1 - tail.p), 0.999999, 0.000001)))

  ## Change the number of cores based on system
  os <- Sys.info()["sysname"]
  n.core <- ifelse(os == "Windows", 1, n.core)

  ## Create a list to store dfs
  # Split up the crossing.table
  crossing_table_split <- split(x = crossing.table, rep(seq_len(n.core), length.out = nrow(crossing.table)))


  # Parallelize over cross predictions
  cross_predictions_out <- mclapply(X = crossing_table_split, FUN = function(ctb) {

    cross_predictions <- vector("list", nrow(ctb))
    for (j in seq_along(cross_predictions)) {

      # Character vector of the four parents - it was always be four
      pars <- as.character(ctb[j, 1:4])

      ## Subset the genotype matrix using the parents
      M_par <- M[pars,,drop = FALSE]
      # Find segregating markers
      seg_mar <- which(!colMeans(M_par) %in% c(1, -1))
      seg_mar_chr <- lapply(markers_chr, intersect, marker_names[seg_mar])
      seg_mar_chr_num <- mapply(markers_chr, seg_mar_chr, FUN = function(.x, .y) which(.x %in% .y))

      par_geno <- lapply(seg_mar_chr, FUN = function(m) M_par[pars, m, drop = FALSE])

      # Calculate the LD covariance

      ## This depends on the number of selfing generations
      ## Formulae are taken from Allier et al 2019

      ## First calculate the LD between parental alleles in the
      ## first and second crosses

      # Disequilibrium in first cross
      D12 <- lapply(par_geno, calc_parD, 1, 2)  # first two parents
      D34 <- lapply(par_geno, calc_parD, 3, 4)  # last two parents

      phi_1 <- mapply(D12, D34, FUN = `+`, SIMPLIFY = FALSE)

      # Disequilibrium in second cross
      D13 <- lapply(par_geno, calc_parD, 1, 3)
      D14 <- lapply(par_geno, calc_parD, 1, 4)
      D23 <- lapply(par_geno, calc_parD, 2, 3)
      D24 <- lapply(par_geno, calc_parD, 2, 4)

      phi_2 <- mapply(D13, D14, D23, D24, FUN = function(.x, .y, .z, .a) .x + .y + .z + .a, SIMPLIFY = FALSE)


      ## Subset phi1 and phi2
      ## Subset the covariance coefficients
      ## Multiply
      if (DH & self.gen == 0) {
        chr_sigma <- mapply(
          phi_1, chr_covar_phi1, phi_2, chr_covar_phi2, seg_mar_chr_num, FUN = function(.a, .b, .c, .d, .e) {
            ((.c * .d[.e, .e]) + (.a * .b[.e, .e])) * .b[.e, .e]
          }, SIMPLIFY = FALSE)

      } else {
        chr_sigma <- mapply(
          phi_1, chr_covar_phi1, phi_2, chr_covar_phi2, seg_mar_chr_num, FUN = function(.a, .b, .c, .d, .e) {
            ((.c * .d[.e, .e]) + (.a * .b[.e, .e]))
          }, SIMPLIFY = FALSE)
      }


      ## Split marker effects by trait then chromosome
      mar_eff_mat_trait_chr <- apply(X = mar_eff_mat, MARGIN = 2, FUN = function(snps) {
        lapply(X = markers_chr, FUN = function(snp_chr) as.matrix(snps[snp_chr]))
      })

      # Eq. 2 Allier et al 2019
      # Calculate variance per trait
      pred_varG_j <- sapply(X = mar_eff_mat_trait_chr, FUN = function(trait_mars) {
        sum(mapply(chr_sigma, trait_mars, seg_mar_chr, FUN = function(.x, .y, .z) crossprod(.y[.z,], .x) %*% .y[.z,]))
      })


      ## Predictions
      # Cross mean
      pred_mu_j <- colMeans(pgvs[pars,,drop = FALSE])


      # Genetic correlations between traits, if more than one trait
      if (n_traits > 1) {

        # Create a pairwise matrix
        pred_corG_j_mat <- matrix(data = 0, nrow = n_traits, ncol = n_traits,
                              dimnames = list(trait_names, trait_names))
        # Convert to distance object
        pred_corG_j_dist <- as.dist(pred_corG_j_mat)

        ## Create a combination matrix
        trait_combn <- combn(x = seq(n_traits), m = 2)

        # Turn the marker effect list inside-out
        mar_eff_mat_chr_trait <- lapply(
          X = seq_along(seg_mar_chr), function(q) lapply(mar_eff_mat_trait_chr, "[[", q)
        )

        ## Apply over columns
        # Calculate genetic covariance between pairs of traits
        pred_corG_j <- sapply(X = seq_len(ncol(trait_combn)), FUN = function(rho) {
          ij <- trait_combn[,rho]
          # Calculate covariance
          mar_eff_mat_chr_trait_ij <- lapply(X = mar_eff_mat_chr_trait, FUN = "[", ij)
          covar <- sum(mapply(chr_sigma, mar_eff_mat_chr_trait_ij, seg_mar_chr,
                              FUN = function(.x, .y, .z) crossprod(.y[[1]][.z,], .x) %*% .y[[2]][.z,]))

          # Subset predicted variance to calculate correlation
          corG <- covar / prod(sqrt(pred_varG_j[ij]))
        })

        # Add these predictions to the distance object, then convert to matrix
        pred_corG_j_dist[seq_along(pred_corG_j_dist)] <- pred_corG_j

        ## Convert distance object to matrix
        pred_corG_j_mat <- as.matrix(pred_corG_j_dist)
        diag(pred_corG_j_mat) <- NA

        ## Calculate correlated progeny mean
        response_trait_varG <- matrix(pred_varG_j, nrow = length(pred_varG_j),
                                      ncol = length(pred_varG_j), byrow = TRUE)
        correlated_response <- k_sp * pred_corG_j_mat * sqrt(response_trait_varG)
        pred_mu_j_mat <- matrix(pred_mu_j, nrow = length(pred_mu_j), ncol = length(pred_mu_j), byrow = TRUE)
        pred_cor_musp_low <- pred_mu_j_mat - correlated_response
        pred_cor_musp_high <- pred_mu_j_mat + correlated_response

        # Change names
        colnames(pred_cor_musp_low) <- paste0("pred_cor_musp_low_", trait_names)
        colnames(pred_cor_musp_high) <- paste0("pred_cor_musp_high_", trait_names)

      } else {
        pred_corG_mat <- pred_cor_musp_low <- pred_cor_musp_high <- pred_corG_j_mat <- NULL

      }

      cross_predictions[[j]] <- data.frame(
        t(pars),
        trait = trait_names,
        cbind(pred_mu = pred_mu_j, pred_varG = pred_varG_j, pred_corG_j_mat,
              pred_cor_musp_low, pred_cor_musp_high),
        stringsAsFactors = FALSE, row.names = NULL)

    }

    do.call("rbind", cross_predictions)

  }, mc.cores = n.core) # End loop

  ## Bind rows
  cross_predictions1 <- do.call("rbind", cross_predictions_out)
  # Rename col 1:npar
  names(cross_predictions1)[1:4] <- paste0("parent", 1:4)

  ## Calculate response predictions (superior progeny, correlated response, etc.)
  pred_response <- (k_sp * sqrt(cross_predictions1$pred_varG))

  # Superior progeny mean
  cross_predictions1[["pred_musp_low"]] <- cross_predictions1$pred_mu - pred_response
  cross_predictions1[["pred_musp_high"]] <- cross_predictions1$pred_mu + pred_response

  ## Re-order columns
  cor_W_cols <- grep(pattern = paste0(trait_names, collapse = "|"), x = names(cross_predictions1))
  cross_predictions2 <- cross_predictions1[, c(setdiff(seq(ncol(cross_predictions1)), cor_W_cols), cor_W_cols)]
  row.names(cross_predictions2) <- NULL

  ## Return the predictions
  return(cross_predictions2)

} # Close function







#' Predict genetic variance and genetic correlations in multi-parent populations
#' using a deterministic model
#'
#' @describeIn mppop.predict
#'
#' @param M A Matrix of marker genotypes of dimensions \code{nLine} x \code{nMarker}, coded as
#' -1, 0, and 1.
#' @param y.in A data frame of phenotypic means. The first column should include the entry name and
#' subsequent columns should include phenotypic values. Ignored if \code{marker.effects} is passed.
#' @param marker.effects A data frame of marker effects. The first column should include the marker name and
#' subsequent columns should include the marker effects.
#' @param map.in See \code{map.in} in \code{\link[PopVar]{pop.predict}}.
#' @param crossing.table See \code{crossing.table} in \code{\link[PopVar]{pop.predict}}.
#' @param tail.p See \code{tail.p} in \code{\link[PopVar]{pop.predict}}.
#' @param self.gen The number of selfing generations in the potential cross. Can be an integer or \code{Inf} for
#' recombinant inbreds. Note: \code{self.gen = 1} corresponds to an F2 population.
#' @param DH Indicator if doubled-haploids are to be induced after the number of selfing generations indicated by
#' \code{self.gen}. For example, if \code{self.gen = 0} and \code{DH = TRUE}, then doubled-haploids are asssumed
#' to be induced using gametes from F1 plants.
#' @param model See \code{models} in \code{\link[PopVar]{pop.predict}}. Only 1 model is allowed.
#' @param n.core Number of cores for parallelization; only works on a Linux or Mac OS operating system.
#'
#' @examples
#'
#' # Load data
#' data("phenos")
#' data("genos")
#' data("map")
#'
#' # Create 25, 4-way parent combinations
#'   crosses <- as.data.frame(
#'     matrix(data = sample(row.names(genos), 25 * 4), nrow = 25, byrow = TRUE,
#'            dimnames = list(NULL, paste0("parent", 1:4))),
#'     stringsAsFactors = FALSE)
#'
#' # Run predictions
#' pred_out <- mppop_predict2(M = genos, y.in = phenos, map.in = map,
#'                            crossing.table = crosses, self.gen = 6)
#'
#' # Many more possible crosses - this takes a while
#' \dontrun{
#'
#' crosses <- as.data.frame(t(combn(x = sample(row.names(genos), 20), m = 4)), stringsAsFactors = FALSE)
#'
#' pred_out <- mppop_predict2(M = genos, y.in = phenos, map.in = map,
#'                            crossing.table = crosses, self.gen = 6)
#'
#' }
#'
#'
#' ## Pass marker effects instead of phenotypes
#' # First calculate marker effects
#' phenos2 <- as.matrix(phenos[,-1]); row.names(phenos2) <- phenos[,1]
#' phenos2 <- phenos2[row.names(genos),]
#'
#' mar_eff <- apply(X = phenos2, MARGIN = 2, FUN = function(y) mixed.solve(y = y, Z = genos)$u)
#' marker_effects <- data.frame(marker = row.names(mar_eff), mar_eff, stringsAsFactors = FALSE)
#'
#' pred_out <- mppop_predict2(M = genos, marker.effects = marker_effects, map.in = map,
#'                            crossing.table = crosses, self.gen = 6)
#'
#'
#'
#' @importFrom qtl mf.h
#' @importFrom rrBLUP mixed.solve
#' @importFrom parallel mclapply
#'
#' @export
#'
mppop_predict2 <- function(M, y.in, marker.effects, map.in, crossing.table, tail.p = 0.1,
                           self.gen = Inf, DH = FALSE, model = c("rrBLUP", "BayesC"), n.core = 1) {

  ###################
  # Error handling

  ## Check classes
  stopifnot(is.matrix(M))
  stopifnot(is.data.frame(map.in))

  # Check the number of markers in the map and the G.in objects
  if (ncol(M) != nrow(map.in))
    stop("The number of markers in G.in does not match the markers in map.in")

  ## Make sure there is no missing marker data
  if (any(is.na(M))) stop ("There must be no missing marker data.")

  # Self gen cannot be negative
  stopifnot(self.gen >= 0)
  # DH must be logical
  stopifnot(is.logical(DH))

  ## Make sure markers in G.in are in map.in
  stopifnot(!is.null(colnames(M)))
  stopifnot(!is.null(row.names(M)))

  # Make sure the markers are coded correctly
  if (!all(M %in% c(-1, 1))) stop("All markers must be coded as -1 or 1. Heterzygotes are not allowed.")


  markers_M <- colnames(M)
  markers_mapin <- map.in[[1]]

  if (any(! markers_M %in% markers_mapin)) stop("The marker names in M are not all in map.in.")

  # Get the names of genotyped entries
  geno_lines <- row.names(M)


  ## Make sure one of y.in or marker.effects are not missing
  if (missing(y.in) & missing(marker.effects))
    stop("You must pass one of 'y.in' or 'marker.effects.'")

  ## Error check depending on what is passed
  if (!missing(marker.effects)) {

    # Check markers
    markers_maref <- marker.effects[[1]]

    if (! all(markers_M %in% markers_maref) ) stop("The marker names in M are not all in marker.effects")

    ## Number of traits and trait names
    n_traits <- ncol(marker.effects) - 1
    trait_names <- colnames(marker.effects)[-1]

    # Set boolean for later
    calc_marker_eff <- FALSE

    # Else check y.in
  } else {

    stopifnot(is.data.frame(y.in))

    # All phenotyped lines should be genotyped
    if (! all(y.in[[1]] %in% geno_lines) ) stop("All entries in 'y.in' should have marker data in 'M'.")

    ## Set the factors of line names in y.in to those in the marker df and those in the y.in
    y.in[[1]] <- factor(x = y.in[[1]], levels = sort(geno_lines))
    # Subset out NAs
    y.in_use <- subset(y.in, !is.na(y.in[[1]]))
    # Reorder lines
    y.in_use <- y.in_use[order(y.in_use[[1]]),, drop = FALSE]


    ## Number of traits and trait names
    n_traits <- ncol(y.in_use) - 1
    trait_names <- colnames(y.in_use)[-1]

    # Set boolean for later
    calc_marker_eff <- TRUE

  }


  # Match arguments
  model <- match.arg(model)

  ###################


  # Reorder map based on chrom and then pos
  map.in_use <- map.in[order(map.in[[2]], map.in[[3]]), , drop = FALSE]

  # Get the names of the markers in the new map
  markers_mapin <- as.character(map.in_use[[1]])


  ## Get list of unique parents from the crossing.table
  parents <- unique(unlist(crossing.table))
  # Make sure there are two or four parents
  npar <- ncol(crossing.table)

  ## If two parents, replicate the parents for the first "cross"
  if (npar == 2) {
    crossing.table <- crossing.table[,rep(seq_len(npar), each = 2), drop = FALSE]

  } else if (npar != 4) {
    stop("Specified crosses must be 2-way or 4-way.")


  }

  # Make sure the parent names are not factors
  crossing.table <- as.data.frame(sapply(X = crossing.table, as.character), stringsAsFactors = FALSE)

  if (!all(parents %in% row.names(M)))
    stop("Parents are not in G.in.")

  ## If self.gen is Inf and DH is T, error
  if (is.infinite(self.gen) & DH) stop("Infinite selfing generations and doubled-haploid production cannot both occur.")


  ## Fit models to calculate marker effects, if necessary
  if (calc_marker_eff) {

    ## Create a model.frame from the phenotypes; extract the column name of genotypes/lines
    geno_colname <- colnames(y.in_use)[1]

    # Subset using factors in y.in
    # Model matrix for genotypes
    Zg <- model.matrix(object = as.formula(paste0("~ -1 + ", geno_colname)), data = y.in_use)
    row.names(Zg) <- y.in_use[[1]]
    M1 <- Zg %*% M[, markers_mapin, drop = FALSE]

    ## Calculate marker effects for each trait
    if (model == "rrBLUP") {

      ## Apply a function over each trait
      marker_effect_out <- lapply(X = y.in_use[-1], FUN = function(y) {

        ## Solve the mixed model
        fit <- mixed.solve(y = y, Z = M1, method = "REML")

        # Return marker effects and the grand mean
        list(effects = as.matrix(fit$u), grand_mean = fit$beta)

      })


      ## Create a complete matrix of marker effects for each trait
      mar_eff_mat <- do.call("cbind", lapply(marker_effect_out, "[[", "effects"))
      mar_eff_mat <- structure(mar_eff_mat, dimnames = list(row.names(mar_eff_mat), names(marker_effect_out)))

      mar_beta_mat <- do.call("cbind", lapply(marker_effect_out, "[[", "grand_mean"))
      mar_beta_mat <- structure(mar_beta_mat, dimnames = list(row.names(mar_beta_mat), names(marker_effect_out)))


    } else if (model == "BayesC") {
      stop("Other models not supported.")

    } else {
      stop("Other models not supported.")
    }

    # Else create a matrix of marker ordered marker effects
  } else {

    # Create matrix
    mar_eff_mat <- as.matrix(marker.effects[,-1,drop = FALSE])
    row.names(mar_eff_mat) <- marker.effects[[1]]
    mar_eff_mat <- mar_eff_mat[markers_mapin,,drop = FALSE]

    # Set the grand mean to zero
    mar_beta_mat <- matrix(0, nrow = 1, ncol = ncol(mar_eff_mat),
                           dimnames = list(NULL, colnames(mar_eff_mat)))

  }

  # Reorder markers
  M1 <- M[, markers_mapin, drop = FALSE]

  ## Create an empty matrix
  marker_names <- markers_mapin

  # Split markers by chromosome
  map.in.chr <- split(map.in_use, map.in_use[,2, drop = FALSE])
  markers_chr <- lapply(map.in.chr, "[[", 1)

  ## Split marker effects by trait then chromosome
  mar_eff_mat_trait_chr <- apply(X = mar_eff_mat, MARGIN = 2, FUN = function(snps) {
    lapply(X = markers_chr, FUN = function(snp_chr) as.matrix(snps[snp_chr]))
  })

  # Calculate separate centimorgan distance matrices per chromosome
  chr_cM <- lapply(X = map.in.chr, FUN = function(x) as.matrix(dist(x[,3,drop = FALSE])))
  # Convert to recombination distance
  chr_c <- lapply(X = chr_cM, FUN = mf.h)


  # Function to calculate recombination covariance between QTL at various generations
  # of selfing
  ck <- function(c, k) if (k == 1) c else ((2*c) / (1 + 2*c)) * ( 1 - (0.5 * (1 - 2*c))^k )

  # Function for calulating disequilibrium between marker effects in progeny
  calc_parD <- function(X, .x, .y) crossprod(X[.x,,drop = FALSE] - X[.y,,drop = FALSE]) / 16



  ## Calculate 2 values for all pairs of markers. Value 1 will be multiplied by
  ## the D_ij values of the initiate cross (P1 x P2)(P3 x P4); and value 2 will be multiplied by
  ## the D_ij values of the second cross (F1(1) x F1(2)).
  # If DH and self.gen = 0, DH's are formed from the F1
  if (DH & self.gen == 0) {
    chr_covar_phi1 <- lapply(X = chr_c, function(c) (1 - (2*c)))
    chr_covar_phi2 <- replicate(n = length(chr_c), 1, simplify = FALSE)

    # DHs formed after k generations of selfing
  } else if (DH & self.gen > 0) {
    chr_covar_phi1 <- lapply(X = chr_c, function(c) (1 - (2*ck(c, self.gen)) + ck(c, self.gen-1) ) * (1 - (2*c)) )
    chr_covar_phi2 <- lapply(X = chr_c, function(c) (1 - (2*ck(c, self.gen))) )

    # RILs after self.gen selfing generations
  } else if (!DH & is.finite(self.gen)) {
    chr_covar_phi1 <- lapply(X = chr_c, function(c) (1 - ck(c, self.gen)) * (1 - (2*c)) )
    chr_covar_phi2 <- lapply(X = chr_c, function(c) ( (1 - (2*ck(c, self.gen))) - ((0.5 * (1 - (2*c)))^self.gen) ) )

  } else if (!DH & is.infinite(self.gen)) {
    stop("RILs after infinite genetations of selfing are not yet supported")

  }





  ## Predicted genotypic value of all genotypes
  ## Add non-zero grand mean if y_in was specified
  pgvs <- (M1 %*% mar_eff_mat) + matrix(mar_beta_mat, ncol = ncol(mar_eff_mat), nrow = nrow(M), byrow = TRUE)

  # Determine the k_sp from the the tail.p
  k_sp <- mean(qnorm(p = seq((1 - tail.p), 0.999999, 0.000001)))

  ## Change the number of cores based on system
  os <- Sys.info()["sysname"]
  n.core <- ifelse(os == "Windows", 1, n.core)

  ## Create a list to store dfs
  # Split up the crossing.table
  crossing_table_split <- split(x = crossing.table, rep(seq_len(n.core), length.out = nrow(crossing.table)))


  # Parallelize over cross predictions
  cross_predictions_out <- mclapply(X = crossing_table_split, FUN = function(ctb) {

    cross_predictions <- vector("list", nrow(ctb))
    for (j in seq_along(cross_predictions)) {

      # Character vector of the four parents - it was always be four
      pars <- as.character(ctb[j, 1:4])

      ## Subset the genotype matrix using the parents
      M_par <- M1[pars,,drop = FALSE]
      # Find segregating markers
      seg_mar <- which(!colMeans(M_par) %in% c(1, -1))
      seg_mar_chr <- lapply(markers_chr, intersect, marker_names[seg_mar])
      seg_mar_chr_num <- mapply(markers_chr, seg_mar_chr, FUN = function(.x, .y) which(.x %in% .y))

      par_geno <- lapply(seg_mar_chr, FUN = function(m) M_par[pars, m, drop = FALSE])

      # Calculate the LD covariance

      ## This depends on the number of selfing generations
      ## Formulae are taken from Allier et al 2019

      ## First calculate the LD between parental alleles in the
      ## first and second crosses

      # Disequilibrium in first cross
      D12 <- lapply(par_geno, calc_parD, 1, 2)  # first two parents
      D34 <- lapply(par_geno, calc_parD, 3, 4)  # last two parents

      phi_1 <- mapply(D12, D34, FUN = `+`, SIMPLIFY = FALSE)

      # Disequilibrium in second cross
      D13 <- lapply(par_geno, calc_parD, 1, 3)
      D14 <- lapply(par_geno, calc_parD, 1, 4)
      D23 <- lapply(par_geno, calc_parD, 2, 3)
      D24 <- lapply(par_geno, calc_parD, 2, 4)

      phi_2 <- mapply(D13, D14, D23, D24, FUN = function(.x, .y, .z, .a) .x + .y + .z + .a, SIMPLIFY = FALSE)


      ## Subset phi1 and phi2
      ## Subset the covariance coefficients
      ## Multiply
      if (DH & self.gen == 0) {
        chr_sigma <- mapply(
          phi_1, chr_covar_phi1, phi_2, chr_covar_phi2, seg_mar_chr_num, FUN = function(.a, .b, .c, .d, .e) {
            ((.c * .d[.e, .e]) + (.a * .b[.e, .e])) * .b[.e, .e]
          }, SIMPLIFY = FALSE)

      } else {
        chr_sigma <- mapply(
          phi_1, chr_covar_phi1, phi_2, chr_covar_phi2, seg_mar_chr_num, FUN = function(.a, .b, .c, .d, .e) {
            ((.c * .d[.e, .e]) + (.a * .b[.e, .e]))
          }, SIMPLIFY = FALSE)
      }


      # Eq. 2 Allier et al 2019
      # Calculate variance per trait
      pred_varG_j <- sapply(X = mar_eff_mat_trait_chr, FUN = function(trait_mars) {
        sum(mapply(chr_sigma, trait_mars, seg_mar_chr, FUN = function(.x, .y, .z) crossprod(.y[.z,], .x) %*% .y[.z,]))
      })


      ## Predictions
      # Cross mean
      pred_mu_j <- colMeans(pgvs[pars,,drop = FALSE])


      # Genetic correlations between traits, if more than one trait
      if (n_traits > 1) {

        # Create a pairwise matrix
        pred_corG_j_mat <- matrix(data = 0, nrow = n_traits, ncol = n_traits,
                                  dimnames = list(trait_names, trait_names))
        # Convert to distance object
        pred_corG_j_dist <- as.dist(pred_corG_j_mat)

        ## Create a combination matrix
        trait_combn <- combn(x = seq(n_traits), m = 2)

        # Turn the marker effect list inside-out
        mar_eff_mat_chr_trait <- lapply(
          X = seq_along(seg_mar_chr), function(q) lapply(mar_eff_mat_trait_chr, "[[", q)
        )

        ## Apply over columns
        # Calculate genetic covariance between pairs of traits
        pred_corG_j <- sapply(X = seq_len(ncol(trait_combn)), FUN = function(rho) {
          ij <- trait_combn[,rho]
          # Calculate covariance
          mar_eff_mat_chr_trait_ij <- lapply(X = mar_eff_mat_chr_trait, FUN = "[", ij)
          covar <- sum(mapply(chr_sigma, mar_eff_mat_chr_trait_ij, seg_mar_chr,
                              FUN = function(.x, .y, .z) crossprod(.y[[1]][.z,], .x) %*% .y[[2]][.z,]))

          # Subset predicted variance to calculate correlation
          corG <- covar / prod(sqrt(pred_varG_j[ij]))
        })

        # Add these predictions to the distance object, then convert to matrix
        pred_corG_j_dist[seq_along(pred_corG_j_dist)] <- pred_corG_j

        ## Convert distance object to matrix
        pred_corG_j_mat <- as.matrix(pred_corG_j_dist)
        diag(pred_corG_j_mat) <- NA

        ## Calculate correlated progeny mean
        response_trait_varG <- matrix(pred_varG_j, nrow = length(pred_varG_j),
                                      ncol = length(pred_varG_j), byrow = TRUE)
        correlated_response <- k_sp * pred_corG_j_mat * sqrt(response_trait_varG)
        pred_mu_j_mat <- matrix(pred_mu_j, nrow = length(pred_mu_j), ncol = length(pred_mu_j), byrow = TRUE)
        pred_cor_musp_low <- pred_mu_j_mat - correlated_response
        pred_cor_musp_high <- pred_mu_j_mat + correlated_response

        # Change names
        colnames(pred_cor_musp_low) <- paste0("pred_cor_musp_low_", trait_names)
        colnames(pred_cor_musp_high) <- paste0("pred_cor_musp_high_", trait_names)

      } else {
        pred_corG_mat <- pred_cor_musp_low <- pred_cor_musp_high <- pred_corG_j_mat <- NULL

      }

      cross_predictions[[j]] <- data.frame(
        t(pars),
        trait = trait_names,
        cbind(pred_mu = pred_mu_j, pred_varG = pred_varG_j, pred_corG_j_mat,
              pred_cor_musp_low, pred_cor_musp_high),
        stringsAsFactors = FALSE, row.names = NULL)

    }

    do.call("rbind", cross_predictions)

  }, mc.cores = n.core) # End loop

  ## Bind rows
  cross_predictions1 <- do.call("rbind", cross_predictions_out)
  # Rename col 1:npar
  names(cross_predictions1)[1:4] <- paste0("parent", 1:4)

  ## Calculate response predictions (superior progeny, correlated response, etc.)
  pred_response <- (k_sp * sqrt(cross_predictions1$pred_varG))

  # Superior progeny mean
  cross_predictions1[["pred_musp_low"]] <- cross_predictions1$pred_mu - pred_response
  cross_predictions1[["pred_musp_high"]] <- cross_predictions1$pred_mu + pred_response

  ## Re-order columns
  cor_W_cols <- grep(pattern = paste0(trait_names, collapse = "|"), x = names(cross_predictions1))
  cross_predictions2 <- cross_predictions1[, c(setdiff(seq(ncol(cross_predictions1)), cor_W_cols), cor_W_cols)]
  row.names(cross_predictions2) <- NULL

  ## Return the predictions
  return(cross_predictions2)

} # Close function