Nothing
#' Integer programming calculations
#'
#' This function maximizes the predicted genetic gain in the selection of groups of genotypes based on the predictors of genotypic effects.
#'
#' @param traits A vector with the names of the columns in the data corresponding to the target traits to be optimized, i.e., those included in the objective function.
#' @param ref Name of the reference column (e.g., genotype ID). Defaults to the first column.
#' @param clmin An integer specifying the minimum group size. If omitted, equal to 2.
#' @param clmax An integer specifying the maximum group size. If omitted, equal to `clmin`.
#' @param dmg A `data.frame` with three columns defining constraints: trait names; constraints signs (`">="`, `"<="` or `"=="`); and _right-hand side_ values of the constraints.
#' @param meanvec A named numeric vector of trait means; if omitted, data are assumed to be already normalized by the mean.
#' @param criteria A named numeric vector indicating the selection criterion for each trait: 1 for traits to be increased, -1 for traits to be decreased. If omitted, all traits are assumed to be selected for increase.
#' @param data A data frame comprising the input data consisting of the Predictors of genetic effects, which serve as the basis for the selection procedure.
#' @note The order of traits must be consistent across `traits`, `dmg`, `meanvec`, and `criteria`. Both `meanvec` and `criteria` must include values for all traits specified in `traits` and `dmg`.
#' @return
#' A list with the following components:
#' - `gain` with the gains of the several traits in each dimension
#' - `selected` with the reference os the clones selected in the group of each dimension
#'
#' @references Surgy, S., Cadima, J. & Gonçalves, E. Integer programming as a powerful tool for polyclonal selection in ancient grapevine varieties. Theor Appl Genet 138, 122 (2025). \doi{10.1007/s00122-025-04885-0}
#' @export
#' @examples
#' mymeanvec <- c(yd = 3.517, pa = 12.760, ta = 4.495, ph = 3.927, bw = 1.653)
#' mytraits <- c("yd", "pa", "ta", "ph", "bw")
#' mydmg <- data.frame(
#' lhs = c("yd", "pa", "ta", "ph", "bw"),
#' rel = c(">=", ">=", ">=", ">=", ">="),
#' rhs = c(20, 3, 3, 1, 2)
#' )
#' mycriteria <- c(yd = 1, pa = 1, ta = 1, ph = -1, bw = -1)
#' selections <- polyclonal(
#' traits = mytraits,
#' clmin = 7,
#' clmax = 20,
#' dmg = mydmg,
#' meanvec = mymeanvec,
#' criteria = mycriteria,
#' data = Gouveio
#' )
#' selections
#' summary(selections)
polyclonal <- function(traits, ref = NULL, clmin = 2, clmax, dmg = NULL, meanvec = NULL, criteria = NULL, data)
{
# Input checks
if (length(traits) < 1) stop("There must be at least one trait in selection")
if (!is.null(ref) && length(ref) != 1) stop("Only one reference column is allowed (ref).")
if (is.null(ref)) ref <- names(data)[1]
if (!all(traits %in% names(data))) stop("Some columns in 'traits' do not exist in `data`.")
if (!all(ref %in% names(data))) stop("The reference column (ref) does not exist in `data`.")
if (length(dmg) == 1){
if (dmg == "base"){
dmg <- all_zero(ref, data)
}
}
# Constraint setup
const <- if (!is.null(dmg)) dmg[[1]] else NULL
relation <- if (!is.null(dmg)) dmg[[2]] else NULL
rhs <- if (!is.null(dmg)) dmg[[3]] else NULL
if (!is.null(const) && !all(const %in% names(data))) {
stop("Some constraint in `dmg` do not exist in 'data'.")
}
# Trait + constraint columns
cols <- unique(c(traits, const))
# Validate vector lengths
if (!is.null(meanvec) && length(meanvec) != length(cols)) {
stop("Length of 'meanvec' must contain values for all traits in `traits` and `constraints`.")
}
if (!is.null(criteria) && length(criteria) != length(cols)) {
stop("Length of 'criteria' must contain values for all traits in `traits` and `constraints`.")
}
# Set defaults
if (is.null(criteria)) criteria <- stats::setNames(rep(1, length(cols)), cols)
if (is.null(meanvec)) meanvec <- stats::setNames(rep(1, length(cols)), cols)
# Create summary matrix
auxsum <- data.frame(col1=cols)
if (!is.null(dmg)){
idx <- match(auxsum$col1, dmg[[1]])
auxsum$Crit <- criteria [idx]
auxsum$Mean <- meanvec[idx]
auxsum$DMG <- rhs[idx]
}else{
idx <- match(auxsum$col1, traits)
auxsum$Crit <- criteria [idx]
auxsum$Mean <- meanvec[idx]
auxsum$DMG <- c(rep("NA", length(idx)))
}
# Normalize trait and constraint values
auxeblups <- norm_eblup(data[, cols, drop = FALSE], cols, meanvec, criteria)
auxeblups <- data.frame(data[, ref, drop = FALSE], auxeblups)
# Check clone group size limits
if(missing(clmax)) clmax <- clmin
if (clmax < clmin || clmin < 1) stop("'clmax' must be >= 'clmin' and both >= 1.")
clmin <- as.integer(clmin)
clmax <- as.integer(clmax)
# Objective function (sum of traits to maximize)
objdir <- "max"
auxobj <- auxeblups[, traits, drop = FALSE]
fobj=0
fobj <- Reduce(`+`, auxobj)
# Constraint matrix
if (is.null(const)) {
constvalue <- data.frame(rep(1, nrow(auxeblups)))
colnames(constvalue) <- "var"
dirvalue <- "=="
} else {
constvalue <- data.frame(rep(1, nrow(auxeblups)), auxeblups[, const, drop = FALSE])
colnames(constvalue) <- c("var", const)
dirvalue <- c("==", relation)
}
# Vars for results
clsel <- 0
clselmax <- 0
indsol <- 0
gainout <- NULL
# Iterate over group sizes
for (i in clmax:clmin){
rhsvalue <- i
if (!is.null(rhs)){
for (a in 1:length(rhs)){
rhsvalue <- c(rhsvalue, rhs[a]*i/100)
}
}
# Solver
prob <- lpSolve::lp(
direction = objdir,
objective.in = fobj,
const.mat = constvalue,
const.dir=dirvalue,
const.rhs=rhsvalue,
transpose.constraints = FALSE,
all.bin = TRUE,
use.rw=TRUE)
# Handle results
if (prob$status==0){
clsel = sum(prob$solution)
auxresultados <- data.frame(auxeblups[,c(ref,traits)], prob$solution)
resultados <- subset(auxresultados, prob$solution==1)
if (indsol==0){
indlinha <- clsel
clonesout <- resultados[,1]
colnomes <- c(clsel)
if (length(resultados) > 3){
ganhos <- colMeans(resultados[,c(-1, -length(resultados))]*100)
}else{
ganhos <- mean(resultados[,c(-1, -length(resultados))]*100)
}
gainout <- data.frame(t(ganhos), clsel)
colnames(gainout) <- c(colnames(resultados[, c(-1, -length(resultados)), drop = FALSE]), "Group.Size")
}else{
clonesout <- c(clonesout, resultados[,1], rep(" ", indlinha-clsel))
colnomes <- c(colnomes, clsel)
if (length(resultados) > 3){
ganhos <- colMeans(resultados[,c(-1, -length(resultados))]*100)
}else{
ganhos <- mean(resultados[,c(-1, -length(resultados))]*100)
}
gainout <- rbind(gainout, c(t(ganhos), clsel))
}
indsol <- indsol + 1
}
}
# impossible
if (indsol == 0) {
message("No possible solution!")
return(invisible(NULL))
}
clonesout <- matrix(clonesout, nrow = indlinha, ncol = indsol)
clonesout <- as.data.frame(clonesout)
colnames(clonesout) <- colnomes
auxsum <- data.frame(auxsum, row.names = 1)
auxsum$MaxGain <- NA_real_
auxsum$MaxGroup <- NA_integer_
common_names <- intersect(colnames(gainout), rownames(auxsum))
for (name in common_names) {
max_val <- max(gainout[[name]], na.rm = TRUE)
auxsum[name, "MaxGain"] <- max_val
rows_with_max <- which(gainout[[name]] == max_val)
last_row <- utils::tail(rows_with_max, 1)
auxsum[name, "MaxGroup"] <- gainout$Group.Size[last_row]
}
if (ncol(clonesout) != (clmax - clmin +1)) {
message("No possible solution was found for some of the requested group sizes.")
}
gainout <- gainout[, c(ncol(gainout), 1:(ncol(gainout) - 1))]
result <- list(
gain = gainout,
selected = clonesout,
n_traits = length(traits),
n_constraints = length(const),
overview = auxsum
)
class(result) <- "polyresult"
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.