R/filterPairs.R

Defines functions filterPairs

Documented in filterPairs

#' Filters kinship values from a long-format kinship table based on the sexes
#'  of the two animals involved.
## Copyright(c) 2017-2020 R. Mark Sharp
## This file is part of nprcgenekeepr
#'
#' Part of Group Formation
#'
#' @return A dataframe representing a filtered long-format kinship table.
#'
#' @examples
#' \donttest{
#' library(nprcgenekeepr)
#' ped <- nprcgenekeepr::lacy1989Ped
#' ped$gen <- findGeneration(ped$id, ped$sire, ped$dam)
#' kmat <- kinship(ped$id, ped$sire, ped$dam, ped$gen)
#' kin <- kinMatrix2LongForm(kmat, rm.dups = FALSE)
#' threshold <- 0.1
#' kin <- filterThreshold(kin, threshold = threshold)
#' ped$sex <- c("M", "F", "M", "M", "F", "F", "M")
#' kinNull <- filterPairs(kin, ped, ignore = NULL)
#' kinMM <- filterPairs(kin, ped, ignore = list(c("M", "M")))
#' ped
#' kin[kin$id1 == "C", ]
#' kinMM[kinMM$id1 == "C", ]
#' }
#'
#' @param kin a dataframe with columns \code{id1}, \code{id2}, and
#' \code{kinship}. This is the kinship data reformatted from a matrix,
#' to a long-format table.
#' @param ped Dataframe of pedigree information including the IDs listed in
#' \code{candidates}.
#' @param ignore a list containing zero or more character vectors of length 2
#' indicating which sex pairs should be ignored with regard to kinship.
#' Defaults to \code{list(c("F", "F"))}.
#' @export
filterPairs <- function(kin, ped, ignore = list(c("F", "F"))) {

  if (length(ignore) == 0) {
    return(kin)
  }
  kin["sort.col"] <- seq_len(nrow(kin))

  g1 <- merge(kin, ped, by.x = "id1", by.y = "id", all.x = TRUE, all.y = FALSE)
  g2 <- merge(kin, ped, by.x = "id2", by.y = "id", all.x = TRUE, all.y = FALSE)

  g1 <- g1[with(g1, order(sort.col)), "sex"]
  g2 <- g2[with(g2, order(sort.col)), "sex"]

  keep <- rep(TRUE, length(g1))

  for (i in seq_len(length(ignore))) {
    rel <- ignore[[i]]
    k <- !(((g1 == rel[1]) &
               (g2 == rel[2])) |
             ((g1 == rel[2]) &
                (g2 == rel[1])))
    keep <- keep & k
  }
  kin$sort.col <- NULL
  kin <- kin[keep, ]
  if (nrow(kin) > 0)
    rownames(kin) <- seq_len(nrow(kin))
  return(kin[!is.na(kin[[1]]), ])
}
rmsharp/nprcmanager documentation built on April 24, 2021, 3:13 p.m.