R/RelPlot.R
In sequoia: Pedigree Inference from SNPs

Documented in PlotRelPairs

#' @title Plot Pairwise Relationships
#'
#' @description Plot pairwise 1st and 2nd degree relationships between
#'   individuals, similar to Colony's dyad plot.
#'
#' @param RelM square matrix with relationships between all pairs of
#'   individuals, as generated by \code{\link{GetRelM}}. Row and column names
#'   should be individual IDs.
#' @param subset.x vector with IDs to show on the x-axis; the y-axis will
#'   include all siblings, parents and grandparents of these individuals.
#' @param subset.y vector with IDs to show on the y-axis; the x-axis will
#'   include all siblings, offspring and grandoffspring of these individuals.
#'   Specify either \code{subset.x} or \code{subset.y} (or neither), not both.
#' @param drop.U  logical: omit individuals without relatives from the plot, and
#'   omit individuals without parents from the x-axis. Ignored if
#'   \code{subset.x} or \code{subset.y} is specified.
#' @param pch.symbols  logical: use different symbols for the different
#'   relationships (TRUE) or only colours in a heatmap-like fashion (FALSE).
#'   Question marks in the plot indicate that one or more of the symbols are not
#'   supported on your machine.
#' @param cex.axis  the magnification to be used for axis annotation. Decrease
#'   this value if R is dropping axis labels to prevent them from overlapping.
#' @param mar A numerical vector of the form c(bottom, left, top, right) which
#'   gives the number of lines of margin to be specified on the four sides of
#'   the plot.
#'
#' @return The subsetted, rearranged \code{RelM} is returned
#'   \code{\link{invisible}}.
#'
#'   The numbers of unique pairs of each relationship type are given in the
#'   figure legend. The number of 'self' pairs refers to the number of
#'   individuals on the x-axis, not all of whom may occur on the y-axis when
#'   \code{drop.U=TRUE} or a \code{subset} is specified.
#'
#' @details  Parents are shown above the diagonal (y-axis is parent of x-axis),
#'   siblings below the diagonal. If present, grandparents and full aunts/uncles
#'   are also shown above the diagonal. Individuals are sorted by dam ID and
#'   sire ID so that siblings are grouped together, and then by generation
#'   (\code{\link{getGenerations}}) so that later generations are closer to the
#'   origin.
#'
#'   If \code{RelM} is based on a dataframe with pairs rather than a pedigree,
#'   parents and grandparents are similarly only displayed above the diagonal,
#'   but the order of individuals is arbitrary and the ID on the x-axis is as
#'   likely to be the grandparent of the one on the y-axis as vice versa. Second
#'   degree relatives of unknown classification ('2nd', may be HS, GP or FA) are
#'   only shown below the diagonal. The switch between pedigree-based versus
#'   pairs-based is made on whether parent-offspring pairs are coded as 'M','P',
#'   'MP', 'O' (unidirectional, from pedigree) or as 'PO' (bidirectional, from
#'   pairs).
#'
#'   Note that half-avuncular and (double) full cousin pairs are ignored.
#'
#'
#' @seealso  \code{\link{GetRelM}}; \code{\link{SummarySeq}} for individual-wise
#'   graphical pedigree summaries.
#'
#' @examples
#' Rel.griffin <- GetRelM(Ped_griffin, patmat=TRUE, GenBack=2)
#' PlotRelPairs(Rel.griffin)
#'
#' \dontrun{
#' PlotRelPairs(Rel.griffin, pch.symbols = TRUE)
#' # plot with unicode symbols not supported on all platforms
#' }
#'
#' # parents & grandparents of 2008 cohort:
#' PlotRelPairs(Rel.griffin,
#'              subset.x = Ped_griffin$id[Ped_griffin$birthyear ==2008])
#' # offspring & grand-offspring of 2002 cohort:
#' PlotRelPairs(Rel.griffin,
#'              subset.y = Ped_griffin$id[Ped_griffin$birthyear ==2002])
#'
#' @export

PlotRelPairs <- function(RelM = NULL,
                         subset.x = NULL,
                         subset.y = NULL,
                         drop.U = TRUE,
                         pch.symbols = FALSE,
                         cex.axis = 0.7,
                         mar = c(5,5,1,8))
{
  RelNames <- c(M = "dam", P = "sire",  MP = "parent", FS = "full sib",
                MHS = "mat. half sib", PHS = "pat. half sib", HS = "half sib",
                GP = "grandparent", FA = "full avuncular",
                S = "self")
  Rel.compare <- names(RelNames)
  Rel.maybe <- c("PO", "FS", "HS", "GP", "FA", "2nd", "Q")  # output from GetMaybeRel():
  Rel.maybeQ <- paste0(Rel.maybe, "?")   # ^ as Pairs2 in ComparePairs():
  Rel.ignore <- c("X", "U", "O", "GO", "FN", "HA", "HN", "DFC1", "FC1", "XX", "XX?", "HA?")
  Rel.valid <- c(Rel.compare, Rel.maybe, Rel.maybeQ)

  RelNames <- c(RelNames[1:3], PO = "parent", RelNames[4:9],
                "2nd" = "2nd degree", Q = "Unclear", RelNames[10])
  RelNames <- c(RelNames, setNames(paste0(RelNames[Rel.maybe], "?"), Rel.maybeQ))


  # check if RelM valid ----
  if (nrow(RelM) != ncol(RelM))  stop("`RelM` must be a square matrix")
  if (length(intersect(rownames(RelM), colnames(RelM))) != nrow(RelM))
    stop("`RelM` must have same IDs on rows and columns")
  if (any(rownames(RelM) != colnames(RelM)))
    stop("rows and columns of `RelM` must be in the same order")

  RCM <- RelM
  RCM[is.na(RCM)] <- "X"
  RCM[RCM %in% c("MGM", "MGF", "PGM", "PGF")] <- "GP"  # simplify
  RCM[RCM %in% c("MFA", "PFA")] <- "FA"

  Rel.invalid <- setdiff(c(RCM), c(Rel.valid, Rel.ignore))
  if (length(Rel.invalid) > 0) {
    stop("Unknown value(s) in RelM : ", Rel.invalid)
  }
  if (!any(unique(c(RCM)) %in% Rel.valid)) {
    cli::cli_alert_danger("No recognised relationship abbreviations in `RelM`")
    return()
  }

  PedBased <- any(RCM %in% c("M", "P", "MP"))   # RelM derived from pedigree, rather than Pairs

  if (!is.null(subset.x) & !is.null(subset.y))
    stop("Please specify either `subset.x` or `subset.y`, not both")
  if (!is.null(subset.x) && !all(subset.x %in% rownames(RelM)))
    stop("All IDs in `subset.x` must occur as rownames in `RelM`")
  if (!is.null(subset.y) && !all(subset.y %in% rownames(RelM)))
    stop("All IDs in `subset.y` must occur as rownames in `RelM`")

  if (!is.null(subset.x) | !is.null(subset.y))  drop.U <- FALSE


  # subset levels depending on Compare GenBack and patmat / GetMaybeRel ----
  lvls <- list(GB1.no = c("MP", "FS", "HS", "S"),
               GB1.yes = c("M", "P", "FS", "MHS", "PHS", "S"),
               GB2.no = c("MP", "FS", "HS", "GP", "FA", "S"),
               GB2.yes = c("M", "P", "FS", "MHS", "PHS", "GP", "FA", "S"))

  if ("PO" %in% RCM & all(RCM %in% c(Rel.maybe, Rel.ignore))) { # output from GetMaybeRel
    RelNames.sub <- RelNames[Rel.maybe]
  } else if ("PO?" %in% RCM & all(RCM %in% c(Rel.maybeQ, Rel.ignore))) { # output from GetMaybeRel via comparePairs
    RelNames.sub <- RelNames[Rel.maybeQ]

  } else if (all(RCM %in% c(Rel.compare, Rel.maybeQ, Rel.ignore))) {   # output from comparePairs
    if (!any(RCM %in% c("M", "P", "MHS", "PHS"))) {  # patmat=FALSE
      if (!any(RCM %in% c("GP", "FA"))) {
        RelNames.sub <- RelNames[lvls[["GB1.no"]]]
      } else {
        RelNames.sub <- RelNames[lvls[["GB2.no"]]]
      }

    } else if (!any(RCM %in% c("MP", "HS"))) {   # patmat=TRUE
      if (!any(RCM %in% c("GP", "FA"))) {
        RelNames.sub <- RelNames[lvls[["GB1.yes"]]]
      } else {
        RelNames.sub <- RelNames[lvls[["GB2.yes"]]]
      }
    }
    if (any(RCM %in% Rel.maybeQ) & all(RCM %in% c(Rel.maybeQ, Rel.compare, Rel.ignore))) {
      RelNames.sub <- c(RelNames.sub, RelNames[Rel.maybeQ])
    }
  } else {
    # some custom/combination set
    RelNames.sub <- RelNames[names(RelNames) %in% RCM]   # no zero-counts.
  }


  # get generation no. for each individual ----
  if (PedBased) {   # RelM based on pedigree
    gen <- setNames(rep(9999, nrow(RCM)), rownames(RCM))
    npt <- apply(RCM, 1, function(x) sum(!is.na(factor(x, levels=c("M", "P", "MP")))))  # no. assigned parents (not maybe)
    ngt <- apply(RCM, 1, function(x) sum(x == "GP"))  # no. grandparents
    gen[npt == 0 & ngt==0] <- 0  # founders
    for (g in 0:1000) {
      npg <- apply(RCM[, gen <= g], 1, function(x) sum(!is.na(factor(x, levels=c("M", "P", "MP")))))  # no. parents
      ngg <-  apply(RCM[, gen <= g-1], 1, function(x) sum(x == "GP"))
      # both parents in generation g or before, & ll grandparents in g-1 or before
      gen[gen==9999 & npg == npt & ngg == ngt] <- g+1
      if (all(gen < 9999))  break
    }
  } else {   # RelM based on pairs
    gen <- setNames(rep(0, nrow(RCM)), rownames(RCM))
  }


  # cluster sibs together ----
  for (p in c("MP", "P", "M")) {
    id.par <- apply(RCM, 1, function(R) match(p, R))
#    id.par.2 <- apply(RCM, 2, function(R) match(p, R))    # if matrix no longer square
    RCM <- RCM[order(id.par), order(id.par)]
  }


  # order individuals by generation ----
  RCM <- RCM[order(-gen[rownames(RCM)]), order(-gen[colnames(RCM)])]


  # drop pairs included twice ----
  RCM[upper.tri(RCM) & RCM %in% c("FS", "HS", "MHS", "PHS")] <- "zzz"
  if (!PedBased) {
    RCM[lower.tri(RCM) & RCM %in% c("PO", "PO?", "GP", "GP?", "FA", "FA?","Q", "Q?")] <- "zzz"
    RCM[upper.tri(RCM) & RCM %in% c("2nd", "2nd?")] <- "zzz"
  }


  # subsets ----
  if (!is.null(subset.x)) {
    sub.x <- rownames(RCM) %in% as.character(subset.x)   # don't change order
    nri.sub <- apply(RCM[sub.x, ], 2, function(x)
      sum(!is.na(factor(x, levels=names(RelNames.sub)))))
    RCM <- RCM[sub.x, nri.sub >1]   # rows become x-axis

  } else if (!is.null(subset.y)) {
    sub.y <- rownames(RCM) %in% as.character(subset.y)
    nri.sub <- apply(RCM[, sub.y], 1, function(x)
      sum(!is.na(factor(x, levels=names(RelNames.sub)))))
    RCM <- RCM[nri.sub >0, sub.y]
  }

  # drop individuals without relatives ----
  if (drop.U) {
    nri <- apply(RCM, 2, function(x) sum(!is.na(factor(x, levels=names(RelNames.sub)))))  # no. relatives
    nai <- apply(RCM, 1, function(x) sum(!is.na(factor(x, levels=c("M", "P", "MP", "GP")))))  # has ancestors
    if (any(RCM %in% c("M", "P", "MP"))) {   # RelM based on pedigree
      RCM <- RCM[nai>0, nri>1]  # nri==1: only relative = self ; has at least 1 ancestor
    } else {  # RelM based on pairs
      RCM <- RCM[nri>0, nri>1]
    }
  }


  # plotting symbols & colours -----
  PCH <- c(M = 16,  # circle
           P = 15,  # square
           MP = -9688,  # square w circle
           PO = -9688,
           FS = 23,  # diamond
           MHS = 24, # triangle up
           PHS = 25, # triangle down
           HS = -9658,  # triangle right-pointing
           GP = -9733,  # filled star
           FA = -9734,  # open star
#           HA = -9734,
           "2nd" = -9668,   # triangle left-pointing
           Q = 4,   # cross
           S = 8)   # asterisk

  COL <- c(M = "firebrick",
           P = "darkblue",
           MP = "purple4",
           PO = "purple4",
           FS = "green4",
           MHS = "orchid",
           PHS = "steelblue3",
           HS = "cyan3",
           GP = "goldenrod2",
           FA = "chocolate4",
#           HA = "chocolate2",
           "2nd" = "wheat3",
           Q = "wheat1",
           S = "darkgrey")

  if (any(RCM %in% Rel.maybeQ)) {
    PCH <- c(PCH, setNames(PCH, paste0(names(PCH), "?")))   # same symbols
    COLQ <- grDevices::adjustcolor(COL, alpha.f=0.5)  # lower opacity colours
    COL <- c(COL, setNames(COLQ, paste0(names(COL), "?")))
  }

  # dataframe w all plotting specs per relationship ----
  RelDF <- as.DF(RelNames.sub, colnames=c("Rel", "NAME"))
  RelDF$n <- table(factor(RCM, levels = RelDF$Rel))
  if (drop.U | !is.null(subset.x) | !is.null(subset.y)) {  # matrix non-square
    RelDF$n[RelDF$Rel=="S"] <- nrow(RCM)  # no. individuals on x-axis
  }
  RelDF$LEG <- with(RelDF, paste0(NAME, "\n(", n, ")"))   # for legend

  RelDF$Rank <- seq_along(RelDF$Rel)   # to undo order change by merge()
  RelDF <- merge(RelDF, as.DF(COL, colnames=c("Rel", "COL")), all.x=TRUE)
  RelDF$BG <- grDevices::adjustcolor(RelDF$COL, alpha.f=0.5)
  RelDF$BG[RelDF$Rel == "FS"] <- RelDF$COL[RelDF$Rel == "FS"]
  RelDF <- merge(RelDF, as.DF(PCH, colnames=c("Rel", "PCH")))
  RelDF <- RelDF[order(RelDF$Rank, decreasing=FALSE), ]

  # set up plot ----
  if (pch.symbols) {
    xp <- seq_len(nrow(RCM))
    yp <- seq_len(ncol(RCM))
  } else {
    xp <- seq(0, 1, length.out = nrow(RCM))
    yp <- seq(0, 1, length.out = ncol(RCM))
  }

  oldpar <- par(no.readonly = TRUE)
  oldpar <- oldpar[!names(oldpar) %in% c("pin", "fig")]   # current plot dimensions, not setable. bug?
  par(mfcol=c(1,1), mar=mar)

  OK <- tryPlot(plot,
                1,1, las=1, type="n", xlim=range(xp), ylim=range(yp),
                xlab = "", ylab="", axes=FALSE, frame.plot=TRUE,
#                ErrMsg = "PlotRelPairs: Plotting area too small",
                oldpar = oldpar)
#   if (!OK)  return()     # not possible to do invisible() return halfway ?
  if (OK) {
    axis(side=1, at=xp, labels=rownames(RCM), las=2, cex.axis=cex.axis)
    axis(side=2, at=yp, labels=colnames(RCM), las=2, cex.axis=cex.axis)
    abline(h=yp, col="lightgrey")
    abline(v=xp, col="lightgrey")

    # actual plotting ----
    if (pch.symbols) {
      for (r in rev(seq.int(nrow(RelDF)))) {
        these <- which(RCM == RelDF$Rel[r], arr.ind=TRUE)
        if (nrow(these) == 0)  next
        with(RelDF, points(these, pch = ifelse(PCH[r]>=0, PCH[r], intToUtf8(-PCH[r])),
                           col = COL[r], bg=BG[r], cex=1.1, lwd=ifelse(PCH>0, 2, 1)))
      }
      with(RelDF, legend(x=1.08*max(xp), y=max(yp), legend=LEG, col = COL, pt.bg=BG,
                         pch=PCH, pt.cex=1.5, pt.lwd=2, xpd = NA, y.intersp=2))
    } else {
      RCM.f <- matrix( as.numeric( factor(RCM, levels=RelDF$Rel)),
                       nrow(RCM))
      image(RCM.f, xaxt="n", yaxt="n", breaks=seq.int(nrow(RelDF)+1) -.5,
            col=RelDF$COL, add=TRUE)

      with(RelDF, legend(x=1.08, y=1, legend=LEG,
                         fill = COL, pt.cex=1.5, xpd = NA, y.intersp=2))
    }
    par(oldpar)
  }

  invisible(RCM)  # subsetted RelM
}