R/summarizeKinshipValues.R

Defines functions summarizeKinshipValues

Documented in summarizeKinshipValues

#' Summary statistics for imputed kinship values
#'
#' Makes a data.frame object containing simulated kinship summary statistics
#' using the counts of kinship values list from \code{countKinshipValues}.
#'
#' @return a data.frame with one row of summary statistics for each imputed
#' kinship value. The columns are as follows:
#'  \code{id_1},
#'  \code{id_2},
#'  \code{min},
#'  \code{secondQuartile},
#'  \code{mean},
#'  \code{median},
#'  \code{thirdQuartile},
#'  \code{max}, and
#'  \code{sd}.
#'
#' @param countedKValues list object from countKinshipValues function that
#' containes the lists \code{kinshipIds}, \code{kinshipValues},
#' and \code{kinshipCounts}.
#' @importFrom stats fivenum sd
#' @export
#' @examples
#' ped <- nprcgenekeepr::smallPed
#' simParent_1 <- list(id = "A",
#'                     sires = c("s1_1", "s1_2", "s1_3"),
#'                     dams = c("d1_1", "d1_2", "d1_3", "d1_4"))
#' simParent_2 <- list(id = "B",
#'                     sires = c("s1_1", "s1_2", "s1_3"),
#'                     dams = c("d1_1", "d1_2", "d1_3", "d1_4"))
#' simParent_3 <- list(id = "E",
#'                     sires = c("A", "C", "s1_1"),
#'                     dams = c("d3_1", "B"))
#' simParent_4 <- list(id = "J",
#'                     sires = c("A", "C", "s1_1"),
#'                     dams = c("d3_1", "B"))
#' simParent_5 <- list(id = "K",
#'                     sires = c("A", "C", "s1_1"),
#'                     dams = c("d3_1", "B"))
#' simParent_6 <- list(id = "N",
#'                     sires = c("A", "C", "s1_1"),
#'                     dams = c("d3_1", "B"))
#' allSimParents <- list(simParent_1, simParent_2, simParent_3,
#'                       simParent_4, simParent_5, simParent_6)
#'
#' extractKinship <- function(simKinships, id1, id2, simulation) {
#'   ids <- dimnames(simKinships[[simulation]])[[1]]
#'   simKinships[[simulation]][seq_along(ids)[ids == id1],
#'                             seq_along(ids)[ids == id2]]
#' }
#'
#' extractKValue <- function(kValue, id1, id2, simulation) {
#'   kValue[kValue$id_1 ==  id1 & kValue$id_2 == id2,
#'          paste0("sim_", simulation)]
#' }
#'
#' n <- 10
#' simKinships <- createSimKinships(ped, allSimParents, pop = ped$id, n = n)
#' kValues <- kinshipMatricesToKValues(simKinships)
#' extractKValue(kValues, id1 = "A", id2 = "F", simulation = 1:n)
#' counts <- countKinshipValues(kValues)
#' stats <- summarizeKinshipValues(counts)
summarizeKinshipValues <- function(countedKValues) {
  if (!all(is.element(names(countedKValues), c("kIds", "kValues",
                                               "kCounts"))))
    stop("summarizeKinshipValues received wrong object")
  stats <- data.frame()

  for (i in seq_along(countedKValues$kIds)) {
    numbers <- rep(unlist(countedKValues$kValues[i]),
                   unlist(countedKValues$kCounts[i]))
    if (any(is.na(numbers), is.na(mean(numbers))))
      cat(paste0("i = ", i))
    tukeys <- fivenum(numbers)
    stats <- rbind(
      stats,
      data.frame(
        id_1 = countedKValues$kIds[[i]][1],
        id_2 = countedKValues$kIds[[i]][2],
        min = tukeys[1],
        secondQuartile = tukeys[1],
        mean = mean(numbers),
        median = tukeys[3],
        thirdQuartile = tukeys[4],
        max = tukeys[5],
        sd = sd(numbers)
      )
    )
  }
  stats
}
rmsharp/nprcmanager documentation built on Feb. 2, 2025, 12:45 a.m.