R/subsetDVI.R

Defines functions subsetDVI

Documented in subsetDVI

#' Extract a subset of a DVI dataset
#'
#' @param dvi A [dviData()] object
#' @param pm A vector with names or indices of victim samples. By
#'   default, all are included.
#' @param am A vector with names or indices of AM components. By
#'   default, components without remaining missing individuals are dropped.
#' @param missing A vector with names or indices of missing persons. By
#'   default, all missing persons in the remaining AM families are included. 
#' @param verbose A logical.
#'
#' @return A `dviData` object.
#'
#' @examples
#' 
#' subsetDVI(example2, pm = 1:2) |> plotDVI()
#' subsetDVI(example2, pm = "V1", am = 1) |> plotDVI()
#' subsetDVI(example2, missing = "M3") |> plotDVI()
#' 
#' @export
subsetDVI = function(dvi, pm = NULL, am = NULL, missing = NULL, verbose = TRUE) {
  
  if(verbose)
    cat("Reducing DVI dataset\n")
  
  dvi = consolidateDVI(dvi)
  
  pmNew = dvi$pm
  amNew = dvi$am
  missNew = dvi$missing
  
  if(!is.null(pm)) {
    pmNew = dvi$pm[pm]
    
    err = vapply(pmNew, is.null, FALSE)
    if(any(err))
      stop2("Unknown name/index of PM singleton: ", pm[err])
  }
  
  if(!is.null(am)) {
    amNew = dvi$am[am]
    
    err = vapply(amNew, is.null, FALSE)
    if(any(err))
      stop2("Unknown name/index of AM family: ", am[err])
    
    if(is.null(missing)) {
      comps = getComponent(amNew, dvi$missing, checkUnique = FALSE, errorIfUnknown = FALSE)
      NAcomp = is.na(comps)
      if(any(NAcomp)) {
        missNew = dvi$missing[!NAcomp]
        if(verbose)
          cat(sprintf("Removing %d missing person%s, keeping %d:\n %s\n", 
                      sum(NAcomp), if(sum(NAcomp)==1) "" else "s", sum(!NAcomp), toString(missNew)))
      }
    }
  }
  
  if(!is.null(missing)) {
    
    # Add names to allow subsetting by name or index
    miss0 = dvi$missing
    names(miss0) = miss0
    
    missNew = unname(miss0[missing])
    
    err = is.na(missNew)
    if(any(err))
      stop2("Unknown name/index of missing person: ", missing[err])
    
    # If AM subset not given by used, remove components without missing persons
    if(is.null(am)) {
      comps = getComponent(dvi$am, missNew, checkUnique = FALSE, errorIfUnknown = FALSE)
      if(anyNA(comps))
        stop2("Missing person not found in AM data: ", missNew[is.na(comps)])
      
      unused = setdiff(seq_along(dvi$am), comps)
      if(length(unused)) {
        if(verbose) {
          nun = length(unused)
          famNames = names(amNew)[unused] %||% unused
          cat(sprintf("Removing %d AM famil%s with no remaining missing persons: %s\n", 
                          nun, if(nun == 1) "y" else "ies", toString(famNames)))
        }
        amNew[unused] = NULL
      }
    }
  }
  
  dviNew = dviData(pmNew, amNew, missNew)
  
  # Fix pairings if they were included in original dataset
  if(!is.null(dvi$pairings)) {
    removedMiss = setdiff(dvi$missing, missNew)
    dviNew$pairings = lapply(dvi$pairings[names(pmNew)], function(v) v[!v %in% removedMiss])
  } 
    
  # PMs with no remaining pairings?
  excl = vapply(dviNew$pairings, function(v) length(v) == 1 && v == "*", 
                FUN.VALUE = FALSE)
  if(nex <- sum(excl)) {
    if(verbose)
      cat(sprintf("Removing %s PM sample%s with no remaining pairings: %s\n", 
                  nex, if(nex == 1) "" else "s", toString(names(excl)[excl])))
    dviNew$pm[excl] = dviNew$pairings[excl] = NULL
  }
  
  dviNew
}

Try the dvir package in your browser

Any scripts or data that you put into this service are public.

dvir documentation built on Sept. 11, 2024, 7:03 p.m.