R/buildComponent.R

Defines functions .loadOrComputeParList .loadOrComputeInverseDiagonal .loadOrComputeIsChild .loadOrComputeIsPar loadOrComputeCheckpoint .assignParentValue initializeCheckpoint .computeTranspose ped2ce ped2cn ped2mt ped2add ped2com

Documented in .assignParentValue .computeTranspose initializeCheckpoint loadOrComputeCheckpoint .loadOrComputeInverseDiagonal .loadOrComputeIsChild .loadOrComputeIsPar .loadOrComputeParList ped2add ped2ce ped2cn ped2com ped2mt

#' Take a pedigree and turn it into a relatedness matrix
#' @param ped a pedigree dataset.  Needs ID, momID, and dadID columns
#' @param component character.  Which component of the pedigree to return.  See Details.
#' @param max.gen the maximum number of generations to compute
#'  (e.g., only up to 4th degree relatives).  The default is 25. However it can be set to infinity.
#'   `Inf` uses as many generations as there are in the data.
#' @param sparse logical.  If TRUE, use and return sparse matrices from Matrix package
#' @param verbose logical.  If TRUE, print progress through stages of algorithm
#' @param update_rate numeric. The rate at which to print progress
#' @param gc logical. If TRUE, do frequent garbage collection via \code{\link{gc}} to save memory
#' @param saveable logical. If TRUE, save the intermediate results to disk
#' @param save_rate numeric. The rate at which to save the intermediate results
#' @param save_rate_gen  numeric. The rate at which to save the intermediate results by generation. If NULL, defaults to save_rate
#' @param save_rate_parlist numeric. The rate at which to save the intermediate results by parent list. If NULL, defaults to save_rate*1000
#' @param resume logical. If TRUE, resume from a checkpoint
#' @param save_path character. The path to save the checkpoint files
#' @param flatten.diag logical. If TRUE, overwrite the diagonal of the final relatedness matrix with ones
#' @param standardize.colnames logical. If TRUE, standardize the column names of the pedigree dataset
#' @param transpose_method character. The method to use for computing the transpose.  Options are "tcrossprod", "crossprod", or "star"
#' @param adjacency_method character. The method to use for computing the adjacency matrix.  Options are "loop", "indexed", direct or beta
#' @param isChild_method character. The method to use for computing the isChild matrix.  Options are "classic" or "partialparent"
#' @param adjBeta_method numeric The method to use for computing the building the adjacency_method matrix when using the "beta" build
#' @param ... additional arguments to be passed to \code{\link{ped2com}}
#' @details The algorithms and methodologies used in this function are further discussed and exemplified in the vignette titled "examplePedigreeFunctions". For more advanced scenarios and detailed explanations, consult this vignette.
#' @export
#'
ped2com <- function(ped, component,
                    max.gen = 25,
                    sparse = TRUE,
                    verbose = FALSE,
                    gc = FALSE,
                    flatten.diag = FALSE,
                    standardize.colnames = TRUE,
                    transpose_method = "tcrossprod",
                    adjacency_method = "direct",
                    isChild_method = "classic",
                    saveable = FALSE,
                    resume = FALSE,
                    save_rate = 5,
                    save_rate_gen = save_rate,
                    save_rate_parlist = 100000 * save_rate,
                    update_rate = 100,
                    save_path = "checkpoint/",
                    adjBeta_method = NULL,
                    ...) {
  #------
  # Check inputs
  #------

  config <- list(
    verbose = verbose,
    saveable = saveable,
    resume = resume,
    save_path = save_path,
    max.gen = max.gen,
    sparse = sparse,
    flatten.diag = flatten.diag,
    standardize.colnames = standardize.colnames,
    transpose_method = transpose_method,
    adjacency_method = adjacency_method,
    isChild_method = isChild_method,
    save_rate = save_rate,
    save_rate_gen = save_rate_gen,
    save_rate_parlist = save_rate_parlist,
    update_rate = update_rate,
    gc = gc,
    component = component,
    adjBeta_method = adjBeta_method,
    nr = nrow(ped)
  )


  #------
  # Checkpointing
  #------
  if (config$saveable || config$resume) { # prepare checkpointing
    if (config$verbose) cat("Preparing checkpointing...\n")

    checkpoint_files <- initializeCheckpoint(config) # initialize checkpoint files
  }
  #------
  # Validation/Preparation
  #------

  # Validate the 'component' argument and match it against predefined choices
  config$component <- match.arg(tolower(config$component),
    choices = c(
      "generation",
      "additive",
      "common nuclear",
      "mitochondrial",
      "mtdna", "mitochondria"
    )
  )

  transpose_method_options <- c(
    "tcrossprod", "crossprod", "star",
    "tcross.alt.crossprod", "tcross.alt.star"
  )
  if (!config$transpose_method %in% transpose_method_options) {
    stop(paste0(
      "Invalid method specified. Choose from ",
      paste(transpose_method_options, collapse = ", "), "."
    ))
  }


  if (!config$adjacency_method %in% c("indexed", "loop", "direct", "beta")) {
    stop("Invalid method specified. Choose from 'indexed', 'loop', 'direct', or 'beta'.")
  }

  # standardize colnames
  if (config$standardize.colnames) {
    ped <- standardizeColnames(ped, verbose = config$verbose)
  }

  # Load final result if computation was completed
  if (config$resume && file.exists(checkpoint_files$final_matrix)) {
    if (config$verbose) cat("Loading final computed matrix...\n")
    return(readRDS(checkpoint_files$final_matrix))
  }


  #------
  # Algorithm
  #------

  # Get the number of rows in the pedigree dataset, representing the size of the family
  #  nr <- nrow(ped)

  # Print the family size if verbose is TRUE
  if (config$verbose) {
    cat(paste0("Family Size = ", config$nr, "\n"))
  }

  # Step 1: Construct parent-child adjacency matrix

  ## A. Resume from Checkpoint if Needed
  ## Initialize variables
  list_of_adjacencies <- .loadOrComputeParList(
    checkpoint_files = checkpoint_files,
    ped = ped,
    config = config
  )


  ## B. Resume loop from the next uncomputed index


  # Construct sparse matrix
  # Garbage collection if gc is TRUE
  if (config$gc) {
    gc()
  }

  # Assign parent values based on the component type
  parVal <- .assignParentValue(component = config$component)

  # Construct sparse matrix
  # Initialize adjacency matrix for parent-child relationships
  isPar <- .loadOrComputeIsPar(
    iss = list_of_adjacencies$iss,
    jss = list_of_adjacencies$jss,
    parVal = parVal,
    ped = ped,
    checkpoint_files = checkpoint_files,
    config = config
  )
  if (config$verbose) {
    cat("Completed first degree relatives (adjacency)\n")
  }

  # isPar is the adjacency matrix.  'A' matrix from RAM

  if (config$component %in% c("common nuclear")) {
    Matrix::diag(isPar) <- 1
    if (config$sparse == FALSE) {
      isPar <- as.matrix(isPar)
    }
    return(isPar)
  }

  # isChild is the 'S' matrix from RAM
  isChild <- .loadOrComputeIsChild(
    ped = ped,
    checkpoint_files = checkpoint_files,
    config = config
  )
  # --- Step 2: Compute Relatedness Matrix ---


  if (config$resume && file.exists(checkpoint_files$r_checkpoint) && file.exists(checkpoint_files$gen_checkpoint) && file.exists(checkpoint_files$mtSum_checkpoint) && file.exists(checkpoint_files$newIsPar_checkpoint) &&
    file.exists(checkpoint_files$count_checkpoint)
  ) {
    if (config$verbose) cat("Resuming: Loading previous computation...\n")
    r <- readRDS(checkpoint_files$r_checkpoint)
    gen <- readRDS(checkpoint_files$gen_checkpoint)
    mtSum <- readRDS(checkpoint_files$mtSum_checkpoint)
    newIsPar <- readRDS(checkpoint_files$newIsPar_checkpoint)
    count <- readRDS(checkpoint_files$count_checkpoint)
  } else {
    r <- Matrix::Diagonal(x = 1, n = config$nr)
    gen <- rep(1, config$nr)
    mtSum <- sum(r, na.rm = TRUE)
    newIsPar <- isPar
    count <- 0
  }
  maxCount <- config$max.gen + 1
  if (config$verbose) {
    cat("About to do RAM path tracing\n")
  }

  # r is I + A + A^2 + ... = (I-A)^-1 from RAM
  # could trim, here
  while (mtSum != 0 && count < maxCount) {
    r <- r + newIsPar
    gen <- gen + (Matrix::rowSums(newIsPar) > 0)
    newIsPar <- newIsPar %*% isPar
    mtSum <- sum(newIsPar)
    count <- count + 1
    if (config$verbose) {
      cat(paste0("Completed ", count - 1, " degree relatives\n"))
    }
    # Save progress every save_rate iterations
    if (config$saveable && (count %% save_rate_gen == 0)) {
      saveRDS(r, file = checkpoint_files$r_checkpoint)
      saveRDS(gen, file = checkpoint_files$gen_checkpoint)
      saveRDS(newIsPar, file = checkpoint_files$newIsPar_checkpoint)
      saveRDS(mtSum, file = checkpoint_files$mtSum_checkpoint)
      saveRDS(count, file = checkpoint_files$count_checkpoint)
    }
    if (config$gc && config$nr > 1000000) {
      gc()
    } # extra gc if large
  }
  # compute rsq <- r %*% sqrt(diag(isChild))
  # compute rel <- tcrossprod(rsq)
  if (config$gc) {
    rm(isPar, newIsPar)
    gc()
  }
  if (config$saveable) {
    saveRDS(r, file = checkpoint_files$ram_checkpoint)
  }

  if (config$component == "generation") { # no need to do the rest
    return(gen)
  } else {
    if (config$verbose) {
      cat("Completed RAM path tracing\n")
    }
  }

  # --- Step 3: I-A inverse times diagonal multiplication ---
  r2 <- .loadOrComputeInverseDiagonal(
    r = r,
    isChild = isChild,
    checkpoint_files = checkpoint_files,
    config = config
  )

  # --- Step 4: T crossproduct  ---

  if (config$resume && file.exists(checkpoint_files$tcrossprod_checkpoint) && config$component != "generation") {
    if (config$verbose) cat("Resuming: Loading tcrossprod...\n")
    r <- readRDS(checkpoint_files$tcrossprod_checkpoint)
  } else {
    r <- .computeTranspose(r2 = r2, transpose_method = transpose_method, verbose = config$verbose)
    if (config$saveable) {
      saveRDS(r, file = checkpoint_files$tcrossprod_checkpoint)
    }
  }

  if (config$component %in% c("mitochondrial", "mtdna", "mitochondria")) {
    r@x <- rep(1, length(r@x))
    # Assign 1 to all nonzero elements for mitochondrial component
  }

  if (config$sparse == FALSE) {
    r <- as.matrix(r)
  }
  if (config$flatten.diag) { # flattens diagonal if you don't want to deal with inbreeding
    diag(r) <- 1
  }
  if (config$saveable) {
    saveRDS(r, file = checkpoint_files$final_matrix)
  }
  return(r)
}

#' Take a pedigree and turn it into an additive genetics relatedness matrix
#' @inheritParams ped2com
#' @inherit ped2com details
#' @export
#'
ped2add <- function(ped, max.gen = 25, sparse = TRUE, verbose = FALSE,
                    gc = FALSE,
                    flatten.diag = FALSE, standardize.colnames = TRUE,
                    transpose_method = "tcrossprod",
                    adjacency_method = "direct",
                    saveable = FALSE,
                    resume = FALSE,
                    save_rate = 5,
                    save_rate_gen = save_rate,
                    save_rate_parlist = 100000 * save_rate,
                    save_path = "checkpoint/",
                    ...) {
  ped2com(
    ped = ped,
    max.gen = max.gen,
    sparse = sparse,
    verbose = verbose,
    gc = gc,
    component = "additive",
    flatten.diag = flatten.diag,
    standardize.colnames = standardize.colnames,
    transpose_method = transpose_method,
    adjacency_method = adjacency_method,
    saveable = saveable,
    resume = resume,
    save_rate_gen = save_rate_gen,
    save_rate_parlist = save_rate_parlist,
    save_path = save_path
  )
}

#' Take a pedigree and turn it into a mitochondrial relatedness matrix
#' @inheritParams ped2com
#' @inherit ped2com details
#' @export
#' @aliases ped2mt
#'
ped2mit <- ped2mt <- function(ped, max.gen = 25,
                              sparse = TRUE,
                              verbose = FALSE, gc = FALSE,
                              flatten.diag = FALSE,
                              standardize.colnames = TRUE,
                              transpose_method = "tcrossprod",
                              adjacency_method = "direct",
                              saveable = FALSE,
                              resume = FALSE,
                              save_rate = 5,
                              save_rate_gen = save_rate,
                              save_rate_parlist = 100000 * save_rate,
                              save_path = "checkpoint/",
                              ...) {
  ped2com(
    ped = ped,
    max.gen = max.gen,
    sparse = sparse,
    verbose = verbose,
    gc = gc,
    component = "mitochondrial",
    flatten.diag = flatten.diag,
    standardize.colnames = standardize.colnames,
    transpose_method = transpose_method,
    adjacency_method = adjacency_method,
    saveable = saveable,
    resume = resume,
    save_rate_gen = save_rate_gen,
    save_rate_parlist = save_rate_parlist,
    save_path = save_path,
    ...
  )
}

#' Take a pedigree and turn it into a common nuclear environmental relatedness matrix
#' @inheritParams ped2com
#' @inherit ped2com details
#' @export
#'
ped2cn <- function(ped, max.gen = 25, sparse = TRUE, verbose = FALSE,
                   gc = FALSE, flatten.diag = FALSE,
                   standardize.colnames = TRUE,
                   transpose_method = "tcrossprod",
                   saveable = FALSE,
                   resume = FALSE,
                   save_rate = 5,
                   adjacency_method = "direct",
                   save_rate_gen = save_rate,
                   save_rate_parlist = 1000 * save_rate,
                   save_path = "checkpoint/",
                   ...) {
  ped2com(
    ped = ped,
    max.gen = max.gen,
    sparse = sparse,
    verbose = verbose,
    gc = gc,
    component = "common nuclear",
    adjacency_method = adjacency_method,
    flatten.diag = flatten.diag,
    standardize.colnames = standardize.colnames,
    transpose_method = transpose_method,
    saveable = saveable,
    resume = resume,
    save_rate_gen = save_rate_gen,
    save_rate_parlist = save_rate_parlist,
    save_path = save_path,
    ...
  )
}
#' Take a pedigree and turn it into an extended environmental relatedness matrix
#' @inheritParams ped2com
#' @inherit ped2com details
#' @export
#'
ped2ce <- function(ped, ...) {
  matrix(1, nrow = nrow(ped), ncol = nrow(ped), dimnames = list(ped$ID, ped$ID))
}


#' Compute the transpose multiplication for the relatedness matrix
#' @inheritParams ped2com
#' @inherit ped2com details
#' @param r2 a relatedness matrix
#'
.computeTranspose <- function(r2, transpose_method = "tcrossprod", verbose = FALSE) {
  valid_methods <- c(
    "tcrossprod", "crossprod", "star",
    "tcross.alt.crossprod", "tcross.alt.star"
  )
  if (!transpose_method %in% valid_methods) {
    stop("Invalid method specified. Choose from 'tcrossprod', 'crossprod', 'star', 'tcross.alt.crossprod', or 'tcross.alt.star'.")
  }

  # Map aliases to core methods
  alias_map <- c(
    "tcross.alt.crossprod" = "crossprod",
    "tcross.alt.star" = "star"
  )

  if (transpose_method %in% names(alias_map)) {
    method_normalized <- alias_map[[transpose_method]]
  } else {
    method_normalized <- transpose_method
  }

  result <- switch(method_normalized,
    "tcrossprod" = {
      if (verbose) cat("Doing tcrossprod\n")
      Matrix::tcrossprod(r2)
    },
    "crossprod" = {
      if (verbose) cat("Doing tcrossprod using crossprod(t(.))\n")
      crossprod(t(as.matrix(r2)))
    },
    "star" = {
      if (verbose) cat("Doing tcrossprod using %*% t(.)\n")
      r2 %*% t(as.matrix(r2))
    }
  )

  return(result)
}

#' Initialize checkpoint files
#' @inheritParams ped2com
#' @keywords internal

initializeCheckpoint <- function(config = list(
                                   verbose = FALSE,
                                   saveable = FALSE,
                                   resume = FALSE,
                                   save_path = "checkpoint/"
                                 )) {
  # Define checkpoint files
  # Ensure save path exists
  if (config$saveable && !dir.exists(config$save_path)) {
    if (config$verbose) cat("Creating save path...\n")
    dir.create(config$save_path, recursive = TRUE)
  } else if (config$resume && !dir.exists(config$save_path)) {
    stop("Cannot resume from checkpoint. Save path does not exist.")
  }

  checkpoint_files <- list(
    parList = file.path(config$save_path, "parList.rds"),
    lens = file.path(config$save_path, "lens.rds"),
    isPar = file.path(config$save_path, "isPar.rds"),
    iss = file.path(config$save_path, "iss.rds"),
    jss = file.path(config$save_path, "jss.rds"),
    isChild = file.path(config$save_path, "isChild.rds"),
    r_checkpoint = file.path(config$save_path, "r_checkpoint.rds"),
    gen_checkpoint = file.path(config$save_path, "gen_checkpoint.rds"),
    newIsPar_checkpoint = file.path(config$save_path, "newIsPar_checkpoint.rds"),
    mtSum_checkpoint = file.path(config$save_path, "mtSum_checkpoint.rds"),
    ram_checkpoint = file.path(config$save_path, "ram_checkpoint.rds"),
    r2_checkpoint = file.path(config$save_path, "r2_checkpoint.rds"),
    tcrossprod_checkpoint = file.path(config$save_path, "tcrossprod_checkpoint.rds"),
    count_checkpoint = file.path(config$save_path, "count_checkpoint.rds"),
    final_matrix = file.path(config$save_path, "final_matrix.rds")
  )

  return(checkpoint_files)
}

#' Assign parent values based on component type
#' @inheritParams ped2com
.assignParentValue <- function(component) {
  # Set parent values depending on the component type
  if (component %in% c("generation", "additive")) {
    parVal <- .5
  } else if (component %in% c("common nuclear", "mitochondrial", "mtdna", "mitochondria")) {
    parVal <- 1
  } else {
    stop("Don't know how to set parental value")
  }
  return(parVal)
}

#' Load or compute a checkpoint
#' @param file The file path to load the checkpoint from.
#' @param compute_fn The function to compute the checkpoint if it doesn't exist.
#' @param config A list containing configuration parameters such as `resume`, `verbose`, and `saveable`.
#' @param message_resume Optional message to display when resuming from a checkpoint.
#' @param message_compute Optional message to display when computing the checkpoint.
#' @return The loaded or computed checkpoint.
#' @keywords internal
loadOrComputeCheckpoint <- function(file, compute_fn, config, message_resume = NULL, message_compute = NULL) {
  if (config$resume && file.exists(file)) {
    if (config$verbose && !is.null(message_resume)) cat(message_resume)
    return(readRDS(file))
  } else {
    if (config$verbose && !is.null(message_compute)) cat(message_compute)
    result <- compute_fn()
    if (config$saveable) saveRDS(result, file = file)
    return(result)
  }
}

#' Load or compute the isPar matrix
#' @inheritParams loadOrComputeCheckpoint
#' @inheritParams ped2com
#' @param iss The row indices of the sparse matrix.
#' @param jss The column indices of the sparse matrix.
#' @param parVal The value to assign to the non-zero elements of the sparse matrix.
#' @param ped The pedigree dataset.
#' @param checkpoint_files A list of checkpoint file paths.
#'
#' @keywords internal
#' @importFrom Matrix sparseMatrix
.loadOrComputeIsPar <- function(iss, jss, parVal, ped, checkpoint_files, config) {
  isPar <- loadOrComputeCheckpoint(
    file = checkpoint_files$isPar,
    compute_fn = function() {
      Matrix::sparseMatrix(
        i = iss, j = jss, x = parVal,
        dims = c(config$nr, config$nr),
        dimnames = list(ped$ID, ped$ID)
      )
    },
    config = config,
    message_resume = "Resuming: Loading adjacency matrix...\n",
    message_compute = "Initializing adjacency matrix...\n"
  )

  return(isPar)
}

#' Load or compute the isChild matrix
#' @inheritParams loadOrComputeCheckpoint
#' @inheritParams ped2com
#' @param checkpoint_files A list of checkpoint file paths.
#'
#'  @keywords internal

.loadOrComputeIsChild <- function(ped, checkpoint_files, config) {
  isChild <- loadOrComputeCheckpoint(
    file = checkpoint_files$isChild,
    compute_fn = function() isChild(isChild_method = config$isChild_method, ped = ped),
    config = config,
    message_resume = "Resuming: Loading isChild matrix...\n",
    message_compute = "Computing isChild matrix...\n"
  )

  return(isChild)
}

#' Load or compute the inverse diagonal matrix
#' @inheritParams loadOrComputeCheckpoint
#' @inheritParams ped2com
#' @param r The relatedness matrix.
#' @importFrom Matrix Diagonal
#' @keywords internal
#' @return The computed inverse diagonal matrix.


.loadOrComputeInverseDiagonal <- function(r, isChild, checkpoint_files, config) {
  r2 <- loadOrComputeCheckpoint(
    file = checkpoint_files$r2_checkpoint,
    compute_fn = function() {
      r %*% Matrix::Diagonal(x = sqrt(isChild), n = config$nr)
    },
    config = config,
    message_resume = "Resuming: Loading I-A inverse...\n",
    message_compute = "Doing I-A inverse times diagonal multiplication\n"
  )
  if (config$gc) {
    rm(r, isChild)
    gc()
  }
  return(r2)
}



#' parent-child adjacency data
#' @inheritParams loadOrComputeCheckpoint
#' @inheritParams ped2com
#' @param checkpoint_files A list of checkpoint file paths.
#' @param config A list containing configuration parameters such as `resume`, `verbose`, and `saveable`.
#' @param parList A list of parent-child adjacency data.
#' @param lens A vector of lengths for each parent-child relationship.
#' @keywords internal

#' @return A list containing the parent-child adjacency data either loaded from a checkpoint or initialized.
#'

.loadOrComputeParList <- function(checkpoint_files, config, ped = NULL, parList = NULL, lens = NULL) {
  if (config$resume && file.exists(checkpoint_files$parList) && file.exists(checkpoint_files$lens)) {
    if (config$verbose) cat("Resuming: Loading parent-child adjacency data...\n")
    parList <- readRDS(checkpoint_files$parList)
    lens <- readRDS(checkpoint_files$lens)
    computed_indices <- which(!sapply(parList, is.null))
    lastComputed <- if (length(computed_indices) > 0) max(computed_indices) else 0
    if (config$verbose) cat("Resuming from iteration", lastComputed + 1, "\n")
  } else {
    ## Initialize variables
    parList <- vector("list", config$nr)
    lens <- integer(config$nr)
    lastComputed <- 0
    if (config$verbose) cat("Building parent adjacency matrix...\n")
  }

  if (config$resume && file.exists(checkpoint_files$iss) && file.exists(checkpoint_files$jss)) { # fix to check actual
    if (config$verbose) message("Resuming: Constructed matrix...\n")
    jss <- readRDS(checkpoint_files$jss)
    iss <- readRDS(checkpoint_files$iss)
    list_of_adjacencies <- list(iss = iss, jss = jss)
  } else {
    if (config$verbose) message("Computing parent-child adjacency matrix...\n")
    list_of_adjacencies <- computeParentAdjacency(
      ped = ped,
      save_rate_parlist = config$save_rate_parlist,
      checkpoint_files = checkpoint_files,
      component = config$component,
      adjacency_method = config$adjacency_method, # adjacency_method,
      saveable = config$saveable,
      resume = config$resume,
      save_path = config$save_path,
      update_rate = config$update_rate,
      verbose = config$verbose,
      lastComputed = lastComputed,
      config = config,
      parList = parList,
      lens = lens,
      adjBeta_method = config$adjBeta_method
    )

    # Construct sparse matrix


    if (config$verbose) {
      message("Constructed sparse matrix\n")
    }
    if (config$saveable) {
      saveRDS(list_of_adjacencies$jss, file = checkpoint_files$jss)
      saveRDS(list_of_adjacencies$iss, file = checkpoint_files$iss)
    }
  }
  return(list_of_adjacencies)
}

Try the BGmisc package in your browser

Any scripts or data that you put into this service are public.

BGmisc documentation built on June 11, 2025, 1:07 a.m.