R/HLA_mismatch_base.R

Defines functions HLA_mismatch_base

Documented in HLA_mismatch_base

#' @title HLA_mismatch_base
#'
#' @description A function to return a string of mismatches between recipient
#' and donor HLA genotypes represented as GL strings. The function finds
#' mismatches based on the direction of comparison specified in the inputs
#' and also handles homozygosity. As the name implies, this function is the base
#' for all other mismatch (and matching) functions. This function is not meant to be
#' called directly; it is better to use one of the derivative functions.
#'
#' @param GL_string_recip A GL string representing the recipient's HLA genotype.
#' @param GL_string_donor A GL string representing the donor's HLA genotype.
#' @param loci A character vector specifying the loci to be considered for
#' mismatch calculation. HLA-DRB3/4/5 (and their serologic equivalents DR51/52/53)
#' are considered once locus for this function, and should be called in this argument
#' as "HLA-DRB3/4/5" or "HLA-DR51/52/53", respectively.
#' @param direction A character string indicating the direction of mismatch.
#' Options are "HvG" (host vs. graft) or "GvH" (graft vs. host).
#' @param homozygous_count An integer specifying how to handle homozygosity.
#' Defaults to 2, where homozygous alleles are treated as duplicated for
#' mismatch calculations. Can be specified to be 1, in which case homozygous
#' alleles are treated as single occurrences without duplication.
#'
#' @return A character vector, where each element is a string summarizing the
#' mismatches for the specified loci. The strings are formatted as
#' comma-separated locus mismatch entries if multiple loci are supplied, or
#' simple GL strings if a single locus is supplied.

#'
#' @examples
#' file <- HLA_typing_1[, -1]
#' GL_string <- HLA_columns_to_GLstring(file, HLA_typing_columns = everything())
#'
#' GL_string_recip <- GL_string[1]
#' GL_string_donor <- GL_string[2]
#'
#' loci <- c("HLA-A", "HLA-DRB3/4/5", "HLA-DPB1")
#' mismatches <- HLA_mismatch_base(GL_string_recip, GL_string_donor, loci, direction = "HvG")
#' print(mismatches)
#'
#' @export
#'
#' @importFrom stringr str_split
#' @importFrom stringr str_flatten
#' @importFrom stringr str_detect
#' @importFrom stringr str_replace
#' @importFrom stringr str_c
#' @importFrom purrr keep
#' @importFrom purrr discard
#' @importFrom purrr map
#' @importFrom purrr map_chr
#' @importFrom purrr map2_chr
#' @importFrom purrr modify_if
#'

HLA_mismatch_base <- function(GL_string_recip, GL_string_donor, loci, direction, homozygous_count = 2) {
  direction <- match.arg(direction, c("HvG", "GvH"))
  # Ensure input vectors are of the same length - each input should be a single GL string.
  if (length(GL_string_recip) != length(GL_string_donor)) {
    stop("Recipient and donor GL strings must be of equal length")
  }

  # Check for ambiguity
  if (any(str_detect(GL_string_recip, "[|/]") | str_detect(GL_string_donor, "[|/]"))) {
    stop("The matching/mismatching functions do not support ambiguous GL strings containing | or /. Process your GL strings to result in unambiguous genotypes before using these functions.")
  }

  # Function to preprocess GL strings: handle null alleles and homozygosity
  preprocess_GL_string <- function(GL_string, homozygous_count) {
    # Split GL string into alleles
    alleles <- str_split(GL_string, "\\+", simplify = TRUE)

    # Replace any allele that ends with "N" with "NullN".
    alleles <- str_replace(alleles, "(?<=HLA-[:alnum:]{1,4}\\*).+N$", "XXN")

    # Handle homozygosity
    if (length(alleles) == 1 && homozygous_count == 2) {
      alleles <- rep(alleles, times = homozygous_count)
    } else if (length(alleles) == 2 && alleles[1] == alleles[2] && homozygous_count == 1) {
      alleles <- alleles[1]
    }

    # Return processed alleles as a single GL string
    return(str_flatten(alleles, collapse = "+", na.rm = TRUE))
  }

  # Function to process each pair of recipient/donor alleles
  process_pair <- function(recip_str, donor_str) {
    # Split GL strings by "^" to separate loci
    recip_alleles_list <- unlist(strsplit(recip_str, "\\^"))
    donor_alleles_list <- unlist(strsplit(donor_str, "\\^"))

    # Function to process the alleles strings.
    process_alleles <- function(alleles_list, homozygous_count) {
      # Process DRB3/4/5 or DR51/52/53 alleles to add them to a single locus "HLA-DRB3/4/5"
      alleles_list_DRB345 <- alleles_list %>%
        keep(str_detect(., "(HLA-DRB[345])|(HLA-DR5[123])")) %>%
        paste(collapse = "+")

      # Remove DRB3/4/5 or DR51/52/53 alleles from the original list
      alleles_list_no_DRB345 <- alleles_list %>%
        keep(!str_detect(., "(HLA-DRB[345])|(HLA-DR5[123])"))

      # Combine lists and preprocess each allele string
      alleles_list_processed <- c(alleles_list_no_DRB345, alleles_list_DRB345) %>%
        keep(~ str_detect(.x, "[:graph:]")) %>%
        map_chr(preprocess_GL_string, homozygous_count)

      # Filter out any NA values
      alleles_list_processed <- alleles_list_processed[!is.na(alleles_list_processed)]

      return(alleles_list_processed)
    }

    # Process recipient and donor allele strings:
    recip_alleles_list_processed <- process_alleles(recip_alleles_list, homozygous_count)
    donor_alleles_list_processed <- process_alleles(donor_alleles_list, homozygous_count)

    # Set names for each locus
    names(recip_alleles_list_processed) <- map_chr(
      recip_alleles_list_processed,
      ~ case_when(
        # Molecular nomenclature
        str_detect(.x, "\\*") && str_detect(.x, "HLA-DRB[345]") ~ "HLA-DRB3/4/5",
        str_detect(.x, "\\*") ~ strsplit(.x, "\\*")[[1]][1],
        # Serologic nomenclature
        !str_detect(.x, "\\*") && str_detect(.x, "HLA-DR5[123]") ~ "HLA-DR51/52/53",
        !str_detect(.x, "\\*") ~ str_extract(.x, "^HLA-[A-Za-z]+")
      )
    )
    names(donor_alleles_list_processed) <- map_chr(
      donor_alleles_list_processed,
      ~ case_when(
        # Molecular nomenclature
        str_detect(.x, "\\*") && str_detect(.x, "HLA-DRB[345]") ~ "HLA-DRB3/4/5",
        str_detect(.x, "\\*") ~ strsplit(.x, "\\*")[[1]][1],
        # Serologic nomenclature
        !str_detect(.x, "\\*") && str_detect(.x, "HLA-DR5[123]") ~ "HLA-DR51/52/53",
        !str_detect(.x, "\\*") ~ str_extract(.x, "^HLA-[A-Za-z]+")
      )
    )

    # Find which supplied loci are missing from recipient and donor genotypes
    missing_loci_from_recipient <- setdiff(loci, names(recip_alleles_list_processed))
    missing_loci_from_donor <- setdiff(loci, names(donor_alleles_list_processed))

    if (length(missing_loci_from_recipient) > 0 || length(missing_loci_from_donor) > 0) {
      stop(paste(
        "Either the recipient and/or donor GL strings are missing these loci:",
        paste(union(missing_loci_from_recipient, missing_loci_from_donor), collapse = ", ")
      ))
    }

    # Mismatch results calculation
    mismatch_results <- map(loci, function(locus_name) {
      # Pull out the allele list for each locus.
      recip_alleles_str <- recip_alleles_list_processed[locus_name]
      donor_alleles_str <- donor_alleles_list_processed[locus_name]

      if (direction == "GvH") {
        # Calculate matches (Including nulls)
        matched_allele_1 <- intersect(unlist(strsplit(donor_alleles_str, "\\+"))[1], unlist(strsplit(recip_alleles_str, "\\+")))
        matched_allele_2 <- intersect(unlist(strsplit(donor_alleles_str, "\\+"))[2], unlist(strsplit(recip_alleles_str, "\\+")))
        matched_alleles <- discard(c(matched_allele_1, matched_allele_2), is.na)
        # Calculate mismatches (excluding nulls)
        recip_valid <- unlist(strsplit(recip_alleles_str, "\\+"))
        recip_valid <- recip_valid[!str_detect(recip_valid, "[Nn]$")]
        mismatched_allele_1 <- setdiff(recip_valid[1], unlist(strsplit(donor_alleles_str, "\\+")))
        mismatched_allele_2 <- setdiff(recip_valid[2], unlist(strsplit(donor_alleles_str, "\\+")))
        mismatched_alleles <- discard(c(mismatched_allele_1, mismatched_allele_2), is.na)
        # Count number of matches and mismatches
        total_match_mismatch <- length(matched_alleles) + length(mismatched_alleles)
        # If total matches + mismatches < 2, and homozygous_count == 2, repeat mismatched alleles.
        if (total_match_mismatch < 2 && homozygous_count == 2) {
          mismatched_alleles <- rep(mismatched_alleles, times = homozygous_count)
        }
      } else if (direction == "HvG") {
        # Calculate matches (Including nulls)
        matched_allele_1 <- intersect(unlist(strsplit(recip_alleles_str, "\\+"))[1], unlist(strsplit(donor_alleles_str, "\\+")))
        matched_allele_2 <- intersect(unlist(strsplit(recip_alleles_str, "\\+"))[2], unlist(strsplit(donor_alleles_str, "\\+")))
        matched_alleles <- discard(c(matched_allele_1, matched_allele_2), is.na)
        # Calculate mismatches (excluding nulls)
        donor_valid <- unlist(strsplit(donor_alleles_str, "\\+"))
        donor_valid <- donor_valid[!str_detect(donor_valid, "[Nn]$")]
        mismatched_allele_1 <- setdiff(donor_valid[1], unlist(strsplit(recip_alleles_str, "\\+")))
        mismatched_allele_2 <- setdiff(donor_valid[2], unlist(strsplit(recip_alleles_str, "\\+")))
        mismatched_alleles <- discard(c(mismatched_allele_1, mismatched_allele_2), is.na)
        # Count number of matches and mismatches
        total_match_mismatch <- length(matched_alleles) + length(mismatched_alleles)
        # If total matches + mismatches < 2, and homozygous_count == 2, repeat mismatched alleles.
        if (total_match_mismatch < 2 && homozygous_count == 2) {
          mismatched_alleles <- rep(mismatched_alleles, times = homozygous_count)
        }
      } else {
        stop("Direction must either be 'GvH', or 'HvG'.")
      }

      # Create a string of mismatched alleles or 'NA' if no mismatches are found.
      allele_mismatches_str <-
        if (length(mismatched_alleles) > 0) {
          paste0(locus_name, "=", paste(mismatched_alleles, collapse = "+"))
        } else {
          paste0(locus_name, "=", "NA")
        }
    })

    result <- paste(mismatch_results, collapse = ", ")

    # If only a single locus was selected in the arguments, output without starting with the locus name followed by an equal sign.
    if (length(loci) == 1) {
      result %>%
        str_replace(str_c(loci, "="), "") %>%
        na_if("NA")
    } else {
      return(result)
    }
  }

  # Return final result by applying the GL strings to the process_pair function defined above.
  map2_chr(GL_string_recip, GL_string_donor, process_pair)
}

globalVariables(c(".", "process_alleles"))

Try the immunogenetr package in your browser

Any scripts or data that you put into this service are public.

immunogenetr documentation built on Aug. 21, 2025, 5:52 p.m.