tests/testthat/test-convert.R

test_that("can convert genes", {
  # First, define all the dataframes we need
  imgt_df <- data.frame(
    v_gene = c("TRAV12-1*01", "TRBV15*01"),
    d_gene = c(NA, "TRBD1*01"),
    j_gene = c("TRAJ16*01", "TRBJ2-5*01"),
    c_gene = c("TRAC*01", "TRBC2*01"),
    cdr3 = c("CAVLIF", "CASSGF")
  )

  tenx_df <- data.frame(
    v_gene = c("TRAV12-1", "TRBV15"),
    d_gene = c(NA, "TRBD1"),
    j_gene = c("TRAJ16", "TRBJ2-5"),
    c_gene = c("TRAC", "TRBC2"),
    cdr3 = c("CAVLIF", "CASSGF")
  )

  adapt_df <- data.frame(
    v_resolved = c("TCRAV12-01*01", "TCRBV15-01*01"),
    d_resolved = c(NA, "TCRBD01-01*01"),
    j_resolved = c("TCRAJ16-01*01", "TCRBJ02-05*01"),
    cdr3_amino_acid = c("CAVLIF", "CASSGF")
  )

  adapt_v2_df <- data.frame(
    vMaxResolved = c("TCRAV12-01*01", "TCRBV15-01*01"),
    dMaxResolved = c(NA, "TCRBD01-01*01"),
    jMaxResolved = c("TCRAJ16-01*01", "TCRBJ02-05*01"),
    aminoAcid = c("CAVLIF", "CASSGF")
  )

  custom_df <- imgt_df
  colnames(custom_df) <- c("myV", "myD", "myJ", "myC", "myCDR3")

  custom_vj_tenx_df <- data.frame(
    myV = c("TRAV12-1", "TRBV15"),
    myD = c(NA, "TRBD1*01"),
    myJ = c("TRAJ16", "TRBJ2-5"),
    myC = c("TRAC*01", "TRBC2*01"),
    myCDR3 = c("CAVLIF", "CASSGF")
  )

  tenx_to_adapt_df <- adapt_df
  colnames(tenx_to_adapt_df) <- c("v_gene", "d_gene", "j_gene", "cdr3")
  # Insert a 'c_gene' column before 'cdr3'
  tenx_to_adapt_df <- cbind(
    tenx_to_adapt_df[, 1:3],
    c_gene = NA,
    tenx_to_adapt_df["cdr3"]
  )
  tenx_to_adapt_df$c_gene <- as.character(tenx_to_adapt_df$c_gene)

  adapt_to_tenx_df <- tenx_df
  adapt_to_tenx_df$c_gene <- NULL
  colnames(adapt_to_tenx_df) <- c("v_resolved", "d_resolved", "j_resolved", "cdr3_amino_acid")

  adapt_to_imgt_df <- imgt_df
  adapt_to_imgt_df$c_gene <- NULL
  colnames(adapt_to_imgt_df) <- c("v_resolved", "d_resolved", "j_resolved", "cdr3_amino_acid")

  adaptv2_to_tenx_df <- tenx_df
  adaptv2_to_tenx_df$c_gene <- NULL
  colnames(adaptv2_to_tenx_df) <- c("vMaxResolved", "dMaxResolved", "jMaxResolved", "aminoAcid")

  adaptv2_to_imgt_df <- imgt_df
  adaptv2_to_imgt_df$c_gene <- NULL
  colnames(adaptv2_to_imgt_df) <- c("vMaxResolved", "dMaxResolved", "jMaxResolved", "aminoAcid")

  custom_to_tenx_df <- tenx_df
  colnames(custom_to_tenx_df) <- c("myV", "myD", "myJ", "myC", "myCDR3")

  adapt_no_allele_df <- data.frame(
    v_resolved = c("TCRAV12-01", "TCRBV15-01*01"),
    d_resolved = c(NA, "TCRBD01-01"),
    j_resolved = c("TCRAJ16-01*01", "TCRBJ02-05"),
    cdr3_amino_acid = c("CAVLIF", "CASSGF")
  )
  # 10X <-> Adaptive
  suppressMessages(suppressWarnings({
    expect_equal(convert_gene(tenx_df, "tenx", "adaptive"), tenx_to_adapt_df)
    expect_equal(convert_gene(tenx_df, "tenx", "adaptivev2"), tenx_to_adapt_df)
    expect_equal(convert_gene(adapt_df, "adaptive", "tenx"), adapt_to_tenx_df)
    expect_equal(convert_gene(adapt_v2_df, "adaptivev2", "tenx"), adaptv2_to_tenx_df)
    # 10X <-> IMGT
    expect_equal(convert_gene(tenx_df, "tenx", "imgt"), imgt_df)
    expect_equal(convert_gene(imgt_df, "imgt", "tenx"), tenx_df)
    # IMGT <-> Adaptive
    expect_equal(convert_gene(imgt_df, "imgt", "adaptive"), tenx_to_adapt_df)
    expect_equal(convert_gene(imgt_df, "imgt", "adaptivev2"), tenx_to_adapt_df)
    expect_equal(convert_gene(adapt_df, "adaptive", "imgt"), adapt_to_imgt_df)
    expect_equal(convert_gene(adapt_v2_df, "adaptivev2", "imgt"), adaptv2_to_imgt_df)
    # Custom column names
    expect_equal(convert_gene(custom_df, "imgt", "tenx",
      frm_cols = c("myV", "myD", "myJ", "myC")
    ), custom_to_tenx_df)
    # MOUSE
    expect_equal(convert_gene(tenx_df, "tenx", "adaptive", species = "mouse"), tenx_to_adapt_df)
    expect_equal(convert_gene(tenx_df, "tenx", "adaptivev2", species = "mouse"), tenx_to_adapt_df)
    expect_equal(convert_gene(adapt_df, "adaptive", "tenx", species = "mouse"), adapt_to_tenx_df)
    expect_equal(convert_gene(adapt_v2_df, "adaptivev2", "tenx", species = "mouse"), adaptv2_to_tenx_df)
    expect_equal(convert_gene(tenx_df, "tenx", "imgt", species = "mouse"), imgt_df)
    expect_equal(convert_gene(imgt_df, "imgt", "tenx", species = "mouse"), tenx_df)
    expect_equal(convert_gene(imgt_df, "imgt", "adaptive", species = "mouse"), tenx_to_adapt_df)
    expect_equal(convert_gene(imgt_df, "imgt", "adaptivev2", species = "mouse"), tenx_to_adapt_df)
    expect_equal(convert_gene(adapt_df, "adaptive", "imgt", species = "mouse"), adapt_to_imgt_df)
    expect_equal(convert_gene(adapt_v2_df, "adaptivev2", "imgt", species = "mouse"), adaptv2_to_imgt_df)
    expect_equal(convert_gene(custom_df, "imgt", "tenx",
      species = "mouse",
      frm_cols = c("myV", "myD", "myJ", "myC")
    ), custom_to_tenx_df)
    # RHESUS MACAQUE
    expect_equal(convert_gene(tenx_df, "tenx", "adaptive", species = "rhesus"), tenx_to_adapt_df)
    expect_equal(convert_gene(tenx_df, "tenx", "adaptivev2", species = "rhesus"), tenx_to_adapt_df)
    expect_equal(convert_gene(adapt_df, "adaptive", "tenx", species = "rhesus"), adapt_to_tenx_df)
    expect_equal(convert_gene(adapt_v2_df, "adaptivev2", "tenx", species = "rhesus"), adaptv2_to_tenx_df)
    expect_equal(convert_gene(tenx_df, "tenx", "imgt", species = "rhesus"), imgt_df)
    expect_equal(convert_gene(imgt_df, "imgt", "tenx", species = "rhesus"), tenx_df)
    expect_equal(convert_gene(imgt_df, "imgt", "adaptive", species = "rhesus"), tenx_to_adapt_df)
    expect_equal(convert_gene(imgt_df, "imgt", "adaptivev2", species = "rhesus"), tenx_to_adapt_df)
    expect_equal(convert_gene(adapt_df, "adaptive", "imgt", species = "rhesus"), adapt_to_imgt_df)
    expect_equal(convert_gene(adapt_v2_df, "adaptivev2", "imgt", species = "rhesus"), adaptv2_to_imgt_df)
    expect_equal(convert_gene(custom_df, "imgt", "tenx",
      species = "rhesus",
      frm_cols = c("myV", "myD", "myJ", "myC")
    ), custom_to_tenx_df)
    # Some Adaptive genes without allele
    expect_equal(convert_gene(adapt_no_allele_df, "adaptive", "imgt"), adapt_to_imgt_df)
    # Confirm won't convert non-VDJC gene column to NAs
    expect_equal(convert_gene(custom_df, "imgt", "tenx",
      frm_cols = c("myV", "myJ", "myCDR3")
    ), custom_vj_tenx_df)
  }))
})


test_that("bad input raises error", {
  tenx_df <- data.frame(
    v_gene = c("TRAV12-1", "TRBV15"),
    d_gene = c(NA, "TRBD1"),
    j_gene = c("TRAJ16", "TRBJ2-5"),
    c_gene = c("TRAC", "TRBC2"),
    cdr3 = c("CAVLIF", "CASSGF")
  )

  # Same input and output format
  expect_error(convert_gene(tenx_df, "tenx", "tenx"), '"frm" and "to" formats should be different.')
  # Empty input dataframe
  expect_error(convert_gene(data.frame(), "tenx", "imgt"), "Input data is empty.")
})


test_that("chooses correct lookup", {
  lookup_tenx <- system.file("extdata/human", "lookup_from_tenx.csv", package = "TCRconvertR")
  lookup_adapt <- system.file("extdata/human", "lookup_from_adaptive.csv", package = "TCRconvertR")
  lookup_imgt <- system.file("extdata/human", "lookup.csv", package = "TCRconvertR")

  # Species we don't have lookups for
  expect_error(
    choose_lookup("tenx", "imgt", "non-existent-species", verbose = FALSE),
    "Lookup table not found, please run build_lookup_from_fastas()."
  )
  # Different 'frm' formats
  expect_equal(choose_lookup("tenx", "imgt", verbose = FALSE), lookup_tenx)
  expect_equal(choose_lookup("adaptivev2", "imgt", verbose = FALSE), lookup_adapt)
  expect_equal(choose_lookup("imgt", "tenx", verbose = FALSE), lookup_imgt)
})


test_that("chooses correct frm_cols", {
  col_ref <- list(
    adaptive = c("v_resolved", "d_resolved", "j_resolved"),
    adaptivev2 = c("vMaxResolved", "dMaxResolved", "jMaxResolved"),
    imgt = c("v_gene", "d_gene", "j_gene", "c_gene"),
    tenx = c("v_gene", "d_gene", "j_gene", "c_gene")
  )

  adapt_df <- data.frame(
    v_resolved = c("TCRAV12-01*01", "TCRBV15-01*01"),
    d_resolved = c(NA, "TCRBD01-01*01"),
    j_resolved = c("TCRAJ16-01*01", "TCRBJ02-05*01"),
    cdr3_amino_acid = c("CAVLIF", "CASSGF")
  )

  adapt_v2_df <- data.frame(
    vMaxResolved = c("TCRAV12-01*01", "TCRBV15-01*01"),
    dMaxResolved = c(NA, "TCRBD01-01*01"),
    jMaxResolved = c("TCRAJ16-01*01", "TCRBJ02-05*01"),
    aminoAcid = c("CAVLIF", "CASSGF")
  )

  imgt_df <- data.frame(
    v_gene = c("TRAV12-1*01", "TRBV15*01"),
    d_gene = c(NA, "TRBD1*01"),
    j_gene = c("TRAJ16*0", "TRBJ2-5*01"),
    c_gene = c("TRAC*01", "TRBC2*01"),
    cdr3 = c("CAVLIF", "CASSGF")
  )

  tenx_df <- data.frame(
    v_gene = c("TRAV12-1", "TRBV15"),
    d_gene = c(NA, "TRBD1"),
    j_gene = c("TRAJ16", "TRBJ2-5"),
    c_gene = c("TRAC", "TRBC2"),
    cdr3 = c("CAVLIF", "CASSGF")
  )

  custom_df <- data.frame(
    myV = c("TRAV12-1*01", "TRBV15*01"),
    myD = c(NA, "TRBD1*01"),
    myJ = c("TRAJ16*0", "TRBJ2-5*01"),
    myC = c("TRAC*01", "TRBC2*01"),
    myCDR3 = c("CAVLIF", "CASSGF")
  )

  suppressWarnings({
    expect_equal(which_frm_cols(adapt_df, "adaptive", verbose = FALSE), col_ref$adaptive)
    expect_equal(which_frm_cols(adapt_v2_df, "adaptivev2", verbose = FALSE), col_ref$adaptivev2)
    expect_equal(which_frm_cols(imgt_df, "imgt", verbose = FALSE), col_ref$imgt)
    expect_equal(which_frm_cols(tenx_df, "tenx", verbose = FALSE), col_ref$tenx)
    # Custom columns
    custom_col <- c("myV", "myD", "myJ", "myC")
    expect_equal(which_frm_cols(custom_df, "tenx", frm_cols = custom_col, verbose = FALSE), custom_col)
    # Non-existent column
    expect_error(
      which_frm_cols(tenx_df, "tenx", frm_cols = c("v_gene", "j_gene", "x_gene"), verbose = FALSE),
      "These columns are not in the input dataframe: x_gene"
    )
  })
})


test_that("choose_lookup verbose flag works", {
  # Capture messages when verbose = TRUE
  expect_message(choose_lookup("tenx", "adaptive"),
    fixed = TRUE,
    "Converting from 10X. Using *01 as allele for all genes."
  )
  expect_message(choose_lookup("adaptive", "imgt"),
    fixed = TRUE,
    "Converting from Adaptive to IMGT. Using *01 for genes lacking alleles."
  )
  # Ensure no messages when verbose = FALSE
  expect_silent(choose_lookup("tenx", "adaptive", verbose = FALSE))
  expect_silent(choose_lookup("adaptive", "imgt", verbose = FALSE))
})


test_that("which_frm_cols verbose flag works", {
  custom_df <- data.frame(
    myV = c("TRAV12-1*01", "TRBV15*01"),
    myD = c(NA, "TRBD1*01"),
    myJ = c("TRAJ16*0", "TRBJ2-5*01"),
    myC = c("TRAC*01", "TRBC2*01"),
    myCDR3 = c("CAVLIF", "CASSGF")
  )
  # Custom columns
  expect_message(
    which_frm_cols(custom_df, "imgt", frm_cols = c("myV", "myJ")),
    "Using custom column names: myV, myJ"
  )
  # Custom columns with verbose = FALSE
  expect_silent(
    which_frm_cols(custom_df, "imgt", frm_cols = c("myV", "myJ"), verbose = FALSE)
  )
  # IMGT format without column names, still expect warnings with verbose = FALSE
  expect_warning(
    which_frm_cols(custom_df, "imgt", verbose = FALSE),
    "No column names for IMGT data. Using 10X columns: v_gene, d_gene, j_gene, c_gene"
  )
})


test_that("convert_gene verbose flag works", {
  # Note, the flag mostly affects downstream functions called by convert_gene.
  tenx_df_bad <- data.frame(
    v_gene = c("TRAV12-1", "TRBV15"),
    d_gene = c(NA, "TRBD1"),
    j_gene = c("TRAJ16", "BAD_J_GENE"),
    c_gene = c("TRAC", "TRBC2"),
    cdr3 = c("CAVLIF", "CASSGF")
  )

  captured_warnings <- character()

  # Capture warnings without printing
  result <- withCallingHandlers(
    convert_gene(tenx_df_bad, "tenx", "adaptive", verbose = FALSE),
    warning = function(w) {
      # Add each one as it comes up to our vector above
      captured_warnings <<- c(captured_warnings, conditionMessage(w))
      # Prevent warnings from coming up and being shown
      invokeRestart("muffleWarning")
    }
  )

  # Verify expected warnings
  expect_true(any(grepl(
    "Adaptive only captures VDJ genes; C genes will be NA.",
    captured_warnings
  )))
  expect_true(any(grepl(
    "These genes are not in IMGT for this species and will be replaced with NA",
    captured_warnings
  )))
})

Try the TCRconvertR package in your browser

Any scripts or data that you put into this service are public.

TCRconvertR documentation built on June 8, 2025, 10:43 a.m.