# tests/testthat/test-format_ensembl_vep.R

context("format_ensembl_vep")

# Column names applied to every single-row input fixture in this file,
# in the order the fixture values are listed.
my_headers <- c(
  "ALT_ID", "RS_ID", "POS", "A1", "A2", "MAF", "MA", "INFO"
)

test_that("Can deal with SNV", {

  # Input fixture for a single-nucleotide variant (a loss of stop / gain of
  # stop mutation), one value per `my_headers` column. Kept so the test can
  # be completed later; it is not passed to format_ensembl_vep() yet.
  r1 <- c(
    "1:109650558_G_T", "rs62623709", "109650558", "G", "T",
    "0.000853376", "T", "0.909332"
  )

  # No expectations exist for this case yet. Skip explicitly so the gap is
  # reported with a reason rather than looking like a finished test.
  skip("SNV expectations for format_ensembl_vep() are not implemented yet")

})

test_that("DELETION 1", {

  # Single-row input, one value per `my_headers` column (A1 = AC, A2 = A).
  values <- c(
    "1:11254006_AC_A", "1:11254006_AC_A", "11254006", "AC", "A",
    "0.213164", "AC", "0.966333"
  )
  indel <- as.data.frame(
    matrix(values, nrow = 1, dimnames = list(NULL, my_headers))
  )
  observed <- format_ensembl_vep(df = indel)

  # Reference record keyed by output column name. ENS_STRAND and ENS_ID are
  # listed for completeness but are not asserted below.
  expected <- data.frame(
    ENS_CHR = "1",
    ENS_START = "11254006",
    ENS_END = "11254006",
    ENS_ALLELE = "A/AC",
    ENS_STRAND = "+",
    ENS_ID = "1:11254006_AC_A",
    stringsAsFactors = FALSE
  )

  # Compare as character so the column storage type does not matter.
  expect_equal(
    as.character(observed$ENS_CHR), as.character(expected$ENS_CHR)
  )
  expect_equal(
    as.character(observed$ENS_START), as.character(expected$ENS_START)
  )
  expect_equal(
    as.character(observed$ENS_END), as.character(expected$ENS_END)
  )
  expect_equal(
    as.character(observed$ENS_ALLELE), as.character(expected$ENS_ALLELE)
  )

})

test_that("Deletion 2", {

  # Single-row input, one value per `my_headers` column
  # (A1 = ATTTTTT, A2 = A).
  values <- c(
    "2:164541449_ATTTTTT_A", "rs767585131", "164541449", "ATTTTTT", "A",
    "0.151642", "A", "0.938078"
  )
  indel <- as.data.frame(
    matrix(values, nrow = 1, dimnames = list(NULL, my_headers))
  )
  observed <- format_ensembl_vep(df = indel)

  # Reference record keyed by output column name. ENS_STRAND and ENS_ID are
  # listed for completeness but are not asserted below.
  expected <- data.frame(
    ENS_CHR = "2",
    ENS_START = "164541449",
    ENS_END = "164541449",
    ENS_ALLELE = "A/ATTTTTT",
    ENS_STRAND = "+",
    ENS_ID = "rs767585131",
    stringsAsFactors = FALSE
  )

  # Compare as character so the column storage type does not matter.
  expect_equal(
    as.character(observed$ENS_CHR), as.character(expected$ENS_CHR)
  )
  expect_equal(
    as.character(observed$ENS_START), as.character(expected$ENS_START)
  )
  expect_equal(
    as.character(observed$ENS_END), as.character(expected$ENS_END)
  )
  expect_equal(
    as.character(observed$ENS_ALLELE), as.character(expected$ENS_ALLELE)
  )

})


test_that("Insertion 1", {

  # Single-row input for an actual insertion: A2 (CA) is one base longer
  # than A1 (C). Values are listed in `my_headers` column order.
  values <- c(
    "3:135909384_C_CA", "rs377140738", "135909384", "C", "CA",
    "0.258913", "CA", "0.951976"
  )
  indel <- as.data.frame(
    matrix(values, nrow = 1, dimnames = list(NULL, my_headers))
  )
  res <- format_ensembl_vep(df = indel)

  # Reference record keyed by output column name. These are the same
  # expected values this file asserts for this variant in the
  # "Difference in reference allele" test. ENS_STRAND and ENS_ID are listed
  # for completeness but are not asserted below.
  ref <- data.frame(
    ENS_CHR = "3",
    ENS_START = "135909384",
    ENS_END = "135909385",
    ENS_ALLELE = "CA/C",
    ENS_STRAND = "+",
    ENS_ID = "rs377140738",
    stringsAsFactors = FALSE
  )

  # Compare as character so the column storage type does not matter.
  expect_equal(as.character(res$ENS_CHR), as.character(ref$ENS_CHR))
  expect_equal(as.character(res$ENS_START), as.character(ref$ENS_START))
  expect_equal(as.character(res$ENS_END), as.character(ref$ENS_END))
  expect_equal(as.character(res$ENS_ALLELE), as.character(ref$ENS_ALLELE))

})



test_that("Difference in reference allele", {

  ## REF IS MINOR ALLELE

  # Single-row input, one value per `my_headers` column (A1 = C, A2 = CA).
  first_values <- c(
    "3:135909384_C_CA", "rs377140738", "135909384", "C", "CA",
    "0.258913", "CA", "0.951976"
  )
  first_input <- as.data.frame(
    matrix(first_values, nrow = 1, dimnames = list(NULL, my_headers))
  )
  first_observed <- format_ensembl_vep(df = first_input)

  # Reference record keyed by output column name. ENS_STRAND and ENS_ID are
  # listed for completeness but are not asserted below.
  first_expected <- data.frame(
    ENS_CHR = "3",
    ENS_START = "135909384",
    ENS_END = "135909385",
    ENS_ALLELE = "CA/C",
    ENS_STRAND = "+",
    ENS_ID = "rs377140738",
    stringsAsFactors = FALSE
  )

  # Compare as character so the column storage type does not matter.
  expect_equal(
    as.character(first_observed$ENS_CHR),
    as.character(first_expected$ENS_CHR)
  )
  expect_equal(
    as.character(first_observed$ENS_START),
    as.character(first_expected$ENS_START)
  )
  expect_equal(
    as.character(first_observed$ENS_END),
    as.character(first_expected$ENS_END)
  )
  expect_equal(
    as.character(first_observed$ENS_ALLELE),
    as.character(first_expected$ENS_ALLELE)
  )


  ## ALT IS MINOR ALLELE

  # Single-row input, one value per `my_headers` column (A1 = CTT, A2 = C).
  second_values <- c(
    "2:213405797_CTT_C", "2:213405797_CTT_C", "213405797", "CTT", "C",
    "0.411486", "C", "0.984844"
  )
  second_input <- as.data.frame(
    matrix(second_values, nrow = 1, dimnames = list(NULL, my_headers))
  )
  second_observed <- format_ensembl_vep(df = second_input)

  # Intended output layout, as described by the test author:
  #   Column 1: chromosome
  #   Column 2: start position
  #   Column 3: end position; differs from the start when the effect allele
  #             is longer than one character:
  #             start + number of characters in effect allele - 1
  #   Column 4: effect allele / other allele
  #   Column 5: strand (always + for UKBB)
  #   Column 6: rsid, or another unique ID for variants that have no ID
  #             starting with rs

  # Reference record for the second scenario (currently unused, see below).
  second_expected <- data.frame(
    ENS_CHR = "2",
    ENS_START = "213405797",
    ENS_END = "213405799",
    ENS_ALLELE = "CTT/C",
    ENS_STRAND = "+",
    ENS_ID = "2:213405797_CTT_C",
    stringsAsFactors = FALSE
  )

  # NOTE(review): the expectations for this scenario are disabled, so it
  # only checks that format_ensembl_vep() runs without error on this input.
  # The reference above puts A1 first in ENS_ALLELE, whereas the asserted
  # cases in this file put A2 first -- confirm the intended values before
  # re-enabling.
  # expect_equal(
  #   as.character(second_observed$ENS_CHR),
  #   as.character(second_expected$ENS_CHR)
  # )
  # expect_equal(
  #   as.character(second_observed$ENS_START),
  #   as.character(second_expected$ENS_START)
  # )
  # expect_equal(
  #   as.character(second_observed$ENS_END),
  #   as.character(second_expected$ENS_END)
  # )
  # expect_equal(
  #   as.character(second_observed$ENS_ALLELE),
  #   as.character(second_expected$ENS_ALLELE)
  # )

})
# frhl/our documentation built on Feb. 5, 2021, 7:30 p.m.