HLAdivR exists to enable the examination of HLA diversity in a simple and concise way consisting of three basic functions. HLA_AlignedSeqGet()
to extract the amino acid sequence of individual HLA alleles, HLADiversityScore()
to calculate the diversity score between pairs of HLA alleles using different metrics and GetDistScore()
to use a predefined distance matrix between known HLA alleles to extract a score.
knitr::opts_chunk$set(echo = TRUE)
library(HLAdivR) library(tidyverse)
Using HLA_AllignedSeqGet()
is simple and only requires the name of the HLA allele in the following format:
HLA_AlignedSeqGet('A*24:74:01')
The output of HLA_AlignedSeqGet gives the sequence aligned to reference alleles where '-' indicate no change from the reference allele and '*' indicates the allele is truncated and '.' indicates an insertion appears in that location in alternate alleles.
The reference sequences are as follows:
HLA_AlignedSeqGet('A*01:01:01:01') HLA_AlignedSeqGet('B*07:02:01:01') HLA_AlignedSeqGet('C*01:02:01:01')
Full sequences for any allele can be calculated using the full=TRUE
option:
HLA_AlignedSeqGet('A*24:74:01', full = TRUE)
The diversity score between two alleles of the same class can be calculated as follows:
HLADiversityScore('A*24:74:01','A*01:178N')
The default score system is Grantham (@grantham1974amino) with the calculation only taking place on exons 2 and 3. Other scoring symtems exist such as Sandberg (@sandberg2003quantifying), and the simple pdist that uses a binary score of 1 if the amino acids do not match. Phylogenetic methods such as Point Accepted Mutation (PAM) and JTT utilised in the Phylip package are also supported (@retief2000phylogenetic). Note that for this to work the program protdist has to be installed on your computer and a path to it must be supplied.
HLADiversityScore('A*24:74:01','A*01:178N',diversity.measure = 'sandberg') HLADiversityScore('A*24:74:01','A*01:178N',diversity.measure = 'pdist') # test.score1 <- HLADiversityScore('A*24:74:01','A*01:178N',diversity.measure = 'JTT', # protdist.path = '~/Documents/Programs/phylip-3.695/exe/protdist.app/Contents/MacOS') # test.score2 # test.score2 <- HLADiversityScore('A*24:74:01','A*01:178N',diversity.measure = 'PAM', # protdist.path = '~/Documents/Programs/phylip-3.695/exe/protdist.app/Contents/MacOS') # test.score2
GetDistScore()
is a function designed to use a pre-calculated distance matrix between HLA alleles. It is a relatively simple process to create such a matrix using the Grantham distance as an example for 100 randomly chosen HLA-A alleles.
data("aligned_HLA_seq") all.alleles <- aligned_HLA_seq$A$X1 all.alleles <- unique(sapply(strsplit(all.alleles,split = ":"), FUN = function(x) paste(x[1],x[2],sep = ':'))) set.seed(1) samp.size <- 100 all.alleles.samp <- sample(all.alleles,size = samp.size,replace = FALSE) # Create distance matrix grantham.dist <- matrix(0,samp.size,samp.size) colnames(grantham.dist) <- all.alleles.samp rownames(grantham.dist) <- all.alleles.samp for(i in 1:(samp.size)){ for(j in 1:(samp.size)){ grantham.dist[i,j] <- HLADiversityScore(rownames(grantham.dist)[i], colnames(grantham.dist)[j],diversity.measure = 'grantham', exons23 = TRUE) } }
With the distance matrix created GetDistScore
can now be used.
GetDistScore('A*02:703', 'A*02:365',dist.mat = grantham.dist)
An alternative distance matrix can be created based on the distances between the HLA-A alleles on an MDS plot based on these grantham scores
grantham.dist2 <- as.dist(grantham.dist) fit <- cmdscale(grantham.dist2, eig = TRUE, k = 2) x <- fit$points[,1] y <- fit$points[,2] MDS.df <- data.frame(MDS1 = x, MDS2 = y, allele = rownames(fit$points)) library(tidyverse) MDS.df <- MDS.df %>% mutate(allele.group = substr(allele,1,4))
Here are the HLA alleles projected onto an MDS plot based on the Grantham distance between them:
ggplot(MDS.df, aes(MDS1, MDS2)) + geom_point(aes(colour = factor(allele.group)))
MDS.dist <- matrix(0,samp.size,samp.size) colnames(MDS.dist) <- all.alleles.samp rownames(MDS.dist) <- all.alleles.samp for(i in 1:(samp.size)){ for(j in 1:(samp.size)){ MDS.dist[i,j] <- as.numeric(dist(MDS.df[c(which(MDS.df$allele == rownames(MDS.dist)[i]), which(MDS.df$allele == colnames(MDS.dist)[j])), c(1,2)], method = 'euclidean')) } } GetDistScore('A*02:703', 'A*02:365',dist.mat = MDS.dist) GetDistScore('A*02:703', 'A*02:365',dist.mat = grantham.dist)
plot(MDS.dist, grantham.dist) abline(0,1, col = 'red')
The 2019 paper by Chowell et al. "Evolutionary divergence of HLA class I genotype impacts efficacy of cancer immunotherapy" (@chowell2019evolutionary) calculates HLA diversity using the Grantham method, they construct a score called the meanHED which is the average HLA diversity of HLA-A, B and C in exons 2 and 3. This score can be easily recreated using HLAdivR.
data("Chowell_2019_clinical") chowell.hla <- Chowell_2019_clinical$HLAI_Genotype %>% strsplit(',') chowell.hla.df <-Reduce(rbind, chowell.hla) %>% as.data.frame() %>% rename(A1 = V1, A2 =V2, B1 = V3, B2 = V4, C1=V5, C2=V6) row.names(chowell.hla.df) <- NULL # Change names allele.change <- function(x){ return(paste0(substr(x,1,1),'*', substr(x,2,3),':',substr(x,4,5)))} chowell.hla.df <- chowell.hla.df %>% mutate_all(allele.change) HLADiversityScore_v <- Vectorize(HLADiversityScore) # Calculate A, B and C grantham distance chowell.hla.df.upd2 <- chowell.hla.df %>% mutate(A.grantham = HLADiversityScore_v(A1,A2, exons23 = TRUE)) %>% mutate(B.grantham = HLADiversityScore_v(B1,B2, exons23 = TRUE)) %>% mutate(C.grantham = HLADiversityScore_v(C1,C2, exons23 = TRUE)) %>% mutate(meanHED = (A.grantham + B.grantham + C.grantham)/3) %>% mutate(any.homozygous = ifelse(A.grantham == 0, TRUE, ifelse(B.grantham == 0, TRUE, ifelse(C.grantham == 0, TRUE, FALSE))))
plot(Chowell_2019_clinical$Mean_HED, chowell.hla.df.upd2$meanHED, xlab = 'Chowell calculated HED', ylab = 'HLAdivR calculated HED') abline(0,1, col = 'red')
devtools::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.