#' Determines the overlap of predicted HLA binders with transmembrane
#' helices for all HLA supertypes.
#'
#' Input:
#' * tmh_9mers_as_data_filename
#' * protein_lengths_filename
#'
#' Output:
#' * tmh_overlapping_binders_as_data_filename
#' @inheritParams default_params_doc
#' @author Richèl J.C. Bilderbeek, adapted from Johannes Textor
#' @export
calculate_overlap <- function(
tmh_9mers_as_data_filename,
protein_lengths_filename,
tmh_overlapping_binders_as_data_filename
) {
tmh.9mers <- NULL; rm(tmh.9mers) # nolint, fixes warning: no visible binding for global variable
load(tmh_9mers_as_data_filename) # Used to be load("work/tmh.9mers.Rdata")
pldf <- utils::read.table(protein_lengths_filename, row.names=1 )
pl <- pldf[,1]
names(pl) <- rownames(pldf)
ninemers.in.tmhs <- sapply( tmh.9mers, length )
ninemers.total <- pl-8
ninemers.total[ninemers.total < 0] <- 0
perc.binders <- function( mhc = "A01-01" ){
d <- data.table::fread(paste0("binding-predictions/HLA-",mhc,".txt"), data.table=FALSE)[,c(1,3)]
colnames(d) <- c("protein", "start")
d <- utils::unstack( d, start ~ protein )
binders.in.tmhs <- sapply( names(d),
function(p) length( intersect( d[[p]], tmh.9mers[[p]] ) ) )
c( sum( binders.in.tmhs ), sum( sapply( d, length ) ) )
}
# Original
hlas <- get_hlas()
r <- sapply( hlas,
perc.binders )
grDevices::pdf("plots/figure-1-a.pdf",width=4,height=4,useDingbats=FALSE)
graphics::par( mar=c(6,5,.2,.2) )
graphics::barplot( 100* r[1,] / r[2,], xlab="",
ylab="% epitopes overlapping\nwith transmembrane helix", border=NA, las=2 )
graphics::mtext( "HLA haplotype", 1, line=4.5 )
pbin <- sum( ninemers.in.tmhs) / sum(ninemers.total)
# From paper, to explain the 0.02:
#
# Confidence intervals for repeated
# sampling under this null hypothesis were determined from the
# critical region of this binomial test, for which we chose an alpha
# value of 0.001 (R code provided as Supplemental Information).
# Note that the independence assumption of the binomial test is
# approximate in this case, but because the predicted HLA binders
# constitute only 2% of the 9-mers in the human proteome, this
# approximation is reasonable.
# x: number of successes
# n: number of trials
# n >= x
ci <- c(0.0, 1.0) # Testing values
# For full run, this is true
if (sum(ninemers.total) >= sum(ninemers.in.tmhs)) {
#expect_gte(sum(ninemers.total), sum(ninemers.in.tmhs))
ci <- (
stats::binom.test(
x = round(0.02*sum(ninemers.in.tmhs)),
n = round(0.02*sum(ninemers.total)),
p = pbin,
conf.level = 0.99
)
)$conf
}
graphics::abline( h=100*ci[1], col=2 )
graphics::abline( h=100*ci[2], col=2 )
grDevices::dev.off()
save(r, file = tmh_overlapping_binders_as_data_filename)
for( h in hlas ){
if (pbin <= 1.0) {
# Will work for full run
cat( h, "\t", stats::binom.test( r[1,h], r[2,h], p = pbin )$p.value," \n" )
} else {
# Because pbin >= 1.0 in test run, use any valid p value, say 0.5
cat( h, "\t", stats::binom.test( r[1,h], r[2,h], p = 0.5 )$p.value," \n" )
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.