Nothing
## ----echo=FALSE---------------------------------------------------------------------------------------------
options(width=110)
## -----------------------------------------------------------------------------------------------------------
library(HIBAG)
## ----fig.width=10, fig.height=4, fig.align='center'---------------------------------------------------------
# load the published parameter estimates from European ancestry
# e.g., filename <- "HumanOmniExpressExome-European-HLA4-hg19.RData"
# here, we use example data in the package
filename <- system.file("extdata", "ModelList.RData", package="HIBAG")
model.list <- get(load(filename))
# HLA imputation at HLA-A
hla.id <- "A"
model <- hlaModelFromObj(model.list[[hla.id]])
summary(model)
plot(model) # show the frequency of SNP marker in the model
# SNPs in the model
head(model$snp.id)
head(model$snp.position)
## ----eval=FALSE---------------------------------------------------------------------------------------------
# #########################################################################
# # Import your PLINK BED file
# #
# yourgeno <- hlaBED2Geno(bed.fn=".bed", fam.fn=".fam", bim.fn=".bim")
# summary(yourgeno)
#
# # best-guess genotypes and all posterior probabilities
# pred.guess <- hlaPredict(model, yourgeno, type="response+prob")
# summary(pred.guess)
#
# pred.guess$value
# pred.guess$postprob
## ----eval=FALSE---------------------------------------------------------------------------------------------
# library(HIBAG)
#
# # load HLA types and SNP genotypes in the package
# data(HLA_Type_Table, package="HIBAG")
# data(HapMap_CEU_Geno, package="HIBAG")
#
# # a list of HLA genotypes
# # e.g., train.HLA <- hlaAllele(sample.id, H1=c("01:02", "05:01", ...),
# # H2=c("02:01", "03:01", ...), locus="A")
# # the HLA type of the first individual is 01:02/02:01,
# # the second is 05:01/03:01, ...
# hla.id <- "A"
# train.HLA <- hlaAllele(HLA_Type_Table$sample.id,
# H1 = HLA_Type_Table[, paste(hla.id, ".1", sep="")],
# H2 = HLA_Type_Table[, paste(hla.id, ".2", sep="")],
# locus=hla.id, assembly="hg19")
#
# # selected SNPs:
# # the flanking region of 500kb on each side,
# # or an appropriate flanking size without sacrificing predictive accuracy
# region <- 500 # kb
# snpid <- hlaFlankingSNP(HapMap_CEU_Geno$snp.id,
# HapMap_CEU_Geno$snp.position, hla.id, region*1000, assembly="hg19")
# length(snpid)
#
# # training genotypes
# # import your PLINK BED file,
# # e.g., train.geno <- hlaBED2Geno(bed.fn=".bed", fam.fn=".fam", bim.fn=".bim")
# # or,
# train.geno <- hlaGenoSubset(HapMap_CEU_Geno,
# snp.sel = match(snpid, HapMap_CEU_Geno$snp.id))
# summary(train.geno)
#
# # train a HIBAG model
# set.seed(100)
# model <- hlaAttrBagging(train.HLA, train.geno, nclassifier=100)
## ----echo=FALSE---------------------------------------------------------------------------------------------
mobj <- get(load(system.file("extdata", "ModelList.RData", package="HIBAG")))
model <- hlaModelFromObj(mobj$A)
## -----------------------------------------------------------------------------------------------------------
summary(model)
## ----eval=FALSE---------------------------------------------------------------------------------------------
# library(HIBAG)
# library(parallel)
#
# # load HLA types and SNP genotypes in the package
# data(HLA_Type_Table, package="HIBAG")
# data(HapMap_CEU_Geno, package="HIBAG")
#
# # a list of HLA genotypes
# # e.g., train.HLA <- hlaAllele(sample.id, H1=c("01:02", "05:01", ...),
# # H2=c("02:01", "03:01", ...), locus="A")
# # the HLA type of the first individual is 01:02/02:01,
# # the second is 05:01/03:01, ...
# hla.id <- "A"
# train.HLA <- hlaAllele(HLA_Type_Table$sample.id,
# H1 = HLA_Type_Table[, paste(hla.id, ".1", sep="")],
# H2 = HLA_Type_Table[, paste(hla.id, ".2", sep="")],
# locus=hla.id, assembly="hg19")
#
# # selected SNPs:
# # the flanking region of 500kb on each side,
# # or an appropriate flanking size without sacrificing predictive accuracy
# region <- 500 # kb
# snpid <- hlaFlankingSNP(HapMap_CEU_Geno$snp.id,
# HapMap_CEU_Geno$snp.position, hla.id, region*1000, assembly="hg19")
# length(snpid)
#
# # training genotypes
# # import your PLINK BED file,
# # e.g., train.geno <- hlaBED2Geno(bed.fn=".bed", fam.fn=".fam", bim.fn=".bim")
# # or,
# train.geno <- hlaGenoSubset(HapMap_CEU_Geno,
# snp.sel = match(snpid, HapMap_CEU_Geno$snp.id))
# summary(train.geno)
#
#
# # Multithreading
# cl <- 2 # 2 -- # of threads
#
# # Building ...
# set.seed(1000)
# hlaParallelAttrBagging(cl, train.HLA, train.geno, nclassifier=100,
# auto.save="AutoSaveModel.RData")
# model.obj <- get(load("AutoSaveModel.RData"))
# model <- hlaModelFromObj(model.obj)
# summary(model)
## -----------------------------------------------------------------------------------------------------------
library(HIBAG)
# load HLA types and SNP genotypes in the package
data(HLA_Type_Table, package="HIBAG")
data(HapMap_CEU_Geno, package="HIBAG")
# make a list of HLA types
hla.id <- "A"
hla <- hlaAllele(HLA_Type_Table$sample.id,
H1 = HLA_Type_Table[, paste(hla.id, ".1", sep="")],
H2 = HLA_Type_Table[, paste(hla.id, ".2", sep="")],
locus=hla.id, assembly="hg19")
# divide HLA types randomly
set.seed(100)
hlatab <- hlaSplitAllele(hla, train.prop=0.5)
names(hlatab)
summary(hlatab$training)
summary(hlatab$validation)
# SNP predictors within the flanking region on each side
region <- 500 # kb
snpid <- hlaFlankingSNP(HapMap_CEU_Geno$snp.id,
HapMap_CEU_Geno$snp.position, hla.id, region*1000, assembly="hg19")
length(snpid)
# training and validation genotypes
train.geno <- hlaGenoSubset(HapMap_CEU_Geno,
snp.sel = match(snpid, HapMap_CEU_Geno$snp.id),
samp.sel = match(hlatab$training$value$sample.id,
HapMap_CEU_Geno$sample.id))
summary(train.geno)
test.geno <- hlaGenoSubset(HapMap_CEU_Geno, samp.sel=match(
hlatab$validation$value$sample.id, HapMap_CEU_Geno$sample.id))
## ----eval=FALSE---------------------------------------------------------------------------------------------
# # train a HIBAG model
# set.seed(100)
# model <- hlaAttrBagging(hlatab$training, train.geno, nclassifier=100)
## ----echo=FALSE---------------------------------------------------------------------------------------------
mobj <- get(load(system.file("extdata", "OutOfBag.RData", package="HIBAG")))
model <- hlaModelFromObj(mobj)
## -----------------------------------------------------------------------------------------------------------
summary(model)
# validation
pred <- hlaPredict(model, test.geno)
summary(pred)
# compare
comp <- hlaCompareAllele(hlatab$validation, pred, allele.limit=model, call.threshold=0)
comp$overall
## ----fig.align="center", fig.height=4, fig.width=5----------------------------------------------------------
# hierarchical cluster analysis
d <- hlaDistance(model)
p <- hclust(as.dist(d))
plot(p, xlab="HLA alleles")
## ----fig.align="center", fig.height=3.3, fig.width=5--------------------------------------------------------
# violin plot
hlaReportPlot(pred, model=model, fig="matching")
## ----fig.align="center", fig.height=3.3, fig.width=5--------------------------------------------------------
hlaReportPlot(pred, hlatab$validation, fig="call.rate")
hlaReportPlot(pred, hlatab$validation, fig="call.threshold")
## -----------------------------------------------------------------------------------------------------------
# report overall accuracy, per-allele sensitivity, specificity, etc
hlaReport(comp, type="txt")
## ----results='asis'-----------------------------------------------------------------------------------------
# report overall accuracy, per-allele sensitivity, specificity, etc
hlaReport(comp, type="markdown")
## -----------------------------------------------------------------------------------------------------------
# report overall accuracy, per-allele sensitivity, specificity, etc
hlaReport(comp, type="tex", header=FALSE)
## ----eval=FALSE---------------------------------------------------------------------------------------------
# library(HIBAG)
#
# # make a list of HLA types
# hla.id <- "DQA1"
# hla <- hlaAllele(HLA_Type_Table$sample.id,
# H1 = HLA_Type_Table[, paste(hla.id, ".1", sep="")],
# H2 = HLA_Type_Table[, paste(hla.id, ".2", sep="")],
# locus = hla.id, assembly = "hg19")
#
# # training genotypes
# region <- 500 # kb
# snpid <- hlaFlankingSNP(HapMap_CEU_Geno$snp.id, HapMap_CEU_Geno$snp.position,
# hla.id, region*1000, assembly="hg19")
# train.geno <- hlaGenoSubset(HapMap_CEU_Geno,
# snp.sel = match(snpid, HapMap_CEU_Geno$snp.id),
# samp.sel = match(hla$value$sample.id, HapMap_CEU_Geno$sample.id))
#
# set.seed(1000)
# model <- hlaAttrBagging(hla, train.geno, nclassifier=100)
# summary(model)
#
# # remove unused SNPs and sample IDs from the model
# mobj <- hlaPublish(model,
# platform = "Illumina 1M Duo",
# information = "Training set -- HapMap Phase II",
# warning = NULL,
# rm.unused.snp=TRUE, anonymize=TRUE)
#
# save(mobj, file="Your_HIBAG_Model.RData")
## ----eval=FALSE---------------------------------------------------------------------------------------------
# # assume the HIBAG models are stored in R objects: mobj.A, mobj.B, ...
#
# ModelList <- list()
# ModelList[["A"]] <- mobj.A
# ModelList[["B"]] <- mobj.B
# ...
#
# # save to an R data file
# save(ModelList, file="HIBAG_Model_List.RData")
## -----------------------------------------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.