Nothing
## ----setup--------------------------------------------------------------------
library(ApplyPolygenicScore)
## ----import-vcf---------------------------------------------------------------
vcf.file <- system.file(
'extdata',
'HG001_GIAB.vcf.gz',
package = 'ApplyPolygenicScore',
mustWork = TRUE
)
vcf.data <- import.vcf(
vcf.path = vcf.file,
info.fields = NULL,
format.fields = NULL,
verbose = TRUE
)
str(vcf.data)
## ----import-pgs---------------------------------------------------------------
pgs.file <- system.file(
'extdata',
'PGS003378_hmPOS_GRCh38.txt.gz',
package = 'ApplyPolygenicScore',
mustWork = TRUE
)
pgs.data <- import.pgs.weight.file(
pgs.weight.path = pgs.file,
use.harmonized.data = TRUE
)
str(pgs.data)
## ----import-phenotype---------------------------------------------------------
# Isolating the individual IDs from the VCF data
vcf.individuals <- unique(vcf.data$dat$Indiv)
# Simulating phenotype data
set.seed(123)
phenotype.data <- data.frame(
Indiv = vcf.individuals,
continuous.phenotype = rnorm(length(vcf.individuals)),
binary.phenotype = rbinom(length(vcf.individuals), 1, 0.5)
)
head(phenotype.data)
## ----convert-pgs-to-bed-------------------------------------------------------
pgs.coordinate.info <- pgs.data$pgs.weight.data
pgs.bed.format <- convert.pgs.to.bed(
pgs.weight.data = pgs.coordinate.info,
chr.prefix = TRUE,
numeric.sex.chr = FALSE,
slop = 10
)
head(pgs.bed.format)
## ----merge-pgs-bed------------------------------------------------------------
# convert your PGS weight files with no added slop
pgs.bed1 <- convert.pgs.to.bed(pgs.coordinate.info, slop = 0)
# simulating a second PGS with all coordinates shifted by 20 base pairs.
pgs.bed2 <- pgs.bed1
pgs.bed2$start <- pgs.bed2$start + 20
pgs.bed2$end <- pgs.bed2$end + 20
# Input must be a named list
pgs.bed.list <- list(PGS1 = pgs.bed1, PGS2 = pgs.bed2)
merged.pgs.bed <- combine.pgs.bed(
pgs.bed.list = pgs.bed.list,
add.annotation.data = TRUE,
annotation.column.index = which(colnames(pgs.bed1) == 'rsID') # keep information from the rsID column during merge
)
str(merged.pgs.bed)
## ----check-pgs-columns--------------------------------------------------------
apply.polygenic.score(
vcf.data = vcf.data$dat,
pgs.weight.data = pgs.data$pgs.weight.data,
phenotype.data = phenotype.data,
validate.inputs.only = TRUE
)
## ----apply-pgs----------------------------------------------------------------
pgs.results <- apply.polygenic.score(
vcf.data = vcf.data$dat,
pgs.weight.data = pgs.data$pgs.weight.data,
correct.strand.flips = FALSE, # no strand flip check to avoid warnings
missing.genotype.method = 'none'
)
str(pgs.results)
## ----merge-vcf-with-pgs-------------------------------------------------------
merged.vcf.pgs.data <- combine.vcf.with.pgs(
vcf.data = vcf.data$dat,
pgs.weight.data = pgs.data$pgs.weight.data
)
names(merged.vcf.pgs.data)
head(merged.vcf.pgs.data$missing.snp.data)[, 1:6]
## ----allele-matching, eval = FALSE--------------------------------------------
#
# # Most strict allele match handling
# strict.allele.match.result <- apply.polygenic.score(
# vcf.data = vcf.data$dat,
# pgs.weight.data = pgs.data$pgs.weight.data,
# missing.genotype.method = 'none',
# correct.strand.flips = TRUE,
# remove.ambiguous.allele.matches = TRUE,
# remove.mismatched.indels = TRUE
# );
#
# # Less strict allele match handling
# less.strict.allele.match.result <- apply.polygenic.score(
# vcf.data = vcf.data$dat,
# pgs.weight.data = pgs.data$pgs.weight.data,
# missing.genotype.method = 'none',
# correct.strand.flips = TRUE,
# remove.ambiguous.allele.matches = FALSE,
# remove.mismatched.indels = FALSE
# );
#
## ----missing-genotype-methods-------------------------------------------------
all.missing.methods.pgs.results <- apply.polygenic.score(
vcf.data = vcf.data$dat,
pgs.weight.data = pgs.data$pgs.weight.data,
correct.strand.flips = FALSE, # no strand flip check to avoid warnings
missing.genotype.method = c('none', 'normalize', 'mean.dosage')
)
head(all.missing.methods.pgs.results$pgs.output)
## ----custom-percentiles-------------------------------------------------------
custom.percentiles.pgs.results <- apply.polygenic.score(
vcf.data = vcf.data$dat,
pgs.weight.data = pgs.data$pgs.weight.data,
correct.strand.flips = FALSE, # no strand flip check to avoid warnings
n.percentiles = 5
)
head(custom.percentiles.pgs.results$pgs.output)
## ----load-large-vcf-----------------------------------------------------------
phenotype.test.data.path <- system.file(
'extdata',
'phenotype.test.data.Rda',
package = 'ApplyPolygenicScore',
mustWork = TRUE
)
load(phenotype.test.data.path)
str(phenotype.test.data)
## ----phenotype-merge----------------------------------------------------------
pgs.results.with.phenotype <- apply.polygenic.score(
vcf.data = phenotype.test.data$vcf.data,
pgs.weight.data = phenotype.test.data$pgs.weight.data,
phenotype.data = phenotype.test.data$phenotype.data
)
head(pgs.results.with.phenotype$pgs.output)
## ----phenotype-analysis-------------------------------------------------------
pgs.results.with.phenotype.analysis <- apply.polygenic.score(
vcf.data = phenotype.test.data$vcf.data,
pgs.weight.data = phenotype.test.data$pgs.weight.data,
phenotype.data = phenotype.test.data$phenotype.data,
phenotype.analysis.columns = c('continuous.phenotype', 'binary.phenotype')
)
head(pgs.results.with.phenotype.analysis$regression.output)
## ----plotting-dir, echo = FALSE-----------------------------------------------
temp.dir <- tempdir();
basic.density.filename <- ApplyPolygenicScore:::generate.filename(
project.stem = 'vignette-example-basic',
file.core = 'pgs-density',
extension = 'png'
);
phenotype.density.filename <- ApplyPolygenicScore:::generate.filename(
project.stem = 'vignette-example-phenotype',
file.core = 'pgs-density',
extension = 'png'
);
correlation.filename <- ApplyPolygenicScore:::generate.filename(
project.stem = 'vignette-example-correlation',
file.core = 'pgs-scatter',
extension = 'png'
);
basic.rank.filename <- ApplyPolygenicScore:::generate.filename(
project.stem = 'vignette-example-basic',
file.core = 'pgs-rank-plot',
extension = 'png'
);
phenotype.rank.filename <- ApplyPolygenicScore:::generate.filename(
project.stem = 'vignette-example-phenotype',
file.core = 'pgs-rank-plot',
extension = 'png'
);
## ----pgs-density, eval = TRUE-------------------------------------------------
create.pgs.density.plot(
pgs.data = pgs.results.with.phenotype.analysis$pgs.output,
output.dir = temp.dir,
filename.prefix = 'vignette-example-basic',
file.extension = 'png'
)
## ----out.width = '50%', echo = FALSE------------------------------------------
knitr::include_graphics(file.path(temp.dir, basic.density.filename));
## ----pgs-density-phenotype, eval = TRUE---------------------------------------
create.pgs.density.plot(
pgs.data = pgs.results.with.phenotype.analysis$pgs.output,
output.dir = temp.dir,
filename.prefix = 'vignette-example-phenotype',
file.extension = 'png',
tidy.titles = TRUE,
phenotype.columns = c('binary.factor.phenotype', 'categorical.phenotype', 'continuous.phenotype')
)
## ----out.width = '50%', echo = FALSE------------------------------------------
knitr::include_graphics(file.path(temp.dir, phenotype.density.filename));
## ----pgs-correlation, eval = TRUE---------------------------------------------
create.pgs.with.continuous.phenotype.plot(
pgs.data = pgs.results.with.phenotype.analysis$pgs.output,
output.dir = temp.dir,
filename.prefix = 'vignette-example-correlation',
file.extension = 'png',
tidy.titles = TRUE,
phenotype.columns = c('continuous.phenotype', 'categorical.phenotype')
)
## ----out.width = '70%', echo = FALSE------------------------------------------
knitr::include_graphics(file.path(temp.dir, correlation.filename));
## ----pgs-rank, eval = TRUE----------------------------------------------------
# Introducing some missing genotypes to demonstrate the missing genotype barplot
pgs.results.with.phenotype.analysis$pgs.output$n.missing.genotypes <- rep(c(0, 1, 0, 2, 1), 2)
create.pgs.rank.plot(
pgs.data = pgs.results.with.phenotype.analysis$pgs.output,
output.dir = temp.dir,
filename.prefix = 'vignette-example-basic',
file.extension = 'png'
)
## ----out.width = '50%', echo = FALSE------------------------------------------
knitr::include_graphics(file.path(temp.dir, basic.rank.filename));
## ----pgs-rank-phenotype, eval = TRUE------------------------------------------
create.pgs.rank.plot(
pgs.data = pgs.results.with.phenotype.analysis$pgs.output,
output.dir = temp.dir,
filename.prefix = 'vignette-example-phenotype',
file.extension = 'png',
phenotype.columns = c('binary.factor.phenotype', 'binary.phenotype', 'categorical.phenotype', 'continuous.phenotype')
)
## ----out.width = '50%', echo = FALSE------------------------------------------
knitr::include_graphics(file.path(temp.dir, phenotype.rank.filename));
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.