Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, echo = FALSE------------------------------------------------------
library(BinaryDosage)
## ----vcf_files----------------------------------------------------------------
vcf1afile <- system.file("extdata", "set1a.vcf", package = "BinaryDosage")
vcf1ainfo <- system.file("extdata", "set1a.info", package = "BinaryDosage")
vcf1bfile <- system.file("extdata", "set1b.vcf", package = "BinaryDosage")
vcf1binfo <- system.file("extdata", "set1b.info", package = "BinaryDosage")
## ----vcf_convert, message = FALSE, warning = FALSE----------------------------
bdfile1a <- tempfile()
bdfile1b <- tempfile()
# Without information file
vcftobdlegacy(vcffiles = vcf1afile, bdfiles = bdfile1a)
# With information file — embeds aaf, maf, avgcall, rsq in the header
vcftobdlegacy(vcffiles = c(vcf1bfile, vcf1binfo), bdfiles = bdfile1b)
## ----vcf_gz, message = FALSE, warning = FALSE---------------------------------
vcf1agzfile <- system.file("extdata", "set1a.vcf.gz", package = "BinaryDosage")
bdfile1a_gz <- tempfile()
vcftobdlegacy(vcffiles = vcf1agzfile, bdfiles = bdfile1a_gz, gz = TRUE)
## ----vcf_dosageonly, message = FALSE, warning = FALSE-------------------------
vcf2afile <- system.file("extdata", "set2a.vcf", package = "BinaryDosage")
bdfile2a <- tempfile()
vcftobdlegacy(vcffiles = vcf2afile, bdfiles = bdfile2a)
## ----snpidformat, message = FALSE, warning = FALSE----------------------------
vcf1brsfile <- system.file("extdata", "set1b_rssnp.vcf", package = "BinaryDosage")
bdfile_id0 <- tempfile()
bdfile_id1 <- tempfile()
bdfile_id2 <- tempfile()
bdfile_idm1 <- tempfile()
vcftobdlegacy(vcffiles = vcf1brsfile, bdfiles = bdfile_id0)
vcftobdlegacy(vcffiles = vcf1brsfile, bdfiles = bdfile_id1, snpidformat = 1L)
vcftobdlegacy(vcffiles = vcf1brsfile, bdfiles = bdfile_id2, snpidformat = 2L)
vcftobdlegacy(vcffiles = vcf1brsfile, bdfiles = bdfile_idm1, snpidformat = -1L)
snpnames <- data.frame(
format0 = getbdinfo(bdfile_id0)$snps$snpid,
format1 = getbdinfo(bdfile_id1)$snps$snpid,
format2 = getbdinfo(bdfile_id2)$snps$snpid,
formatm1 = getbdinfo(bdfile_idm1)$snps$snpid
)
knitr::kable(snpnames, caption = "SNP IDs by snpidformat")
## ----bdoptions, message = FALSE, warning = FALSE------------------------------
bdfile1a_calc <- tempfile()
vcftobdlegacy(vcffiles = vcf1afile, bdfiles = bdfile1a_calc,
bdoptions = c("aaf", "maf", "rsq"))
bdcalcinfo <- getbdinfo(bdfile1a_calc)
knitr::kable(data.frame(bdcalcinfo$snpinfo),
caption = "Calculated per-SNP statistics", digits = 3)
## ----gen_files----------------------------------------------------------------
gen3afile <- system.file("extdata", "set3a.imp", package = "BinaryDosage")
gen3asample <- system.file("extdata", "set3a.sample", package = "BinaryDosage")
gen3bfile <- system.file("extdata", "set3b.imp", package = "BinaryDosage")
gen3bsample <- system.file("extdata", "set3b.sample", package = "BinaryDosage")
## ----gen_convert, message = FALSE, warning = FALSE----------------------------
bdfile3a <- tempfile()
bdfile3b <- tempfile()
gentobd(genfiles = c(gen3afile, gen3asample),
snpcolumns = c(0L, 2L:5L),
bdfiles = bdfile3a)
gentobd(genfiles = c(gen3bfile, gen3bsample),
snpcolumns = c(0L, 2L:5L),
bdfiles = bdfile3b)
## ----gen_snpcolumns, message = FALSE, warning = FALSE-------------------------
gen3bchrfile <- system.file("extdata", "set3b.chr.imp", package = "BinaryDosage")
bdfile3b_chr <- tempfile()
# chromosome is in column 1, SNP ID in column 2, location in column 3, etc.
gentobd(genfiles = c(gen3bchrfile, gen3bsample), bdfiles = bdfile3b_chr)
## ----gen_impformat, message = FALSE, warning = FALSE--------------------------
gen2bfile <- system.file("extdata", "set2b.imp", package = "BinaryDosage")
sample2bfile <- system.file("extdata", "set2b.sample", package = "BinaryDosage")
bdfile2b <- tempfile()
gentobd(genfiles = c(gen2bfile, sample2bfile),
snpcolumns = c(1L, 3L, 2L, 4L, 5L),
impformat = 1L,
bdfiles = bdfile2b)
## ----gen_gz, message = FALSE, warning = FALSE---------------------------------
gen4bgzfile <- system.file("extdata", "set4b.imp.gz", package = "BinaryDosage")
sample4bfile <- system.file("extdata", "set4b.sample", package = "BinaryDosage")
bdfile4b_gz <- tempfile()
gentobd(genfiles = c(gen4bgzfile, sample4bfile),
snpcolumns = c(1L, 2L, 4L, 5L, 6L),
startcolumn = 7L,
impformat = 2L,
gz = TRUE,
bdfiles = bdfile4b_gz)
## ----merge, message = FALSE, warning = FALSE----------------------------------
bd1afile <- system.file("extdata", "vcf1a.bdose", package = "BinaryDosage")
bd1bfile <- system.file("extdata", "vcf1b.bdose", package = "BinaryDosage")
bd1merged <- tempfile()
bdmerge(mergefiles = bd1merged, bdfiles = c(bd1afile, bd1bfile))
bd1ainfo <- getbdinfo(bd1afile)
bd1binfo <- getbdinfo(bd1bfile)
bd1merinfo <- getbdinfo(bd1merged)
cat("Set 1a subjects:", nrow(bd1ainfo$samples), "\n")
cat("Set 1b subjects:", nrow(bd1binfo$samples), "\n")
cat("Merged subjects:", nrow(bd1merinfo$samples), "\n")
## ----getbdinfo, message = FALSE, warning = FALSE------------------------------
bd1ainfo <- getbdinfo(bdfiles = bd1afile)
## ----bdapply, message = FALSE, warning = FALSE--------------------------------
calculateaaf <- function(dosage, p0, p1, p2) {
mean(dosage, na.rm = TRUE) / 2
}
bd1ainfo <- getbdinfo(bd1afile)
aaf_vals <- unlist(bdapply(bdinfo = bd1ainfo, func = calculateaaf))
aaf_table <- data.frame(snpid = bd1ainfo$snps$snpid, aaf = aaf_vals)
knitr::kable(aaf_table, caption = "Alternate allele frequencies", digits = 4)
## ----bdapply_getaaf, message = FALSE, warning = FALSE-------------------------
aaf_vals2 <- unlist(bdapply(bdinfo = bd1ainfo, func = getaaf))
## ----getsnp, message = FALSE, warning = FALSE---------------------------------
snp_data <- data.frame(getsnp(bdinfo = bd1ainfo, snp = "1:12000:T:C", dosageonly = FALSE))
snp_table <- cbind(sid = bd1ainfo$samples$sid, snp_data)
knitr::kable(head(snp_table, 10), caption = "SNP 1:12000:T:C (first 10 subjects)", digits = 4)
## ----vcfinfo, message = FALSE, warning = FALSE--------------------------------
vcfinfo <- getvcfinfo(vcffiles = vcf1afile, index = TRUE)
aaf_vcf <- unlist(vcfapply(vcfinfo = vcfinfo, func = getaaf))
aaf_compare <- data.frame(snpid = vcfinfo$snps$snpid, aaf = aaf_vcf)
knitr::kable(aaf_compare, caption = "AAF from VCF via vcfapply", digits = 4)
## ----geninfo, message = FALSE, warning = FALSE--------------------------------
geninfo <- getgeninfo(genfiles = c(gen3bchrfile, gen3bsample), index = TRUE)
aaf_gen <- unlist(genapply(geninfo = geninfo, func = getaaf))
aaf_gen_table <- data.frame(snpid = geninfo$snps$snpid, aaf = aaf_gen)
knitr::kable(aaf_gen_table, caption = "AAF from GEN via genapply", digits = 4)
## ----full_workflow, message = FALSE, warning = FALSE--------------------------
library(BinaryDosage)
# --- Input files ---
vcf1afile <- system.file("extdata", "set1a.vcf", package = "BinaryDosage")
vcf1ainfo <- system.file("extdata", "set1a.info", package = "BinaryDosage")
vcf1bfile <- system.file("extdata", "set1b.vcf", package = "BinaryDosage")
vcf1binfo <- system.file("extdata", "set1b.info", package = "BinaryDosage")
gen3afile <- system.file("extdata", "set3a.imp", package = "BinaryDosage")
gen3asample <- system.file("extdata", "set3a.sample", package = "BinaryDosage")
gen3bfile <- system.file("extdata", "set3b.imp", package = "BinaryDosage")
gen3bsample <- system.file("extdata", "set3b.sample", package = "BinaryDosage")
# --- Convert to binary dosage ---
bdfile1a <- tempfile(); bdfile1b <- tempfile()
bdfile3a <- tempfile(); bdfile3b <- tempfile()
vcftobdlegacy(vcffiles = c(vcf1afile, vcf1ainfo), bdfiles = bdfile1a)
vcftobdlegacy(vcffiles = c(vcf1bfile, vcf1binfo), bdfiles = bdfile1b)
gentobd(genfiles = c(gen3afile, gen3asample), snpcolumns = c(0L, 2L:5L), bdfiles = bdfile3a)
gentobd(genfiles = c(gen3bfile, gen3bsample), snpcolumns = c(0L, 2L:5L), bdfiles = bdfile3b)
# --- Merge ---
mergebd1 <- tempfile(); mergebd3 <- tempfile()
bdmerge(mergefiles = mergebd1, bdfiles = c(bdfile1a, bdfile1b))
bdmerge(mergefiles = mergebd3, bdfiles = c(bdfile3a, bdfile3b))
# --- Apply function ---
mergebd1info <- getbdinfo(mergebd1)
mergebd3info <- getbdinfo(mergebd3)
aaf1 <- unlist(bdapply(mergebd1info, getaaf))
aaf3 <- unlist(bdapply(mergebd3info, getaaf))
aaf <- cbind(mergebd1info$snps[, c("chromosome", "location", "snpid")],
aaf_vcf = aaf1, aaf_gen = aaf3)
knitr::kable(aaf, caption = "AAF: VCF vs GEN sources", digits = 4, row.names = FALSE)
## ----full_workflow_snp, message = FALSE, warning = FALSE----------------------
# --- Extract SNP 6 ---
set1snp6 <- getsnp(mergebd1info, 6L)
set3snp6 <- getsnp(mergebd3info, 6L)
snpdf <- data.frame(
subjectid = mergebd1info$samples$sid,
dosage_vcf = unlist(set1snp6),
dosage_gen = unlist(set3snp6)
)
knitr::kable(head(snpdf, 10),
caption = "SNP 6 dosage: VCF vs GEN (first 10 subjects)",
digits = 4, row.names = FALSE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.