Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(comment = ">",fig.height = 4.5, fig.width = 4.5, fig.show = "hold")
## ----library, message = FALSE-------------------------------------------------
library(rehh)
## ----make_examples, results = 'hide'------------------------------------------
make.example.files()
## ----minimalcodeexample, fig.align = 'center', results = "hide"---------------
hh <- # data input
data2haplohh(
hap_file = "bta12_cgu.hap",
map_file = "map.inp",
chr.name = "12",
allele_coding = "map"
)
scan <- scan_hh(hh) # calculation of EHH and integration
# (combine results from different chromosomes)
ihs <- ihh2ihs(scan) # log ratio for alleles and standardization
manhattanplot(ihs) # plot of the statistics
## ----map_inp------------------------------------------------------------------
# show first 6 lines of file
cat(readLines("map.inp", n = 6), sep = "\n")
## ----eval = FALSE-------------------------------------------------------------
# ?data2haplohh
## ----example1-----------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.hap",
map_file = "map.inp",
chr.name = 12,
allele_coding = "map")
## ----error = TRUE-------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.hap",
map_file = "map.inp",
allele_coding = "map")
## ----error = TRUE-------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.hap",
map_file = "map.inp",
chr.name = 18,
allele_coding = "map")
## ----example2-----------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.thap",
map_file = "map.inp",
chr.name = 12,
allele_coding = "map",
haplotype.in.columns = TRUE)
## ----example3-----------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_hapguess_switch.out",
map_file = "map.inp",
chr.name = 12,
popsel = 7,
allele_coding = "map")
## ----error = TRUE-------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_hapguess_switch.out",
map_file = "map.inp",
chr.name = 12,
allele_coding = "map")
## ----vcf_example, eval = requireNamespace("data.table", quietly = TRUE) & requireNamespace("R.utils", quietly = TRUE)----
hh <- data2haplohh(hap_file = "bta12_cgu.vcf.gz",
polarize_vcf = FALSE,
vcf_reader = "data.table")
## ----ms_example, eval = requireNamespace("gap", quietly = TRUE)---------------
hh <- data2haplohh(hap_file = "ms.out",
chr.name = 2,
position_scaling_factor = 1000)
## -----------------------------------------------------------------------------
hh_subset = subset(hh, select.hap = 1:5, min_maf = 0)
## -----------------------------------------------------------------------------
hh_subset = subset(hh, select.mrk = -1)
## ----eval=FALSE---------------------------------------------------------------
# ?calc_ehh
## -----------------------------------------------------------------------------
#example haplohh object (280 haplotypes, 1424 SNPs) see ?haplohh_cgu_bta12 for details
data(haplohh_cgu_bta12)
#computing EHH statistics for the focal SNP with name "F1205400"
#which displays a strong signal of selection
res <- calc_ehh(haplohh_cgu_bta12,
mrk = "F1205400",
include_nhaplo = TRUE)
## -----------------------------------------------------------------------------
res$mrk.name
## -----------------------------------------------------------------------------
res$freq
## -----------------------------------------------------------------------------
res$ehh
## -----------------------------------------------------------------------------
res$ihh
## ---- ehh, fig.align = 'center', fig.cap = "Graphical output of the plot.ehh() function", fig.pos = '!h', fig.lp = 'fig:'----
plot(res)
## ----ehh2, fig.align = 'center', fig.cap = "Graphical output of the plot.ehh() function", fig.pos = '!h', fig.lp = 'fig:'----
plot(calc_ehh(haplohh_cgu_bta12,
mrk = "F1205400",
phased = FALSE),
xlim = c(2.55E7, 3.05E7))
## ---- eval=FALSE--------------------------------------------------------------
# ?calc_ehhs
## -----------------------------------------------------------------------------
data(haplohh_cgu_bta12)
res <- calc_ehhs(haplohh_cgu_bta12,
mrk = "F1205400",
include_nhaplo = TRUE)
## -----------------------------------------------------------------------------
res$mrk.name
## -----------------------------------------------------------------------------
res$ehhs
## -----------------------------------------------------------------------------
res$IES
## -----------------------------------------------------------------------------
res$INES
## ----ehhs, fig.align = 'center', fig.cap = 'Graphical output of the plot.ehhs() function', fig.pos = '!h', fig.lp = 'fig:'----
plot(res)
## ----nehhs, fig.align = 'center', fig.cap = 'Graphical output of the plot.ehhs() function', fig.pos = '!h', fig.lp = 'fig:'----
plot(res, nehhs = TRUE)
## ----ehhs2, fig.align = 'center', fig.cap = 'Graphical output of the plot.ehhs() function', fig.pos = '!h', fig.lp = 'fig:'----
plot(calc_ehhs(haplohh_cgu_bta12,
mrk = "F1205400",
phased = FALSE))
## ----eval=FALSE---------------------------------------------------------------
# ?scan_hh
## -----------------------------------------------------------------------------
data(haplohh_cgu_bta12)
scan <- scan_hh(haplohh_cgu_bta12)
## -----------------------------------------------------------------------------
scan[453:459,]
## -----------------------------------------------------------------------------
# perform scan using scan_hh
system.time(scan <- scan_hh(haplohh_cgu_bta12))
## ----slow_scan----------------------------------------------------------------
# perform scan applying calc_ehh and calc_ehhs to each marker
slow_scan_hh <- function(haplohh) {
# create empty vectors of size nmrk
IHH_A <- IHH_D <- IES <- INES <- vector("numeric", nmrk(haplohh))
# invoke calc_ehh and calc_ehhs for each marker
for (i in 1:nmrk(haplohh)) {
res <- calc_ehh(haplohh, mrk = i)
IHH_A[i] <- res$ihh["IHH_A"]
IHH_D[i] <- res$ihh["IHH_D"]
res <- calc_ehhs(haplohh, mrk = i)
IES[i] <- res$IES
INES[i] <- res$INES
}
# create data frame (the return value of this function)
data.frame(IHH_A = IHH_A,
IHH_D = IHH_D,
IES = IES,
INES = INES)
}
system.time(slow_scan <- slow_scan_hh(haplohh_cgu_bta12))
## ----scancomp-----------------------------------------------------------------
identical(slow_scan[, "IHH_A"], scan[, "IHH_A"])
identical(slow_scan[, "IHH_D"], scan[, "IHH_D"])
identical(slow_scan[, "IES"], scan[, "IES"])
identical(slow_scan[, "INES"], scan[, "INES"])
## ----ihh2ihs1, eval = FALSE---------------------------------------------------
# ## demo code - no data files for all chromosomes provided
# for(i in 1:29) {
# # haplotype file name for each chromosome
# hap_file = paste("hap_chr_", i, ".cgu", sep = "")
# # create internal representation
# hh <- data2haplohh(hap_file = hap_file,
# map_file = "map.inp",
# chr.name = i,
# allele_coding = "map")
# # perform scan on a single chromosome (calculate iHH values)
# scan <- scan_hh(hh)
# # concatenate chromosome-wise data frames to
# # a data frame for the whole genome
# # (more efficient ways certainly exist...)
# if (i == 1) {
# wgscan <- scan
# } else {
# wgscan <- rbind(wgscan, scan)
# }
# }
# # calculate genome-wide iHS values
# wgscan.ihs <- ihh2ihs(wgscan)
## ----ihh2ihs2-----------------------------------------------------------------
library(rehh.data)
data(wgscan.cgu)
wgscan.ihs.cgu <- ihh2ihs(wgscan.cgu)
## -----------------------------------------------------------------------------
head(wgscan.ihs.cgu$ihs)
## -----------------------------------------------------------------------------
head(wgscan.ihs.cgu$frequency.class)
## ----ines2rsb2----------------------------------------------------------------
data(wgscan.cgu) ; data(wgscan.eut)
## results from a genome scan (44,057 SNPs) see ?wgscan.eut and ?wgscan.cgu for details
rsb.cgu_eut <- ines2rsb(scan_pop1 = wgscan.cgu,
scan_pop2 = wgscan.eut,
popname1 = "CGU",
popname2 = "EUT")
## -----------------------------------------------------------------------------
head(rsb.cgu_eut)
## ----ies2xpehh2---------------------------------------------------------------
data(wgscan.cgu) ; data(wgscan.eut)
## results from a genome scan (44,057 SNPs) see ?wgscan.eut and ?wgscan.cgu for details
xpehh.cgu_eut <- ies2xpehh(scan_pop1 = wgscan.cgu,
scan_pop2 = wgscan.eut,
popname1 = "CGU",
popname2 = "EUT")
## -----------------------------------------------------------------------------
head(xpehh.cgu_eut)
## ----candidate_regions--------------------------------------------------------
cr.cgu <- calc_candidate_regions(wgscan.ihs.cgu,
threshold = 4,
pval = TRUE,
window_size = 1E6,
overlap = 1E5,
min_n_extr_mrk = 2)
cr.cgu
## ----freqbin, fig.align='center', fig.lp='fig:', fig.cap='Graphical output of the freqbinplot() function', fig.pos='!h'----
freqbinplot(wgscan.ihs.cgu)
## ----comp, echo = TRUE, fig.align = 'center', fig.lp = 'fig:', fig.cap = 'Comparison of Rsb and XP-EHH values across the CGU and EUT populations', fig.pos = "!h"----
plot(rsb.cgu_eut[, "RSB_CGU_EUT"],
xpehh.cgu_eut[, "XPEHH_CGU_EUT"],
xlab = "Rsb",
ylab = "XP-EHH",
pch = ".",
xlim = c(-7.5, 7.5),
ylim = c(-7.5, 7.5))
# add red circle for marker with name "F1205400"
points(rsb.cgu_eut["F1205400", "RSB_CGU_EUT"],
xpehh.cgu_eut["F1205400", "XPEHH_CGU_EUT"],
col = "red")
# add dashed diagonal
abline(a = 0, b = 1, lty = 2)
## ----distribplot, fig.align = 'center', fig.lp = 'fig:', fig.cap = 'Graphical output of the distribplot() function', fig.pos="!h"----
distribplot(wgscan.ihs.cgu$ihs$IHS, xlab = "iHS")
## ----qqplot, fig.align = 'center', fig.lp='fig:', fig.cap = 'Graphical output of the distribplot() function', fig.pos = "!h"----
distribplot(wgscan.ihs.cgu$ihs$IHS,
xlab = "iHS",
qqplot = TRUE)
## ----manhattanplot, fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = 'Graphical output of the manhattanplot() function', fig.pos = '!h'----
manhattanplot(wgscan.ihs.cgu,
main = "iHS (CGU cattle breed)")
## ----pval, fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = "Graphical output of the manhattanplot() function", fig.pos = '!h'----
manhattanplot(wgscan.ihs.cgu,
pval = TRUE,
threshold = 4,
main = "p-value of iHS (CGU cattle breed)")
## ----manhattanplotsub, fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = 'Graphical output of the manhattanplot() function', fig.pos = '!h'----
# re-define colors
palette(c("red", "green"))
manhattanplot(wgscan.ihs.cgu,
pval = TRUE,
threshold = 4,
chr.name = c("1", "4", "5", "12"),
main = "iHS (CGU cattle breed)",
cr = cr.cgu,
mrk = "F1205400",
inset = 1E+7,
resolution = c(200000, 0.05))
# set back to default colors
palette("default")
## ----rehh2qqman---------------------------------------------------------------
# extract data frame from result list
ihs <- wgscan.ihs.cgu$ihs
# create new data frame
wgscan.cgu.ihs.qqman <- data.frame(
CHR = as.integer(factor(ihs$CHR,
levels = unique(ihs$CHR))),
# chromosomes as integers
BP = ihs$POSITION, # base pairs
P = 10**(-ihs$LOGPVALUE), # transform back to p-values
SNP = row.names(ihs) # SNP names
)
## ----qqman, eval = requireNamespace("qqman", quietly = TRUE), fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = 'Graphical output of the qqman::manhattan() function', fig.pos = '!h', message = FALSE----
library(qqman)
qqman::manhattan(wgscan.cgu.ihs.qqman,
col = c("red","green"),
chrlabs = unique(ihs$CHR),
suggestiveline = 4,
highlight = "F1205400",
annotatePval = 0.0001)
## ----plothaplo, results = "hide", fig.align = 'center', fig.width = 7, fig.height = 5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.haplohh() function', fig.pos = '!h'----
hh_subset <- subset(haplohh_cgu_bta12, select.mrk = 350:550)
oldpar <- par(mar = c(3, 2, 2, 2) + 0.1)
plot(
hh_subset,
mrk = "F1205400",
group_by_allele = TRUE,
ignore.distance = TRUE,
col = c(NA, "red"),
linecol = c("lightblue", "lightpink"),
mrk.col = "black",
cex = 0.1,
pos.lab.hap = "none",
pos.lab.mrk = "none"
)
par(oldpar)
## -----------------------------------------------------------------------------
data(haplohh_cgu_bta12)
furcation <- calc_furcation(haplohh_cgu_bta12,
mrk = "F1205400")
## ----furc, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.furcation() function', fig.pos = "!h"----
plot(furcation)
## ----furczoom, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.furcation() function', fig.pos = "!h"----
plot(furcation,
xlim = c(2.8E+7, 3E+7),
lwd = 0.05,
hap.names = hap.names(haplohh_cgu_bta12),
cex.lab = 0.3)
## -----------------------------------------------------------------------------
newick <- as.newick(furcation,
allele = 0,
side = "left",
hap.names = hap.names(haplohh_cgu_bta12))
## ----newick, eval = requireNamespace("ape", quietly = TRUE), fig.align = 'center', fig.width = 6, fig.height = 6, fig.lp = 'fig:', fig.cap = 'Graphical output of the ape::plot.phylo() function', fig.pos = "!h"----
library(ape)
tree <- ape::read.tree(text = newick)
plot(tree,
cex = 0.5,
direction = "leftwards",
edge.color = "blue",
underscore = TRUE,
no.margin = TRUE)
## -----------------------------------------------------------------------------
haplen <- calc_haplen(furcation)
## -----------------------------------------------------------------------------
haplen$mrk.name
## -----------------------------------------------------------------------------
haplen$position
## -----------------------------------------------------------------------------
haplen$xlim
## -----------------------------------------------------------------------------
head(haplen$haplen)
## ----haplo, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.haplen() function', fig.pos = "!h"----
plot(haplen)
## ----haplozoom, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.haplen() function', fig.pos = "!h"----
plot(haplen,
allele = 0,
xlim = c(haplen$xlim[1], haplen$position),
hap.names = hap.names(haplohh_cgu_bta12),
cex.lab = 0.3,
legend.xy.coords = "none")
## -----------------------------------------------------------------------------
# finding the index number of marker "F1205400"
mrk.nr = which(mrk.names(haplohh_cgu_bta12) == "F1205400")
# subset of all markers on the "left" of the focal one
hh_left = subset(haplohh_cgu_bta12, select.mrk = 1:mrk.nr)
# check for duplicated rows
which(duplicated(haplo(hh_left)))
# row 248 is identical to a previous row, but which one?
# get the other row by a search in opposite direction
which(duplicated(haplo(hh_left), fromLast = TRUE))
# extract the corresponding haplotype names
hap.names(hh_left)[c(236, 248)]
## -----------------------------------------------------------------------------
remove.example.files()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.