inst/doc/rehh.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(comment = ">",fig.height = 4.5, fig.width = 4.5, fig.show = "hold")

## ----library, message = FALSE-------------------------------------------------
library(rehh)

## ----make_examples, results = 'hide'------------------------------------------
make.example.files()

## ----minimalcodeexample, fig.align = 'center', results = "hide"---------------
hh <-                  # data input
  data2haplohh(
    hap_file = "bta12_cgu.hap",
    map_file = "map.inp",
    chr.name = "12",
    allele_coding = "map"
  )
scan <- scan_hh(hh)    # calculation of EHH and integration
                       # (combine results from different chromosomes)
ihs <- ihh2ihs(scan)   # log ratio for alleles and standardization
manhattanplot(ihs)     # plot of the statistics

## ----map_inp------------------------------------------------------------------
# show first 6 lines of file
cat(readLines("map.inp", n = 6), sep = "\n")

## ----eval = FALSE-------------------------------------------------------------
#  ?data2haplohh

## ----example1-----------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.hap",
                   map_file = "map.inp",
                   chr.name = 12,
                   allele_coding = "map")

## ----error = TRUE-------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.hap",
                   map_file = "map.inp",
                   allele_coding = "map") 

## ----error = TRUE-------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.hap",
                   map_file = "map.inp",
                   chr.name = 18,
                   allele_coding = "map")

## ----example2-----------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_cgu.thap",
                   map_file = "map.inp",
                   chr.name = 12,
                   allele_coding = "map",
                   haplotype.in.columns = TRUE)

## ----example3-----------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_hapguess_switch.out",
                   map_file = "map.inp",
                   chr.name = 12,
                   popsel = 7,
                   allele_coding = "map")

## ----error = TRUE-------------------------------------------------------------
hh <- data2haplohh(hap_file = "bta12_hapguess_switch.out",
                   map_file = "map.inp",
                   chr.name = 12,
                   allele_coding = "map")

## ----vcf_example, eval = requireNamespace("data.table", quietly = TRUE) & requireNamespace("R.utils", quietly = TRUE)----
hh <- data2haplohh(hap_file = "bta12_cgu.vcf.gz",
                   polarize_vcf = FALSE,
                   vcf_reader = "data.table")

## ----ms_example, eval = requireNamespace("gap", quietly = TRUE)---------------
hh <- data2haplohh(hap_file = "ms.out",
                   chr.name = 2,
                   position_scaling_factor = 1000)

## -----------------------------------------------------------------------------
hh_subset = subset(hh, select.hap = 1:5, min_maf = 0)

## -----------------------------------------------------------------------------
hh_subset = subset(hh, select.mrk = -1)

## ----eval=FALSE---------------------------------------------------------------
#  ?calc_ehh

## -----------------------------------------------------------------------------
#example haplohh object (280 haplotypes, 1424 SNPs) see ?haplohh_cgu_bta12 for details
data(haplohh_cgu_bta12)
#computing EHH statistics for the focal SNP with name "F1205400"
#which displays a strong signal of selection
res <- calc_ehh(haplohh_cgu_bta12, 
                mrk = "F1205400", 
                include_nhaplo = TRUE)

## -----------------------------------------------------------------------------
res$mrk.name

## -----------------------------------------------------------------------------
res$freq

## -----------------------------------------------------------------------------
res$ehh

## -----------------------------------------------------------------------------
res$ihh

## ---- ehh, fig.align = 'center', fig.cap = "Graphical output of the plot.ehh() function", fig.pos = '!h', fig.lp = 'fig:'----
plot(res)

## ----ehh2, fig.align = 'center', fig.cap = "Graphical output of the plot.ehh() function", fig.pos = '!h', fig.lp = 'fig:'----
plot(calc_ehh(haplohh_cgu_bta12, 
              mrk = "F1205400", 
              phased = FALSE),
     xlim = c(2.55E7, 3.05E7))

## ---- eval=FALSE--------------------------------------------------------------
#  ?calc_ehhs

## -----------------------------------------------------------------------------
data(haplohh_cgu_bta12)
res <- calc_ehhs(haplohh_cgu_bta12, 
                 mrk = "F1205400", 
                 include_nhaplo = TRUE)

## -----------------------------------------------------------------------------
res$mrk.name

## -----------------------------------------------------------------------------
res$ehhs 

## -----------------------------------------------------------------------------
res$IES

## -----------------------------------------------------------------------------
res$INES

## ----ehhs, fig.align = 'center', fig.cap = 'Graphical output of the plot.ehhs() function', fig.pos = '!h', fig.lp = 'fig:'----
plot(res)

## ----nehhs, fig.align = 'center', fig.cap = 'Graphical output of the plot.ehhs() function', fig.pos = '!h', fig.lp = 'fig:'----
plot(res, nehhs = TRUE)

## ----ehhs2, fig.align = 'center', fig.cap = 'Graphical output of the plot.ehhs() function', fig.pos = '!h', fig.lp = 'fig:'----
plot(calc_ehhs(haplohh_cgu_bta12,
               mrk = "F1205400",
               phased = FALSE))

## ----eval=FALSE---------------------------------------------------------------
#  ?scan_hh

## -----------------------------------------------------------------------------
data(haplohh_cgu_bta12)
scan <- scan_hh(haplohh_cgu_bta12)

## -----------------------------------------------------------------------------
scan[453:459,]

## -----------------------------------------------------------------------------
# perform scan using scan_hh
system.time(scan <- scan_hh(haplohh_cgu_bta12))

## ----slow_scan----------------------------------------------------------------
# perform scan applying calc_ehh and calc_ehhs to each marker
slow_scan_hh <- function(haplohh) {
  # create empty vectors of size nmrk
  IHH_A <- IHH_D <- IES <- INES <- vector("numeric", nmrk(haplohh))
  # invoke calc_ehh and calc_ehhs for each marker
  for (i in 1:nmrk(haplohh)) {
    res <- calc_ehh(haplohh, mrk = i)
    IHH_A[i] <- res$ihh["IHH_A"]
    IHH_D[i] <- res$ihh["IHH_D"]
    res <- calc_ehhs(haplohh, mrk = i)
    IES[i] <- res$IES
    INES[i] <- res$INES
  }
  # create data frame (the return value of this function)
  data.frame(IHH_A = IHH_A, 
             IHH_D = IHH_D,
             IES = IES,
             INES = INES)
}
system.time(slow_scan <- slow_scan_hh(haplohh_cgu_bta12))

## ----scancomp-----------------------------------------------------------------
identical(slow_scan[, "IHH_A"], scan[, "IHH_A"])
identical(slow_scan[, "IHH_D"], scan[, "IHH_D"])
identical(slow_scan[, "IES"], scan[, "IES"])
identical(slow_scan[, "INES"], scan[, "INES"])

## ----ihh2ihs1, eval = FALSE---------------------------------------------------
#  ## demo code - no data files for all chromosomes provided
#  for(i in 1:29) {
#    # haplotype file name for each chromosome
#    hap_file = paste("hap_chr_", i, ".cgu", sep = "")
#    # create internal representation
#    hh <- data2haplohh(hap_file = hap_file,
#                       map_file = "map.inp",
#                       chr.name = i,
#                       allele_coding = "map")
#    # perform scan on a single chromosome (calculate iHH values)
#    scan <- scan_hh(hh)
#    # concatenate chromosome-wise data frames to
#    # a data frame for the whole genome
#    # (more efficient ways certainly exist...)
#    if (i == 1) {
#      wgscan <- scan
#    } else {
#      wgscan <- rbind(wgscan, scan)
#    }
#  }
#  # calculate genome-wide iHS values
#  wgscan.ihs <- ihh2ihs(wgscan)

## ----ihh2ihs2-----------------------------------------------------------------
library(rehh.data)
data(wgscan.cgu)
wgscan.ihs.cgu <- ihh2ihs(wgscan.cgu)

## -----------------------------------------------------------------------------
head(wgscan.ihs.cgu$ihs)

## -----------------------------------------------------------------------------
head(wgscan.ihs.cgu$frequency.class)

## ----ines2rsb2----------------------------------------------------------------
data(wgscan.cgu) ; data(wgscan.eut)
## results from a genome scan (44,057 SNPs) see ?wgscan.eut and ?wgscan.cgu for details
rsb.cgu_eut <- ines2rsb(scan_pop1 = wgscan.cgu,
                        scan_pop2 = wgscan.eut,
                        popname1 = "CGU",
                        popname2 = "EUT")

## -----------------------------------------------------------------------------
head(rsb.cgu_eut)

## ----ies2xpehh2---------------------------------------------------------------
data(wgscan.cgu) ; data(wgscan.eut)
## results from a genome scan (44,057 SNPs) see ?wgscan.eut and ?wgscan.cgu for details
xpehh.cgu_eut <- ies2xpehh(scan_pop1 =  wgscan.cgu,
                           scan_pop2 =  wgscan.eut,
                           popname1 = "CGU",
                           popname2 = "EUT")

## -----------------------------------------------------------------------------
head(xpehh.cgu_eut)

## ----candidate_regions--------------------------------------------------------
cr.cgu <- calc_candidate_regions(wgscan.ihs.cgu,
                                 threshold = 4,
                                 pval = TRUE,
                                 window_size = 1E6,
                                 overlap = 1E5,
                                 min_n_extr_mrk = 2)
cr.cgu

## ----freqbin, fig.align='center', fig.lp='fig:', fig.cap='Graphical output of the freqbinplot() function', fig.pos='!h'----
freqbinplot(wgscan.ihs.cgu)

## ----comp, echo = TRUE, fig.align = 'center', fig.lp = 'fig:', fig.cap = 'Comparison of Rsb and XP-EHH values across the CGU and EUT populations', fig.pos = "!h"----
plot(rsb.cgu_eut[, "RSB_CGU_EUT"],
     xpehh.cgu_eut[, "XPEHH_CGU_EUT"],
     xlab = "Rsb",
     ylab = "XP-EHH",
     pch = ".",
     xlim = c(-7.5, 7.5),
     ylim = c(-7.5, 7.5))
# add red circle for marker with name "F1205400"
points(rsb.cgu_eut["F1205400", "RSB_CGU_EUT"],
       xpehh.cgu_eut["F1205400", "XPEHH_CGU_EUT"],
       col = "red")
# add dashed diagonal
abline(a = 0, b = 1, lty = 2)

## ----distribplot, fig.align = 'center', fig.lp = 'fig:', fig.cap = 'Graphical output of the distribplot() function', fig.pos="!h"----
distribplot(wgscan.ihs.cgu$ihs$IHS, xlab = "iHS")

## ----qqplot, fig.align = 'center', fig.lp='fig:', fig.cap = 'Graphical output of the distribplot() function', fig.pos = "!h"----
distribplot(wgscan.ihs.cgu$ihs$IHS, 
            xlab = "iHS", 
            qqplot = TRUE)

## ----manhattanplot, fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = 'Graphical output of the manhattanplot() function', fig.pos = '!h'----
manhattanplot(wgscan.ihs.cgu,
              main = "iHS (CGU cattle breed)")

## ----pval, fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = "Graphical output of the manhattanplot() function", fig.pos = '!h'----
manhattanplot(wgscan.ihs.cgu,
              pval = TRUE,
              threshold = 4,
              main = "p-value of iHS (CGU cattle breed)")

## ----manhattanplotsub, fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = 'Graphical output of the manhattanplot() function', fig.pos = '!h'----
# re-define colors
palette(c("red", "green"))
manhattanplot(wgscan.ihs.cgu, 
              pval = TRUE,
              threshold = 4, 
              chr.name = c("1", "4", "5", "12"), 
              main = "iHS (CGU cattle breed)", 
              cr = cr.cgu,
              mrk = "F1205400",
              inset = 1E+7,
              resolution = c(200000, 0.05))
# set back to default colors
palette("default")

## ----rehh2qqman---------------------------------------------------------------
# extract data frame from result list
ihs <- wgscan.ihs.cgu$ihs
# create new data frame
wgscan.cgu.ihs.qqman <- data.frame(
    CHR = as.integer(factor(ihs$CHR, 
                            levels = unique(ihs$CHR))),
                               # chromosomes as integers
    BP = ihs$POSITION,         # base pairs
    P = 10**(-ihs$LOGPVALUE),  # transform back to p-values
    SNP = row.names(ihs)       # SNP names
    )

## ----qqman, eval = requireNamespace("qqman", quietly = TRUE), fig.align = 'center', fig.width = 7, fig.lp = 'fig:', fig.cap = 'Graphical output of the qqman::manhattan() function', fig.pos = '!h', message = FALSE----
library(qqman)
qqman::manhattan(wgscan.cgu.ihs.qqman,
                 col = c("red","green"),
                 chrlabs = unique(ihs$CHR),
                 suggestiveline = 4,
                 highlight = "F1205400",
                 annotatePval = 0.0001)

## ----plothaplo, results = "hide", fig.align = 'center', fig.width = 7, fig.height = 5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.haplohh() function', fig.pos = '!h'----
hh_subset <- subset(haplohh_cgu_bta12, select.mrk = 350:550)
oldpar <- par(mar = c(3, 2, 2, 2) + 0.1)
plot(
  hh_subset,
  mrk = "F1205400",
  group_by_allele = TRUE,
  ignore.distance = TRUE,
  col = c(NA, "red"),
  linecol = c("lightblue", "lightpink"),
  mrk.col = "black",
  cex = 0.1,
  pos.lab.hap = "none",
  pos.lab.mrk = "none"
)
par(oldpar)

## -----------------------------------------------------------------------------
data(haplohh_cgu_bta12)
furcation <- calc_furcation(haplohh_cgu_bta12,
                            mrk = "F1205400")

## ----furc, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.furcation() function', fig.pos = "!h"----
plot(furcation)

## ----furczoom, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.furcation() function', fig.pos = "!h"----
plot(furcation,
     xlim = c(2.8E+7, 3E+7),
     lwd = 0.05,
     hap.names = hap.names(haplohh_cgu_bta12),
     cex.lab = 0.3)

## -----------------------------------------------------------------------------
newick <- as.newick(furcation,
                    allele = 0,
                    side = "left",
                    hap.names = hap.names(haplohh_cgu_bta12))

## ----newick, eval = requireNamespace("ape", quietly = TRUE), fig.align = 'center', fig.width = 6, fig.height = 6, fig.lp = 'fig:', fig.cap = 'Graphical output of the ape::plot.phylo() function', fig.pos = "!h"----
library(ape)
tree <- ape::read.tree(text = newick)
plot(tree, 
     cex = 0.5, 
     direction = "leftwards", 
     edge.color = "blue",
     underscore = TRUE,
     no.margin = TRUE)

## -----------------------------------------------------------------------------
haplen <- calc_haplen(furcation)

## -----------------------------------------------------------------------------
haplen$mrk.name

## -----------------------------------------------------------------------------
haplen$position

## -----------------------------------------------------------------------------
haplen$xlim

## -----------------------------------------------------------------------------
head(haplen$haplen)

## ----haplo, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.haplen() function', fig.pos = "!h"----
plot(haplen)

## ----haplozoom, fig.align = 'center', fig.width = 7, fig.height = 7.5, fig.lp = 'fig:', fig.cap = 'Graphical output of the plot.haplen() function', fig.pos = "!h"----
plot(haplen,
     allele = 0,
     xlim = c(haplen$xlim[1], haplen$position),
     hap.names = hap.names(haplohh_cgu_bta12),
     cex.lab = 0.3,
     legend.xy.coords = "none")

## -----------------------------------------------------------------------------
# finding the index number of marker "F1205400"
mrk.nr = which(mrk.names(haplohh_cgu_bta12) == "F1205400")
# subset of all markers on the "left" of the focal one
hh_left = subset(haplohh_cgu_bta12, select.mrk = 1:mrk.nr)
# check for duplicated rows
which(duplicated(haplo(hh_left)))
# row 248 is identical to a previous row, but which one?
# get the other row by a search in opposite direction
which(duplicated(haplo(hh_left), fromLast = TRUE))
# extract the corresponding haplotype names
hap.names(hh_left)[c(236, 248)]

## -----------------------------------------------------------------------------
remove.example.files()

Try the rehh package in your browser

Any scripts or data that you put into this service are public.

rehh documentation built on Sept. 15, 2021, 5:06 p.m.