inst/doc/vignette-hierinf.R

## -----------------------------------------------------------------------------
# load the package
library(hierinf)

# random number generator (for parallel computing)
RNGkind("L'Ecuyer-CMRG")

# We use a small build-in dataset for our toy example.
data(simGWAS)

# The genotype, phenotype and the control variables are saved in
# different objects.
sim.geno  <- simGWAS$x
sim.pheno <- simGWAS$y
sim.clvar <- simGWAS$clvar

## -----------------------------------------------------------------------------
# Define the second level of the tree structure.
block <- data.frame("colname" = paste0("SNP.", 1:1000),
                    "block" = rep(c("chrom 1", "chrom 2"), each = 500),
                    stringsAsFactors = FALSE)

# Cluster the SNPs
dendr <- cluster_var(x = sim.geno,
                     block = block)
                     # # the following arguments have to be specified
                     # # for parallel computation
                     # parallel = "multicore",
                     # ncpus = 2)

## ----eval=FALSE---------------------------------------------------------------
#  # Store the positions of the SNPs.
#  position <- data.frame("colnames" = paste0("SNP.", 1:1000),
#                         "position" = seq(from = 1, to = 1000),
#                         stringsAsFactors = FALSE)
#  
#  
#  # Build the hierarchical tree based on the position
#  # The argument block defines the second level of the tree structure.
#  dendr.pos <- cluster_position(position = position,
#                                block = block)
#                                # # the following arguments have to be
#                                # # specified for parallel computation
#                                # parallel = "multicore",
#                                # ncpus = 2)

## -----------------------------------------------------------------------------
# Test the hierarchy using multi sample split
set.seed(1234)
result <- test_hierarchy(x = sim.geno,
                         y = sim.pheno,
                         clvar = sim.clvar,
                         # alternatively: dendr = dendr.pos
                         dendr = dendr,
                         family = "binomial")
                         # # the following arguments have to be
                         # # specified for parallel computation
                         # parallel = "multicore",
                         # ncpus = 2)

## -----------------------------------------------------------------------------
print(result, n.terms = 4)

## -----------------------------------------------------------------------------
(coln.cluster <- result$res.hierarchy[[2, "significant.cluster"]])

## -----------------------------------------------------------------------------
compute_r2(x = sim.geno, y = sim.pheno, clvar = sim.clvar,
           res.test.hierarchy = result, family = "binomial",
           colnames.cluster = coln.cluster)

## -----------------------------------------------------------------------------
# The datasets need to be stored in different elements of a list.
# Note that the order has to be the same for all the lists.
# As a simple example, we artificially split the observations of the
# toy dataset in two parts, i.e. two datasets.
set.seed(89)
ind1 <- sample(1:500, 250)
ind2 <- setdiff(1:500, ind1)
sim.geno.2dat  <- list(sim.geno[ind1, ],
                       sim.geno[ind2, ])
sim.clvar.2dat <- list(sim.clvar[ind1, ],
                       sim.clvar[ind2, ])
sim.pheno.2dat <- list(sim.pheno[ind1],
                       sim.pheno[ind2])

# Cluster the SNPs
dendr <- cluster_var(x = sim.geno.2dat,
                     block = block)
                     # # the following arguments have to be specified
                     # # for parallel computation
                     # parallel = "multicore",
                     # ncpus = 2)

# Test the hierarchy using multi sample split
set.seed(1234)
result <- test_hierarchy(x = sim.geno.2dat,
                         y = sim.pheno.2dat,
                         clvar = sim.clvar.2dat,
                         dendr = dendr,
                         family = "binomial")
                         # # the following arguments have to be
                         # # specified for parallel computation
                         # parallel = "multicore",
                         # ncpus = 2)

## -----------------------------------------------------------------------------
print(result, n.terms = 4)

Try the hierinf package in your browser

Any scripts or data that you put into this service are public.

hierinf documentation built on Nov. 8, 2020, 7:08 p.m.