inst/doc/pcair.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE)

## ---- eval=FALSE--------------------------------------------------------------
#  geno <- MatrixGenotypeReader(genotype = genotype, snpID = snpID,
#                               chromosome = chromosome, position = position,
#                               scanID = scanID)
#  genoData <- GenotypeData(geno)

## ---- eval=FALSE--------------------------------------------------------------
#  geno <- GdsGenotypeReader(filename = "genotype.gds")
#  genoData <- GenotypeData(geno)

## ---- eval=FALSE--------------------------------------------------------------
#  snpgdsBED2GDS(bed.fn = "genotype.bed",
#                bim.fn = "genotype.bim",
#                fam.fn = "genotype.fam",
#                out.gdsfn = "genotype.gds")

## -----------------------------------------------------------------------------
gdsfile <- system.file("extdata", "HapMap_ASW_MXL_geno.gds", package="GENESIS")

## ----seed, include=FALSE------------------------------------------------------
# set seed for LD pruning
set.seed(100)

## -----------------------------------------------------------------------------
library(SNPRelate)
# read in GDS data
gds <- snpgdsOpen(gdsfile)
snpset <- snpgdsLDpruning(gds, method="corr", slide.max.bp=10e6, 
                          ld.threshold=sqrt(0.1), verbose=FALSE)
pruned <- unlist(snpset, use.names=FALSE)
length(pruned)
head(pruned)
snpgdsClose(gds)

## -----------------------------------------------------------------------------
# create matrix of KING estimates
library(GENESIS)
KINGmat <- kingToMatrix(
    c(system.file("extdata", "MXL_ASW.kin0", package="GENESIS"),
      system.file("extdata", "MXL_ASW.kin", package="GENESIS")),
      estimator = "Kinship")
KINGmat[1:5,1:5]

## -----------------------------------------------------------------------------
library(GWASTools)
HapMap_geno <- GdsGenotypeReader(filename = gdsfile)
# create a GenotypeData class object
HapMap_genoData <- GenotypeData(HapMap_geno)
HapMap_genoData

## -----------------------------------------------------------------------------
# run PC-AiR on pruned SNPs
mypcair <- pcair(HapMap_genoData, kinobj = KINGmat, divobj = KINGmat,
                 snp.include = pruned)

## ---- echo=FALSE, eval=FALSE--------------------------------------------------
#  #kinobj and divobj are now required arguments
#  # As PCA is an unsupervised method, it is often difficult to identify what population structure each of the top PCs represents.  In order to provide some understanding of the inferred structure, it is sometimes recommended to include reference population samples of known ancestry in the analysis.  If the data set contains such reference population samples, we may want to use only those individuals as the "unrelated subset" for performing the PCA and predict values for all other sample individuals.  This can be accomplished by setting the input `unrel.set` equal to a vector, `IDs`, of the individual IDs for the reference population samples.
#  mypcair <- pcair(HapMap_genoData, unrel.set = IDs)

## ---- eval=FALSE--------------------------------------------------------------
#  mypcair <- pcair(HapMap_genoData, kinobj = KINGmat, divobj = KINGmat,
#                   snp.include = pruned, unrel.set = IDs)

## -----------------------------------------------------------------------------
part <- pcairPartition(kinobj = KINGmat, divobj = KINGmat)

## -----------------------------------------------------------------------------
head(part$unrels); head(part$rels)

## -----------------------------------------------------------------------------
summary(mypcair)

## ---- fig.show='hold', fig.small=TRUE-----------------------------------------
# plot top 2 PCs
plot(mypcair)
# plot PCs 3 and 4
plot(mypcair, vx = 3, vy = 4)

## -----------------------------------------------------------------------------
# run PC-Relate
HapMap_genoData <- GenotypeBlockIterator(HapMap_genoData, snpInclude=pruned)
mypcrelate <- pcrelate(HapMap_genoData, pcs = mypcair$vectors[,1:2], 
                       training.set = mypcair$unrels)

## -----------------------------------------------------------------------------
plot(mypcrelate$kinBtwn$k0, mypcrelate$kinBtwn$kin, xlab="k0", ylab="kinship")

## -----------------------------------------------------------------------------
iids <- as.character(getScanID(HapMap_genoData))
pcrelateToMatrix(mypcrelate, sample.include = iids[1:5], thresh = 2^(-11/2), scaleKin = 2)

## ---- echo=FALSE--------------------------------------------------------------
close(HapMap_genoData)

Try the GENESIS package in your browser

Any scripts or data that you put into this service are public.

GENESIS documentation built on Jan. 30, 2021, 2:01 a.m.