inst/doc/CNVrd2.R

## ----setup, include=FALSE, cache=FALSE, eval = TRUE-------------------------------------
library(knitr)
opts_chunk$set(fig.path='./figures/CNVrd2-', 
               fig.align='center', fig.show='asis', 
               eval = TRUE, fig.width = 6,
               fig.height = 6, size='small')
options(replace.assign=TRUE,width=90)

## ----options,echo=FALSE-----------------------------------------------
options(width=72)

## ----warning=FALSE, results="hide"------------------------------------
library('CNVrd2')

## ----warning=FALSE, results="hide", eval=FALSE------------------------
#  objectCNVrd2 <- new("CNVrd2", windows = 1000, chr = "chr1",
#                      st = 161100001, en = 162100000,
#                      dirBamFile = "BamMXL",
#                      genes = c(161592986, 161601753),
#                      geneNames = "3B")
#  

## ----warnings=FALSE, results="hide", eval=FALSE-----------------------
#  readCountMatrix <- countReadInWindow(Object = objectCNVrd2, correctGC = TRUE)
#  

## ----warning=FALSE, results="hide", fig.keep='none', eval=FALSE-------
#  ##Obtain segmentation scores
#  resultSegment <- segmentSamples(Object = objectCNVrd2, stdCntMatrix = readCountMatrix)

## ----'fcgr3bSS', fig.show='asis', results="hide", fig.cap='FCGR3B segmentation score.'----
##Load data into R
data(fcgr3bMXL)
##Reload readCountMatrix
readCountMatrix <- resultSegment$stdCntMatrix
##Take a quick look the data
readCountMatrix[1:2, 1:2]
##Make a CNVrd2 object 
objectCNVrd2 <- new("CNVrd2", windows = 1000, chr = "chr1",
                    st = 161100001, en = 162100000,
                    dirBamFile = "BamMXL",
                    genes = c(161592986, 161601753),
                    geneNames = "3B")
##Obtain segmentation scores
resultSegment <- segmentSamples(Object = objectCNVrd2, stdCntMatrix = readCountMatrix)
##View these segmentation results
sS <- resultSegment$segmentationScores
hist(sS[, 1], 100, xlab = 'Segmentation score', main = '')

## ----'fcgr3b4CNs', fig.show='asis', results="hide",warning=FALSE, fig.cap='FCGR3B CN groups.'----
objectCluster <- new("clusteringCNVs",
                     x = resultSegment$segmentationScores[, 1], k = 4, EV = TRUE)
#Cluster into 4 groups
copynumberGroups <- groupCNVs(Object = objectCluster)


## ----warning=FALSE----------------------------------------------------
copynumberGroups$allGroups[1:3, ]

## ----'fcgr3b3CNs', fig.show='asis', results="hide",warning=FALSE, fig.cap = 'FCGR3B CN groups (rightLimit = 1.5).'----
#Set right limit = 1.5 to make values > 1.5 be into the largest group.
objectCluster <- new("clusteringCNVs",
                     x = resultSegment$segmentationScores[, 1], k = 3, EV = TRUE)
copynumberGroups <- groupCNVs(Object = objectCluster, rightLimit = 1.5)

## ----'fcgr3bdupSamples', fig.show='asis', warning=FALSE, fig.cap='MXL duplicated samples.', size='small'----
allGroups <- copynumberGroups$allGroups
###Obtain names of duplicate samples
duplicatedSamples <- rownames(allGroups[allGroups[, 2] > 2,])
###Plot 6 duplicate samples
par(mfrow = c(3, 2))
for (ii in duplicatedSamples[1:6])
    plotCNVrd2(Object = objectCNVrd2,
                         
               segmentObject = resultSegment,
               
                             
               sampleName = ii)


## ----warning=FALSE, results="hide"------------------------------------
##Obtain VCF-file information in CNVrd2 package
vcfFile <- system.file(package="CNVrd2", "extdata",
                       "chr1.161600000.161611000.vcf.gz")
##Make a data frame named sampleCNV including samples, CNs, population names
sampleCNV <- data.frame(copynumberGroups$allGroups[, c(1,2) ],rep("MXL", dim(copynumberGroups$allGroups)[1]))
rownames(sampleCNV) <- substr(sampleCNV[, 1], 1, 7)
sampleCNV[, 1] <- rownames(sampleCNV)
##The first column must be the sample names and some samples should be in the vcf file
tagSNPandINDELofMXL <- calculateLDSNPandCNV(sampleCNV = sampleCNV,
                                            vcfFile = vcfFile, cnvColumn = 2,
                                            population = "MXL", popColumn = 3,
                                            nChunkForVcf = 5, chr = "1",
                                            st = 161600001, en = 161611000,
                                            codeSNP= "Three", codeCNV = "ThreeGroup")

## ----warning=FALSE----------------------------------------------------
head(tagSNPandINDELofMXL)

## ----'ccl3l1Histogram', warning=FALSE, fig.cap = 'CCL3L1 segmentation score.'----
##Load data into R:
data(ccl3l1data)
head(ccl3l1data)
hist(ccl3l1data$SS, 100)

## ----'EUccl3l1Histogram', warning=FALSE, fig.cap = 'European-ancestry segmentation score.'----
xyEuro <- ccl3l1data[grep("CEU|TSI|IBS|GBR|FIN", ccl3l1data[, 2]), ]
yEuro <- xyEuro[, 3]
names(yEuro) <- xyEuro[, 1]
hist(yEuro, 100, xlab = '', main = '')


## ----'EUccl3l1results', warning=FALSE, fig.cap = 'Clustering results of European-ancestry sample sets.', results="hide"----
##Clustering European segmentation 
##scores into group: 5 groups were chosen

objectClusterEuroCCL3L1 <- new("clusteringCNVs", x = yEuro, k = 5)

europeanCCL3L1Groups <- groupCNVs(Object = objectClusterEuroCCL3L1)


## ----warning=FALSE, results="hide"------------------------------------
#Means
lambda0 <- as.numeric(europeanCCL3L1Groups$m)
#SD
sdEM <- as.numeric(europeanCCL3L1Groups$sigma)
#Proportions
pEM <- as.numeric(europeanCCL3L1Groups$p)

## ----warning=FALSE----------------------------------------------------
lambda0
sdEM
pEM
###Calculate the distances between groups
for (ii in 2:5){print(lambda0[ii] - lambda0[ii-1])}

###All segmentation scores
ccl3l1X <- ccl3l1data$SS
names(ccl3l1X) <- as.character(ccl3l1data$Name)
range(ccl3l1X)


## ---------------------------------------------------------------------
##Set prior information:
#prior for the sd of the means of groups: 
#5 was set for the third group = 2 CN
sd <- c(1, 1, 5, 1, 1) 
ccl3l1X <- sort(ccl3l1X)
###Data
xData <- ccl3l1X
###Number of groups
nGroups <- 10 
###prior for means of groups
lambda0 <- lambda0 
###Prior for mixing proportions
alpha0 <-  c(3, 29, 44, 18, 7,  5, rep(2, nGroups -length(pEM) -1))
##Prior for the distances between groups
distanceBetweenGroups = 0.485

sdEM = sdEM


## ---------------------------------------------------------------------
##Adjust standard deviation for the fifth group
sdEM[5] <- sdEM[4]
 

## ----eval = FALSE, echo = TRUE----------------------------------------
#  set.seed(123)
#  groupCCL3L1allPops <- groupBayesianCNVs(xData = xData, nGroups = nGroups,
#                                          lambda0 = lambda0,
#                                          sd0 = sdEM, alpha0 = alpha0,
#                                          distanceBetweenGroups = distanceBetweenGroups,
#                                          sdOftau = sd,
#                                          rightLimit = 4)

## ----warning=FALSE, results="hide"------------------------------------
rownames(ccl3l1data) <- ccl3l1data[, 1]


## ----warning=FALSE, results="hide"------------------------------------
##Obtain vcf-file information in CNVrd2
vcfFileCCL3L1 <- system.file(package="CNVrd2", "extdata",
                       "chr17.34800000.34830000.vcf.gz")
##Set populations we would like to identify tagSNPs
allPops <- c("TSI", "CEU", "GBR", "FIN", "IBS")

## ----warning = FALSE, results="hide"----------------------------------
##Identify tag SNPs/INDELs
tagSNPandINDELofCCL3L1 <- calculateLDSNPandCNV(sampleCNV = ccl3l1data,
                                            vcfFile = vcfFileCCL3L1, cnvColumn = 4,
                                            population = allPops, popColumn = 2,
                                            nChunkForVcf = 5, chr = "17",
                                            st = 34800000, en = 34830000 )


## ----warnings=FALSE, size='tiny'--------------------------------------
lapply(tagSNPandINDELofCCL3L1, head)

## ----warning=FALSE, results="hide"------------------------------------
##Notice: these results are in a list
for (ii in 1:length(allPops))
    write.table(tagSNPandINDELofCCL3L1[[ii]], 
                paste("TagSNPforPop", ii, ".csv", sep = ""), 
                quote = FALSE, sep = ",")


## ----warnings=FALSE---------------------------------------------------
fcgr3PolymorphicRegion <- identifyPolymorphicRegion(Object = objectCNVrd2,
                                                    segmentObject = resultSegment, 
                                                    plotLegend = FALSE)


## ----'FCGR3polymorphc1', warning=FALSE, fig.cap = 'CN polymorphic region at FCGR3 locus, represented by quantiles of the distribution of segmentation scores across samples.'----
plotPolymorphicRegion(Object = objectCNVrd2, polymorphicRegionObject = fcgr3PolymorphicRegion,
                      xlim = c(161300000, 161800000), drawThresholds = TRUE,
                      typePlot = "SD")


## ----'FCGR3polymorphc2', warning=FALSE, fig.cap = 'CN polymorphic region at FCGR3 locus, represented by quantiles of the distribution of segmentation scores across samples.'----
plotPolymorphicRegion(Object = objectCNVrd2, polymorphicRegionObject = fcgr3PolymorphicRegion,
                      xlim = c(161300000, 161800000), sdThreshold = 0.05, drawThresholds = TRUE,
                      typePlot = "SD")

## ---------------------------------------------------------------------
sessionInfo()

Try the CNVrd2 package in your browser

Any scripts or data that you put into this service are public.

CNVrd2 documentation built on Nov. 8, 2020, 5:30 p.m.