Nothing
## ---- results='hide', message=FALSE-------------------------------------------
#if (!requireNamespace("BiocManager", quietly=TRUE))
#install.packages("BiocManager")
library(GenomicRanges)
library(AnnotationHub)
library(pd.genomewidesnp.6)
library(rtracklayer)
library(biovizBase) #needed for stain information
library(CINdex)
library(IRanges)
## ----seg----------------------------------------------------------------------
#Load example segment data into the workspace.
data("grl.data")
#Examining the class of the object - GRangesList
class(grl.data)
#Print first few rows
head(grl.data)
#The names of the list items in the GRangesList
names(grl.data)
# NOTE - The names of the list items in 'grl.data' must match the
# sample names in the clinical data input 'clin.crc'
#Extracting segment information for the sample named "s4" shown as a GRanges object
grl.data[["s4"]]
#You can see that each row in the GRanges object is a segment. The "value" columns shows the
#copy number value for that segment.
## ----probeAnnotHg19, cache=TRUE-----------------------------------------------
#connect to the underlying SQLite database that is part of the pd.genomewidesnp.6 package
con <- db(pd.genomewidesnp.6)
# get the copy number probes
cnv <- dbGetQuery(con, "select man_fsetid, chrom, chrom_start, chrom_stop,
strand from featureSetCNV;")
head(cnv, n =3) #print first few rows
#get the SNP probes
snp <- dbGetQuery(con, "select man_fsetid, dbsnp_rs_id, chrom, physical_pos,
strand from featureSet;")
head(snp, n=3)
## ---- cache=TRUE--------------------------------------------------------------
#function to convert Copy number data into GRanges object
convert.to.gr.cnv <- function(cnv2) {
cnv2$chrom <- paste0("chr", cnv2$chrom)
# subset out SNPs with missing location
cnv2 <- cnv2[!(is.na(cnv2$chrom) | is.na(cnv2$chrom_start)),]
# convert strand info to +,-,*
cnv2$strand[is.na(cnv2$strand)] <- 2
cnv2$strand <- cnv2$strand + 1
cnv2$strand <- c("+", "-", "*")[cnv2$strand]
#convert into GRanges object
cnv2.gr <- GRanges(cnv2$chrom, IRanges(cnv2$chrom_start,cnv2$chrom_stop),
cnv2$strand, ID = cnv2$man_fsetid)
return(cnv2.gr)
}
#function to convert SNP data into GRanges object
convert.to.gr.snp <- function(snp2) {
# make chromosomes the same as a chain file
snp2$chrom <- paste0("chr", snp2$chrom)
# subset out SNPs with missing location
snp2 <- snp2[!(is.na(snp2$chrom) | is.na(snp2$physical_pos)),]
# convert strand info to +,-,*
snp2$strand[is.na(snp2$strand)] <- 2
snp2$strand <- snp2$strand + 1
snp2$strand <- c("+", "-", "*")[snp2$strand]
snp2.gr <- GRanges(snp2$chrom, IRanges(snp2$physical_pos,snp2$physical_pos),
snp2$strand, ID = snp2$man_fsetid,
dbsnp = snp2$dbsnp_rs_id)
return(snp2.gr)
}
# convert this copy number data from into a GRanges object
cnv.gr <- convert.to.gr.cnv(cnv2 = cnv)
head(cnv.gr, n=3)
# convert this SNP data from into a GRanges object
snp.gr <- convert.to.gr.snp(snp2 = snp)
head(snp.gr, n=3)
## ---- cache=TRUE--------------------------------------------------------------
#subset only those probes that are in autosomes
snpgr.19.auto <- subset(snp.gr, seqnames(snp.gr) %in% c("chr1",
"chr2", "chr3","chr4",
"chr5", "chr6", "chr7",
"chr8", "chr9", "chr10",
"chr11", "chr12", "chr13",
"chr14", "chr15", "chr16",
"chr17", "chr18","chr19",
"chr20", "chr21", "chr22"))
#subset only those probes that are in autosomes
cnvgr.19.auto <- subset(cnv.gr, seqnames(cnv.gr) %in% c("chr1",
"chr2", "chr3","chr4",
"chr5", "chr6", "chr7",
"chr8", "chr9", "chr10",
"chr11", "chr12", "chr13",
"chr14", "chr15", "chr16",
"chr17", "chr18","chr19",
"chr20", "chr21", "chr22"))
#This gives a total of 1756096 probes (copy number and SNP) on this Affymetrix chip
## ----probeAnnotHg18, cache=TRUE-----------------------------------------------
con <- db(pd.genomewidesnp.6)
cnv2 <- dbGetQuery(con, "select man_fsetid, chrom, chrom_start,
chrom_stop, strand from featureSetCNV;")
snp2 <- dbGetQuery(con, "select man_fsetid, dbsnp_rs_id, chrom,
physical_pos, strand from featureSet;")
## ---- cache=TRUE--------------------------------------------------------------
# convert this copy number data into a GRanges object
cnv2gr <- convert.to.gr.cnv(cnv2 = cnv2)
head(cnv2gr, n=3)
# convert this SNP data into a GRanges object
snp2gr <- convert.to.gr.snp(snp2 = snp2)
head(snp2gr, n=3)
## ---- eval=FALSE--------------------------------------------------------------
# download.file("http://hgdownload.cse.ucsc.edu/gbdb/hg19/liftOver/hg19ToHg18.over.chain.gz",
# "hg19ToHg18.over.chain.gz")
# system("gzip -d hg19ToHg18.over.chain.gz") ## have to decompress
## ---- cache=TRUE--------------------------------------------------------------
## now use liftOver from rtracklayer, using the hg19ToHg18.over.chain from UCSC
chain <- import.chain("hg19ToHg18.over.chain")
snpgr.18 <- unlist(liftOver(snp2gr, chain))
head(snpgr.18, n=3)
cnvgr.18 <- unlist(liftOver(cnv2gr, chain))
head(cnvgr.18, n=3)
#subset only those probes that are in autosomes
snpgr.18.auto <- subset(snpgr.18, seqnames(snpgr.18) %in% c("chr1",
"chr2", "chr3","chr4",
"chr5", "chr6", "chr7",
"chr8", "chr9", "chr10",
"chr11", "chr12", "chr13",
"chr14", "chr15", "chr16",
"chr17", "chr18","chr19",
"chr20", "chr21", "chr22"))
#subset only those probes that are in autosomes
cnvgr.18.auto <- subset(cnvgr.18, seqnames(cnvgr.18) %in% c("chr1",
"chr2", "chr3","chr4",
"chr5", "chr6", "chr7",
"chr8", "chr9", "chr10",
"chr11", "chr12", "chr13",
"chr14", "chr15", "chr16",
"chr17", "chr18","chr19",
"chr20", "chr21", "chr22"))
#This gives us a total of 1756029 probes (about 1.7 million)
#Save these objects for future
#save(cnvgr.18.auto, file = "cnvgr.18.auto.RData")
#save(snpgr.18.auto, file = "snpgr.18.auto.RData")
## ----refGen, message=FALSE, warning=FALSE-------------------------------------
hg18.ucsctrack <- getIdeogram("hg18", cytoband = TRUE)
head(hg18.ucsctrack, n=3)
#The user must ensure that the input object is a GRanges object
#Save this object for future use
#save(hg18.ucsctrack, file = "hg18.ucsctrack.RData")
#NOTE - To get the reference file for hg19, use the code below
#hg19IdeogramCyto <- getIdeogram("hg19", cytoband = TRUE)
## ----Clin---------------------------------------------------------------------
data("clin.crc")
# checking the class of the object
class(clin.crc)
# checking the structure
str(clin.crc)
#Let us examine the first five rows of this object
head(clin.crc,5)
# Look at sample names
clin.crc[,1]
## ----results='hide', message=FALSE--------------------------------------------
##BiocManager::install("TxDb.Hsapiens.UCSC.hg18.knownGene")
##BiocManager::install("Homo.sapiens")
library(TxDb.Hsapiens.UCSC.hg18.knownGene)
library(Homo.sapiens)
## ----GeneAnn------------------------------------------------------------------
# We will continue to use hg18 gene annotations in this tutorial.
TxDb(Homo.sapiens) <- TxDb.Hsapiens.UCSC.hg18.knownGene
z <- select(Homo.sapiens, keys(Homo.sapiens, "ENTREZID"),
c("CDSID","CDSCHROM","CDSSTRAND","CDSSTART","CDSEND","SYMBOL"), "ENTREZID")
z1 <- na.omit(object = z) #remove NA values
# extracting only the columns we want as a matrix
geneAnno <- cbind(z1$CDSCHROM, z1$CDSSTRAND, z1$CDSSTART, z1$CDSEND, z1$SYMBOL)
colnames(geneAnno) <- c("chrom","strand", "cdsStart", "cdsEnd", "GeneName")
#So this gene annotation file looks like this
head(geneAnno, n=3)
# Examining the class and structure of this oject
class(geneAnno)
str(geneAnno)
#Save this object for future use
#save(geneAnno, file = "geneAnno.RData")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.