Nothing
## ----message=FALSE, warning=FALSE, echo = FALSE-------------------------------
library("knitr")
require("Ravages")
## -----------------------------------------------------------------------------
# Import data in a bed matrix
x <- as.bed.matrix(x=LCT.matrix.bed, fam=LCT.matrix.fam, bim=LCT.snps)
# Add population
x@ped[,c("pop", "superpop")] <- LCT.matrix.pop1000G[,c("population", "super.population")]
# Select EUR superpopulation
x <- select.inds(x, superpop=="EUR")
x@ped$pop <- droplevels(x@ped$pop)
# Group variants within know genes by extending their positions
# 500bp upstream and downstream
# (the function uses build 37 unless told otherwise)
x <- set.genomic.region(x, flank.width=500)
## -----------------------------------------------------------------------------
# a quick look at the result
table(x@snps$genomic.region, useNA = "ifany")
# Filter variants with maf in the entire sample lower than 1%
# And keep only genomic region with at least 10 SNPs
x1 <- filter.rare.variants(x, filter = "whole", maf.threshold = 0.01, min.nb.snps = 10)
table(x1@snps$genomic.region, useNA="ifany")
# run burden test CAST, using the 1000Genome population as "outcome"
# Null model for CAST
x1.H0.burden <- NullObject.parameters(x1@ped$pop, ref.level = "CEU",
RVAT = "burden", pheno.type = "categorical")
burden(x1, NullObject = x1.H0.burden, burden = "CAST", cores = 1)
# run SKAT, using the 1000Genome population as "outcome"
# COnstruct null model for SKAT, then run test with only a few permutations
x1.H0.SKAT <- NullObject.parameters(x1@ped$pop, RVAT = "SKAT", pheno.type = "categorical")
SKAT(x1, x1.H0.SKAT, params.sampling=list(perm.target = 10, perm.max = 500))
# run a similar analysis but using the RAVA-FIRST approach with WSS
# RAVA-FIRST(x1, filter = "whole", maf.threshold = 0.01, min.nb.snps = 10,
# burden = TRUE, x1.H0.burden, SKAT = F)
## ---- eval = F----------------------------------------------------------------
# # Example bed matrix with 4 variants
# x.ex <- as.bed.matrix(x=matrix(0, ncol=4, nrow=10),
# bim=data.frame(chr=1:4, id=paste("rs", 1:4, sep=""),
# dist = rep(0,4), pos=c(150,150,200,250),
# A1=rep("A", 4), A2=rep("T", 4)))
#
# # Example genes dataframe
# genes.ex <- data.frame(Chr=c(1,1,3,4), Start=c(10,110,190,220), End=c(170,180,250,260),
# Gene_Name=factor(letters[1:4]))
#
# # Attribute genomic regions without splitting the variants
# # attributed to multiple genomic regions
# x.ex <- set.genomic.region(x.ex, regions = genes.ex, split = FALSE)
# x.ex@snps$genomic.region
#
# # Split genomic regions
# x.ex.split <- bed.matrix.split.genomic.region(x.ex, split.pattern = ",")
# x.ex.split@snps$genomic.region
## -----------------------------------------------------------------------------
# Calculation of the genetic score with a maf threshold of 1%
CAST.score <- CAST(x = x1, genomic.region = x1@snps$genomic.region, maf.threshold = 0.01)
head(CAST.score)
## -----------------------------------------------------------------------------
WSS.score <- WSS(x = x1, genomic.region = x1@snps$genomic.region)
head(WSS.score)
## -----------------------------------------------------------------------------
Sum.score <- burden.weighted.matrix(x = x1, weights = rep(1, ncol(x1)))
head(Sum.score)
## ---- eval = F----------------------------------------------------------------
# # Null model
# x1.H0 <- NullObject.parameters(x1@ped$pop, ref.level = "CEU",
# RVAT = "burden", pheno.type = "categorical")
# # WSS
# burden(x = x1, NullObject = x1.H0, burden ="WSS",
# alpha=0.05, get.effect.size=TRUE, cores = 1)
#
# # Sex + a simulated variable as covariates
# sex <- x1@ped$sex
# u <- runif(nrow(x1))
# covar <- cbind(sex, u)
# # Null model with the covariate "sex"
# x1.H0.covar <- NullObject.parameters(x1@ped$pop, ref.level = "CEU",
# RVAT = "burden", pheno.type = "categorical",
# data = covar, formula = ~ sex)
#
# # Regression with the covariate "sex" without OR values
# # Using the score matrix WSS computed previously
# burden(NullObject = x1.H0.covar, burden=WSS.score, cores = 1)
## ---- eval = F----------------------------------------------------------------
# # Random continuous phenotype
# set.seed(1) ; pheno1 <- rnorm(nrow(x1))
# # Null model
# x1.H0.continuous <- NullObject.parameters(pheno1, RVAT = "burden",
# pheno.type = "continuous")
# # Test CAST
# burden(x1, NullObject = x1.H0.continuous, burden = "CAST", cores = 1)
## -----------------------------------------------------------------------------
# *** Functionally-informed WSS analysis ***
# Attribution of variants to regions and subregions
x2 <- set.genomic.region.subregion(x, regions = genes.b37,
subregions = subregions.LCT)
# Burden test
burden.subscores(x2, x1.H0.burden, cores = 1)
## ---- eval = F----------------------------------------------------------------
# # Null model
# x1.null <- NullObject.parameters(x1@ped$pop, RVAT = "SKAT", pheno.type = "categorical")
# # Permutations because no covariates
# SKAT(x1, x1.null, get.moments = "permutations", debug = TRUE,
# params.sampling = list(perm.target = 100, perm.max =5e4))
# # Theoretical on 1 core
# SKAT(x1, x1.null, get.moments = "theoretical", debug = TRUE, cores = 1)
## ---- eval = F----------------------------------------------------------------
# # Random continuous phenotype
# set.seed(1) ; pheno1 <- rnorm(nrow(x1))
# # Null Model with covariates
# x1.H0.c <- NullObject.parameters(pheno1, RVAT = "SKAT", pheno.type = "continuous",
# data = covar)
# # Run SKAT
# SKAT(x1, x1.H0.c)
## ---- eval = F----------------------------------------------------------------
# # Attribution of CADD regions
# x.CADDregions <- set.CADDregions(x)
# # Annotation of variants with adjusted CADD scores
# x <- adjustedCADD.annotation(x)
## ---- eval = FALSE------------------------------------------------------------
# # Keep only CADD regions with 2 variants and variants with a MAF greater than 1%
# # and with an adjusted CADD score greater than the median
# x.median <- filter.adjustedCADD(x.CADDregions, maf.threshold = 0.01, min.nb.snps = 2)
## ---- eval = FALSE------------------------------------------------------------
# # Functionally-informed WSS analysis
# x.burden <- burden.subscores(x.median, x1.H0.burden, burden.function = WSS)
# # SKAT analysis
# x.SKAT <- SKAT(x.median, x1.H0.SKAT)
## ---- eval = F----------------------------------------------------------------
# # *** RAVA-FIRST analysis with functionally-informed WSS and SKAT
# #Burden parameters
# burden.parameters <- list(get.effect.size = T, burden.function = WSS)
# # Chromosome by chromosome
# res.bychr <- vector("list", 22)
# for(chr in 1:22){
# x <- read.bed.matrix(paste0(path_file, prefix_vcf, chr, ".vcf.gz"))
# res.bychr[[chr]] <- RAVA.FIRST(x, H0.burden = H0.burden, H0.SKAT = H0.SKAT,
# burden.parameters = burden.parameters)
# }
## -----------------------------------------------------------------------------
# Selection of each group of individuals
CEU <- select.inds(x1, pop=="CEU")
CEU
FIN <- select.inds(x1, pop=="FIN")
FIN
GBR <- select.inds(x1, pop=="GBR")
GBR
# Combine in one file:
CEU.FIN.GBR <- rbind(CEU, FIN, GBR)
CEU.FIN.GBR
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.