inst/doc/RVS.R

## ----include=FALSE, cache=FALSE-----------------------------------------------
suppressMessages(library(RVS))
suppressMessages(library(kinship2))
suppressMessages(library(snpStats))

## -----------------------------------------------------------------------------
data(samplePedigrees)

# store the pedigrees
fam_type_A <- samplePedigrees$firstCousinPair
fam_type_B <- samplePedigrees$secondCousinPair
fam_type_C <- samplePedigrees$firstCousinTriple
fam_type_D <- samplePedigrees$secondCousinTriple

# re-label the family ids for this example
fam_type_A$famid <- rep('SF_A', length(fam_type_A$id))
fam_type_B$famid <- rep('SF_B', length(fam_type_B$id))
fam_type_C$famid <- rep('SF_C', length(fam_type_C$id))
fam_type_D$famid <- rep('SF_D', length(fam_type_D$id))

## -----------------------------------------------------------------------------
plot(fam_type_D)

## -----------------------------------------------------------------------------
p <- RVsharing(fam_type_D)

## -----------------------------------------------------------------------------
# compute the sharing probabilities for all families
fams <- list(fam_type_A, fam_type_B, fam_type_C, fam_type_D)
sharing_probs <- suppressMessages(RVsharing(fams))
signif(sharing_probs, 3)

# compute p-value for this sharing pattern
sharing_pattern <- c(TRUE, TRUE, FALSE, FALSE)
names(sharing_pattern) <- names(sharing_probs)
multipleFamilyPValue(sharing_probs, sharing_pattern)

## -----------------------------------------------------------------------------
pedfile <- system.file("extdata/sample.ped.gz", package="RVS")
sample <- snpStats::read.pedfile(pedfile, snps=paste('variant', LETTERS[1:20], sep='_'))

## -----------------------------------------------------------------------------
A_fams <- lapply(1:3, function(i) samplePedigrees$firstCousinPair)
B_fams <- lapply(1:3, function(i) samplePedigrees$secondCousinPair)
C_fams <- lapply(1:3, function(i) samplePedigrees$firstCousinTriple)
D_fams <- lapply(1:3, function(i) samplePedigrees$secondCousinTriple)
fams <- c(A_fams, B_fams, C_fams, D_fams)
famids <- unique(sample$fam$pedigree)
for (i in 1:12)
{
    fams[[i]]$famid <- rep(famids[i], length(fams[[i]]$id))
}
sharingProbs <- suppressMessages(RVsharing(fams))
signif(sharingProbs, 3)

## -----------------------------------------------------------------------------
result <- multipleVariantPValue(sample$genotypes, sample$fam, sharingProbs)
signif(result$pvalues, 3)

## -----------------------------------------------------------------------------
result <- multipleVariantPValue(sample$genotypes, sample$fam, sharingProbs, filter='bonferroni', alpha=0.05)

## -----------------------------------------------------------------------------
pvals <- result$pvalues
ppvals <- result$potential_pvalues
ppvals_sub <- ppvals[names(pvals)] # subset potential p-values

plot(-log10(ppvals[order(ppvals)]), ylab="-log10 p-value", col="blue", type="l", xaxt="n", xlab="variants", ylim=c(0,8))
xlabel <- sapply(names(ppvals)[order(ppvals)], function(str) substr(str, nchar(str), nchar(str)))
axis(1, at=1:length(ppvals), labels=xlabel)
points(-log10(pvals[order(ppvals_sub)]), pch=20, cex=1.3)
bcut <- 0.05/(1:20)
lines(1:20,-log10(bcut),col="red",type="b",pch=20)

## -----------------------------------------------------------------------------
# calculate p-values for each MAF
freq <- seq(0,0.05,0.005)
variants <- names(sort(result$pvalues))[1:3]
pvals <- matrix(nrow=length(freq), ncol=length(variants))
pvals[1,] = sort(result$pvalues)[1:3]
for (i in 2:length(freq))
{
    sharingProbs <- suppressMessages(RVsharing(fams, alleleFreq=freq[i]))
    pvals[i,] <- multipleVariantPValue(sample$genotypes[,variants], sample$fam, sharingProbs)$pvalues
}
colnames(pvals) <- variants

# plot p-values as a function of MAF
plot(NULL, xlim=c(min(freq),max(freq)), ylim=c(0,max(pvals)), type='l',
    xlab="minor allele frequency", ylab="p-value",
    main="sensitivity of p-value to allele frequency in three variants")
lines(freq, pvals[,1], col="black")
lines(freq, pvals[,2], col="red")
lines(freq, pvals[,3], col="blue")
legend(min(freq), max(pvals), legend=colnames(pvals), col=c("black", "red", "blue"), lty=1)

## -----------------------------------------------------------------------------
# calculate p-values for each kinship coefficient
kin_coef <- seq(0, 0.05, length=6)
variants <- names(sort(result$pvalues))[1:3]
pvals <- matrix(nrow=length(kin_coef), ncol=length(variants))
pvals[1,] = sort(result$pvalues)[1:3]
for (i in 2:length(kin_coef))
{
    sharingProbs <- suppressMessages(RVsharing(fams, kinshipCoeff=kin_coef[i]))
    pvals[i,] <- multipleVariantPValue(sample$genotypes[,variants], sample$fam, sharingProbs)$pvalues
}
colnames(pvals) <- variants

# plot p-values as a function of kinship
plot(NULL, xlim=c(min(kin_coef), max(kin_coef)), ylim=c(0,max(pvals)), type='l',
    xlab="kinship coefficient", ylab="p-value",
    main="sensitivity of p-value to kinship in three variants")
lines(kin_coef, pvals[,1], col="black")
lines(kin_coef, pvals[,2], col="red")
lines(kin_coef, pvals[,3], col="blue")
legend(min(kin_coef), max(pvals), legend=colnames(pvals), col=c("black", "red", "blue"), lty=1)

## -----------------------------------------------------------------------------
enrichmentPValue(sample$genotypes, sample$fam, sharingProbs, 0.001)

## -----------------------------------------------------------------------------
carriers = c(15,16,17)
carrier.sets = list()
for (i in length(carriers):1)
carrier.sets = c(carrier.sets, combn(carriers,i,simplify=FALSE))
fam15157.pattern.prob = sapply(carrier.sets,function (vec) RVsharing(samplePedigrees$secondCousinTriple,carriers=vec))
fam15157.N = sapply(carrier.sets,length)

## -----------------------------------------------------------------------------
fam15157.pattern.prob = RVsharing(samplePedigrees$secondCousinTriple,splitPed=TRUE)

## -----------------------------------------------------------------------------
fam15157.pattern.prob = c(RVsharing(samplePedigrees$secondCousinTriple,carriers=c(15,16,17)),
    RVsharing(samplePedigrees$secondCousinTriple,carriers=c(15,16)),
    RVsharing(samplePedigrees$secondCousinTriple,carriers=c(15)))
fam15157.N = 3:1
fam15157.nequiv = c(1,3,3)

## -----------------------------------------------------------------------------
sum(fam15157.pattern.prob*fam15157.nequiv)

## -----------------------------------------------------------------------------
fam15157 <- samplePedigrees$secondCousinTriple
fam15157$affected[3] = 1
plot(fam15157)

## -----------------------------------------------------------------------------
carriers = c(3,15,16,17)
carrier.sets = list()
for (i in length(carriers):1)
carrier.sets = c(carrier.sets, combn(carriers,i,simplify=FALSE))
carrier.sets
carrier.sets = carrier.sets[-c(5,9,10)]
fam15157.pattern.prob = sapply(carrier.sets,function (vec) RVsharing(fam15157,carriers=vec,useAffected=TRUE))
fam15157.N = sapply(carrier.sets,length)

## -----------------------------------------------------------------------------
sum(fam15157.pattern.prob)

## -----------------------------------------------------------------------------
pobs = RVsharing(fam15157,carriers=c(3,16,17),useAffected=TRUE)

## -----------------------------------------------------------------------------
sum(fam15157.pattern.prob[fam15157.pattern.prob<=pobs & fam15157.N >= 3])

## -----------------------------------------------------------------------------
fam15157.pattern.prob = c(RVsharing(samplePedigrees$secondCousinTriple,carriers=c(15,16,17)),
    RVsharing(samplePedigrees$secondCousinTriple,carriers=c(15,16)),
    RVsharing(samplePedigrees$secondCousinTriple,carriers=c(15)))
fam15157.N = 3:1
fam15157.nequiv = c(1,3,3)

fam28003.pattern.prob = c(RVsharing(samplePedigrees$firstAndSecondCousinsTriple,carriers=c(36,104,110)),
RVsharing(samplePedigrees$firstAndSecondCousinsTriple,carriers=c(36,104)),
RVsharing(samplePedigrees$firstAndSecondCousinsTriple,carriers=c(104,110)),
RVsharing(samplePedigrees$firstAndSecondCousinsTriple,carriers=c(36)),
RVsharing(samplePedigrees$firstAndSecondCousinsTriple,carriers=c(104)))
fam28003.N = c(3,2,2,1,1)
fam28003.nequiv = c(1,2,1,1,2)

ex.pattern.prob.list = list("15157"=fam15157.pattern.prob,"28003"=fam28003.pattern.prob)
ex.nequiv.list = list("15157"=fam15157.nequiv,"28003"=fam28003.nequiv)
ex.N.list = list("15157"=fam15157.N,"28003"=fam28003.N)

## -----------------------------------------------------------------------------
data(famVCF)
fam15157.snp = VariantAnnotation::genotypeToSnpMatrix(fam15157.vcf)
fam28003.snp = VariantAnnotation::genotypeToSnpMatrix(fam28003.vcf)

## -----------------------------------------------------------------------------
ex.SnpMatrix.list = list(fam15157=fam15157.snp$genotypes,fam28003=fam28003.snp$genotypes)
ex.ped.obj = list(fam15157=samplePedigrees$secondCousinTriple,fam28003=samplePedigrees$firstAndSecondCousinsTriple)

## -----------------------------------------------------------------------------
sites = c(92,119)
ex.SnpMatrix.list[["fam15157"]][,sites[1]]@.Data
ex.SnpMatrix.list[["fam28003"]][,sites[2]]@.Data

## -----------------------------------------------------------------------------
RVgene(ex.SnpMatrix.list,ex.ped.obj,sites,pattern.prob.list=ex.pattern.prob.list,nequiv.list=ex.nequiv.list,N.list=ex.N.list,type="count")

## -----------------------------------------------------------------------------
ped <- samplePedigrees$firstCousinTriple
ped$affected[9] <- 0
plot(ped)

p <- RVsharing(ped)
p <- RVsharing(ped, useAffected=TRUE)
p <- RVsharing(ped, carriers=c(7,9,10))
p <- RVsharing(ped, carriers=c(10,11), useAffected=TRUE)

## -----------------------------------------------------------------------------
p <- RVsharing(samplePedigrees$firstCousinPair, alleleFreq=0.01)
p <- RVsharing(samplePedigrees$firstCousinPair, alleleFreq=0.01, nSim=1e5)

## -----------------------------------------------------------------------------
# assumption that 1 founder introduces variant
fDist <- function(N) sample(c(rep(0,N-1), 1))
p <- RVsharing(samplePedigrees$firstCousinPair, nSim=1e5, founderDist=fDist)
p <- RVsharing(samplePedigrees$firstCousinPair)

## -----------------------------------------------------------------------------
plot(samplePedigrees$twoGenerationsInbreeding)
ComputeKinshipPropCoef(samplePedigrees$twoGenerationsInbreeding)

Try the RVS package in your browser

Any scripts or data that you put into this service are public.

RVS documentation built on Nov. 8, 2020, 6:57 p.m.