Nothing
## ----setup, include=FALSE-----------------------------------------------------
set.seed(0)
options(rmarkdown.html_vignette.check_title = FALSE)
knitr::opts_chunk$set(
out.extra = 'style="display:block; margin: auto"',
fig.align = "center",
fig.path = "./",
collapse = TRUE,
comment = "#>",
dev = "png")
## ----echo=FALSE---------------------------------------------------------------
p1 <- "
1 2 3 2 2 7/7 7/10
2 0 0 1 1 -/- -/-
3 0 0 2 2 7/9 3/10
4 2 3 2 2 7/9 3/7
5 2 3 2 1 7/7 7/10
6 2 3 1 1 7/7 7/10
7 2 3 2 1 7/7 7/10
8 0 0 1 1 -/- -/-
9 8 4 1 1 7/9 3/10
10 0 0 2 1 -/- -/-
11 2 10 2 1 7/7 7/7
12 2 10 2 2 6/7 7/7
13 0 0 1 1 -/- -/-
14 13 11 1 1 7/8 7/8
15 0 0 1 1 -/- -/-
16 15 12 2 1 6/6 7/7
"
p2 <- as.data.frame(scan(file=textConnection(p1),what=list(0,0,0,0,0,"","")))
names(p2) <-c("id","fid","mid","sex","aff","GABRB1","D4S1645")
p3 <- data.frame(pid=10081,p2)
## ----pedtodot, fig.cap="An example pedigree", fig.height=8, fig.width=7-------
library(gap)
knitr::kable(p3,caption="An example pedigree")
library(DOT)
# one can see the diagram in RStudio
pedtodot_verbatim(p3,run=TRUE,toDOT=TRUE,return="verbatim")
library(DiagrammeR)
pedtodot_verbatim(p3)
grViz(readr::read_file("10081.dot"))
## ----lukas, fig.cap="A pedigree diagram", fig.height=8, fig.width=7-----------
# pedigree diagram
data(lukas, package="gap.datasets")
library(kinship2)
ped <- with(lukas,pedigree(id,father,mother,sex))
plot(ped,cex=0.4)
## -----------------------------------------------------------------------------
# unordered individuals
gk1 <- kin.morgan(lukas)
write.table(gk1$kin.matrix,"gap_1.txt",quote=FALSE)
library(kinship2)
kk1 <- kinship(lukas[,1],lukas[,2],lukas[,3])
write.table(kk1,"kinship_1.txt",quote=FALSE)
d <- gk1$kin.matrix-kk1
sum(abs(d))
# order individuals so that parents precede their children
library(pedigree)
op <- orderPed(lukas)
olukas <- lukas[order(op),]
gk2 <- kin.morgan(olukas)
write.table(olukas,"olukas.csv",quote=FALSE)
write.table(gk2$kin.matrix,"gap_2.txt",quote=FALSE)
kk2 <- kinship(olukas[,1],olukas[,2],olukas[,3])
write.table(kk2,"kinship_2.txt",quote=FALSE)
z <- gk2$kin.matrix-kk2
sum(abs(z))
## ----fb---------------------------------------------------------------------------------------------------------------------------------------------
options(width=150)
library(gap)
models <- matrix(c(
4.0, 0.01,
4.0, 0.10,
4.0, 0.50,
4.0, 0.80,
2.0, 0.01,
2.0, 0.10,
2.0, 0.50,
2.0, 0.80,
1.5, 0.01,
1.5, 0.10,
1.5, 0.50,
1.5, 0.80), ncol=2, byrow=TRUE)
outfile <- "fbsize.txt"
cat("gamma","p","Y","N_asp","P_A","H1","N_tdt","H2","N_asp/tdt",
"L_o","L_s\n",file=outfile,sep="\t")
for(i in 1:12) {
g <- models[i,1]
p <- models[i,2]
z <- fbsize(g,p)
cat(z$gamma,z$p,z$y,z$n1,z$pA,z$h1,z$n2,z$h2,z$n3,
z$lambdao,z$lambdas,file=outfile,append=TRUE,sep="\t")
cat("\n",file=outfile,append=TRUE)
}
table1 <- read.table(outfile,header=TRUE,sep="\t")
nc <- c(4,7,9)
table1[,nc] <- ceiling(table1[,nc])
dc <- c(3,5,6,8,10,11)
table1[,dc] <- round(table1[,dc],2)
unlink(outfile)
knitr::kable(table1,caption="Power/Sample size of family-based designs")
## ----alz--------------------------------------------------------------------------------------------------------------------------------------------
g <- 4.5
p <- 0.15
alz <- data.frame(fbsize(g,p))
knitr::kable(alz,caption="Power/Sample size of study on Alzheimer's disease")
## ----pb---------------------------------------------------------------------------------------------------------------------------------------------
library(gap)
kp <- c(0.01,0.05,0.10,0.2)
models <- matrix(c(
4.0, 0.01,
4.0, 0.10,
4.0, 0.50,
4.0, 0.80,
2.0, 0.01,
2.0, 0.10,
2.0, 0.50,
2.0, 0.80,
1.5, 0.01,
1.5, 0.10,
1.5, 0.50,
1.5, 0.80), ncol=2, byrow=TRUE)
outfile <- "pbsize.txt"
cat("gamma","p","p1","p5","p10","p20\n",sep="\t",file=outfile)
for(i in 1:dim(models)[1])
{
g <- models[i,1]
p <- models[i,2]
n <- vector()
for(k in kp) n <- c(n,ceiling(pbsize(k,g,p)))
cat(models[i,1:2],n,sep="\t",file=outfile,append=TRUE)
cat("\n",file=outfile,append=TRUE)
}
table5 <- read.table(outfile,header=TRUE,sep="\t")
knitr::kable(table5,caption="Sample size of population-based design")
## ---------------------------------------------------------------------------------------------------------------------------------------------------
library(gap)
# ARIC study
outfile <- "aric.txt"
n <- 15792
pD <- 0.03
p1 <- 0.25
alpha <- 0.05
theta <- c(1.35,1.40,1.45)
beta <- 0.2
s_nb <- c(1463,722,468)
cat("n","pD","p1","hr","q","power","ssize\n",file=outfile,sep="\t")
for(i in 1:3)
{
q <- s_nb[i]/n
power <- ccsize(n,q,pD,p1,log(theta[i]),alpha,beta,TRUE)
ssize <- ccsize(n,q,pD,p1,log(theta[i]),alpha,beta,FALSE)
cat(n,"\t",pD,"\t",p1,"\t",theta[i],"\t",q,"\t",
signif(power,3),"\t",ssize,"\n",
file=outfile,append=TRUE)
}
read.table(outfile,header=TRUE,sep="\t")
unlink(outfile)
# EPIC study
outfile <- "epic.txt"
n <- 25000
alpha <- 0.00000005
beta <- 0.2
s_pD <- c(0.3,0.2,0.1,0.05)
s_p1 <- seq(0.1,0.5,by=0.1)
s_hr <- seq(1.1,1.4,by=0.1)
cat("n","pD","p1","hr","alpha","ssize\n",file=outfile,sep="\t")
# direct calculation
for(pD in s_pD)
{
for(p1 in s_p1)
{
for(hr in s_hr)
{
ssize <- ccsize(n,q,pD,p1,log(hr),alpha,beta,FALSE)
if (ssize>0) cat(n,"\t",pD,"\t",p1,"\t",hr,"\t",alpha,"\t",
ssize,"\n",
file=outfile,append=TRUE)
}
}
}
knitr::kable(read.table(outfile,header=TRUE,sep="\t"),caption="Sample size of case-cohort designs")
unlink(outfile)
## ----qq, fig.cap="A Q-Q plot", fig.height=7, fig.width=7--------------------------------------------------------------------------------------------
library(gap)
u_obs <- runif(1000)
r <- qqunif(u_obs,pch=21,bg="blue",bty="n")
u_exp <- r$y
hits <- u_exp >= 2.30103
points(r$x[hits],u_exp[hits],pch=21,bg="green")
legend("topleft",sprintf("GC.lambda=%.4f",gc.lambda(u_obs)))
## ----chicken, fig.cap="A genome-wide association study on chickens", fig.height=7, fig.width=7, results="hide"--------------------------------------
ord <- with(w4,order(chr,pos))
w4 <- w4[ord,]
oldpar <- par()
par(cex=0.6)
colors <- c(rep(c("blue","red"),15),"red")
mhtplot(w4,control=mht.control(colors=colors,gap=1000),pch=19,srt=0)
axis(2,cex.axis=2)
suggestiveline <- -log10(3.60036E-05)
genomewideline <- -log10(1.8E-06)
abline(h=suggestiveline, col="blue")
abline(h=genomewideline, col="green")
abline(h=0)
## ----mhtplot, fig.cap="A Manhattan plot with gene annotation", fig.height=7, fig.width=7, messages=FALSE, results="hide"----------------------------
data <- with(mhtdata,cbind(chr,pos,p))
glist <- c("IRS1","SPRY2","FTO","GRIK3","SNED1","HTR1A","MARCH3","WISP3",
"PPP1R3B","RP1L1","FDFT1","SLC39A14","GFRA1","MC4R")
hdata <- subset(mhtdata,gene%in%glist)[c("chr","pos","p","gene")]
color <- rep(c("lightgray","gray"),11)
glen <- length(glist)
hcolor <- rep("red",glen)
par(las=2, xpd=TRUE, cex.axis=1.8, cex=0.4)
ops <- mht.control(colors=color,yline=1.5,xline=3)
hops <- hmht.control(data=hdata,colors=hcolor)
mhtplot(data,ops,hops,pch=19)
axis(2,pos=2,at=1:16,cex.axis=0.5)
## ----circos, fig.cap="A circos Manhattan plot", fig.height=7, fig.width=8---------------------------------------------------------------------------
circos.mhtplot(mhtdata, glist)
## ----circos2, fig.cap="Another circos Manhattan plot", fig.height=7, fig.width=8--------------------------------------------------------------------
require(gap.datasets)
library(dplyr)
testdat <- mhtdata[c("chr","pos","p","gene","start","end")] %>%
rename(log10p=p) %>%
mutate(chr=paste0("chr",chr),log10p=-log10(log10p))
dat <- mutate(testdat,start=pos,end=pos) %>%
select(chr,start,end,log10p)
labs <- subset(testdat,gene %in% glist) %>%
group_by(gene,chr,start,end) %>%
summarize() %>%
mutate(cols="blue") %>%
select(chr,start,end,gene,cols)
labs[2,"cols"] <- "red"
ticks <- 0:2*5
circos.mhtplot2(dat,labs,ticks=ticks,ymax=max(ticks))
## ----miami, fig.cap="Miami plots", fig.height=7, fig.width=7----------------------------------------------------------------------------------------
mhtdata <- within(mhtdata,{pr=p})
miamiplot(mhtdata,chr="chr",bp="pos",p="p",pr="pr",snp="rsn")
# An alternative implementation
gwas <- select(mhtdata,chr,pos,p) %>%
mutate(z=qnorm(p/2))
chrmaxpos <- miamiplot2(gwas,gwas,name1="Batch 2",name2="Batch 1",z1="z",z2="z")
labelManhattan(chr=c(2,16),pos=c(226814165,52373776),name=c("AnonymousGene","FTO"),gwas,gwasZLab="z",chrmaxpos=chrmaxpos)
## ----il12b, echo=FALSE, fig.align="left", fig.cap="Association of IL-12B", fig.height=6, fig.width=6------------------------------------------------
knitr::include_graphics("IL-12B.png")
## ----asplot, fig.cap="A regional association plot", fig.height=7, fig.width=7-----------------------------------------------------------------------
asplot(CDKNlocus, CDKNmap, CDKNgenes, best.pval=5.4e-8, sf=c(3,6))
title("CDKN2A/CDKN2B Region")
## ----cnv, fig.cap="A CNV plot", fig.height=7, fig.width=7-------------------------------------------------------------------------------------------
cnvplot(gap.datasets::cnv)
## ----esplot, fig.cap="rs12075 and traits", fig.height=7, fig.width=7--------------------------------------------------------------------------------
library(gap)
rs12075 <- data.frame(id=c("CCL2","CCL7","CCL8","CCL11","CCL13","CXCL6","Monocytes"),
b=c(0.1694,-0.0899,-0.0973,0.0749,0.189,0.0816,0.0338387),
se=c(0.0113,0.013,0.0116,0.0114,0.0114,0.0115,0.00713386))
ESplot(rs12075)
## ----forest, fig.cap="Forest plots", fig.height=6, fig.width=9, results="hide", warning=FALSE-------------------------------------------------------
data(OPG,package="gap.datasets")
meta::settings.meta(method.tau="DL")
METAL_forestplot(OPGtbl,OPGall,OPGrsid,width=6.75,height=5,digits.TE=2,digits.se=2,
col.diamond="black",col.inside="black",col.square="black")
METAL_forestplot(OPGtbl,OPGall,OPGrsid,package="metafor",method="FE",xlab="Effect",
showweights=TRUE)
## ----normal, echo=FALSE, fig.cap="Normal(0,1) distribution", fig.height=5, fig.width=9--------------------------------------------------------------
library(lattice)
z0 <- 1.96
z <- 5
a <- seq(-z, z, length = 10000)
b <- dnorm(a, 0, 1)
xyplot(b ~ a,
type = "l",
panel = function(x,y, ...)
{
panel.xyplot(x,y, ...)
panel.abline(v = 0, lty = 2)
xx <- c(-z, x[x>=-z & x<=-z0], -z0)
yy <- c(0, y[x>=-z & x<=-z0], 0)
panel.polygon(xx,yy, ..., col='red')
xx <- c(z0, x[x>=z0 & x<=z], z)
yy <- c(0, y[x>=z0 & x<=z], 0)
panel.polygon(xx,yy, ..., col='red')
},
xlab="z",
ylab=expression(1/sqrt(2*pi) * exp(-z^2/2))
)
## ---------------------------------------------------------------------------------------------------------------------------------------------------
require(gap)
zlist <- c(5,10,30,40,50,100,500,1000,2000,3000,5000)
zp <- sapply(zlist,function(z) {c(z,pvalue(z),logp(z),log10p(z))})
rownames(zp) <- c("z","P","log(P)","log10(P)")
knitr::kable(t(zp),caption="z, P, log(P) and log10(P)")
## ----pve, echo=FALSE, fig.cap="Comparisons of Var($R^2$) estimates", fig.height=6, fig.width=14, messages=FALSE, warning=FALSE----------------------
oldpar <- par()
par(mfrow=c(1,2))
N <- 2:500
R2 <- 2*(N-2)/(N-1)^2/(N+1)
R2LR <- 2/(N+1)^2
R2t <- 2/(N-1)^2
plot(N,R2,cex=0.6,xaxt="n",xlab="Sample size",ylab=expression(Var(R^2)),col="black",pch=20)
points(N,R2LR,cex=0.6,pch=15,col="red")
points(N,R2t,cex=0.6,pch=17,col="blue")
axis(1,at=c(2,(1:5)*100))
legend(400,0.03,c("Asymptotic","LR","t-statistic"),col=c("black","red","blue"),pch=c(20,15,17))
sLR <- (N-2)*(N+1)/(N-1)^2
st <- (N-2)/(N+1)
plot(N,sLR,cex=0.6,xaxt="n",xlab="Sample size",ylab="Asymptotic/approximation estimator ratio",col="red",pch=20)
points(N,st,cex=0.6,pch=15,col="blue")
abline(h=1,col="black")
axis(1,at=c(2,(1:5)*100))
legend(400,0.2,c("Asymptotic","LR","t-statistic"),col=c("black","red","blue"),pch=c(20,15,17))
par(oldpar)
## ---------------------------------------------------------------------------------------------------------------------------------------------------
get_b_se(0.6396966,23991,4.7245)
## ---------------------------------------------------------------------------------------------------------------------------------------------------
get_sdy(0.6396966,23991,0.04490488,0.009504684)
## ----mrdat, echo=FALSE------------------------------------------------------------------------------------------------------------------------------
mrdat <- '
rs188743906 0.6804 0.1104 0.00177 0.01660 NA NA
rs2289779 -0.0788 0.0134 0.00104 0.00261 -0.007543 0.0092258
rs117804300 -0.2281 0.0390 -0.00392 0.00855 0.109372 0.0362219
rs7033492 -0.0968 0.0147 -0.00585 0.00269 0.022793 0.0119903
rs10793962 0.2098 0.0212 0.00378 0.00536 -0.014567 0.0138196
rs635634 -0.2885 0.0153 -0.02040 0.00334 0.077157 0.0117123
rs176690 -0.0973 0.0142 0.00293 0.00306 -0.000007 0.0107781
rs147278971 -0.2336 0.0378 -0.01240 0.00792 0.079873 0.0397491
rs11562629 0.1155 0.0181 0.00960 0.00378 -0.010040 0.0151460
'
mrdat <- setNames(as.data.frame(scan(file=textConnection(mrdat), what=list("",0,0,0,0,0,0))),
c("SNP", "b.LIF.R", "SE.LIF.R", "b.FEV1", "SE.FEV1", "b.CAD", "SE.CAD"))
knitr::kable(mrdat,caption="LIF-R and CAD/FEV1")
## ----mr, echo=FALSE, fig.cap="Mendelian randomization", fig.height=7, fig.width=7-------------------------------------------------------------------
res <- mr(mrdat, "LIF.R", c("CAD","FEV1"), other_plots=TRUE)
s <- res$r[-1,]
colnames(s) <- res$r[1,]
r <- matrix(as.numeric(s[,-1]),nrow(s),dimnames=list(res$r[-1,1],res$r[1,-1]))
p <- sapply(c("IVW","EGGER","WM","PWM"), function(x)
format(2*pnorm(-abs(r[,paste0("b",x)]/r[,paste0("seb",x)])),digits=3,scientific=TRUE))
rp <- t(data.frame(round(r,3),p))
knitr::kable(rp, align="r", caption="LIFR variant rs635634 and CAD/FEV1")
## ----mr2, fig.cap="Combined forest plots for LIF.R, FEV1 and CAD", fig.height=7, fig.width=7--------------------------------------------------------
mr_names <- names(mrdat)
LIF.R <- cbind(mrdat[grepl("SNP|LIF.R",mr_names)],trait="LIF.R"); names(LIF.R) <- c("SNP","b","se","trait")
FEV1 <- cbind(mrdat[grepl("SNP|FEV1",mr_names)],trait="FEV1"); names(FEV1) <- c("SNP","b","se","trait")
CAD <- cbind(mrdat[grepl("SNP|CAD",mr_names)],trait="CAD"); names(CAD) <- c("SNP","b","se","trait")
mrdat2 <- within(rbind(LIF.R,FEV1,CAD),{y=b})
library(ggplot2)
p <- ggplot2::ggplot(mrdat2,aes(y = SNP, x = y))+
ggplot2::theme_bw()+
ggplot2::geom_point()+
ggplot2::facet_wrap(~ trait, ncol=3, scales="free_x")+
ggplot2::geom_segment(aes(x = b-1.96*se, xend = b+1.96*se, yend = SNP))+
ggplot2::geom_vline(lty=2, ggplot2::aes(xintercept=0), colour = 'red')+
ggplot2::xlab("Effect size")+
ggplot2::ylab("")
p
## ----tnfb, echo=FALSE-------------------------------------------------------------------------------------------------------------------------------
tnfb <- '
"multiple sclerosis" 0.69058600 0.059270400
"systemic lupus erythematosus" 0.76687500 0.079000500
"sclerosing cholangitis" 0.62671500 0.075954700
"juvenile idiopathic arthritis" -1.17577000 0.160293000
"psoriasis" 0.00582586 0.000800016
"rheumatoid arthritis" -0.00378072 0.000625160
"inflammatory bowel disease" -0.14334200 0.025272500
"ankylosing spondylitis" -0.00316852 0.000626225
"hypothyroidism" -0.00432054 0.000987324
"allergic rhinitis" 0.00393075 0.000926002
"IgA glomerulonephritis" -0.32696600 0.105262000
"atopic eczema" -0.00204018 0.000678061
'
tnfb <- as.data.frame(scan(file=textConnection(tnfb),what=list("",0,0))) %>%
setNames(c("outcome","Effect","StdErr")) %>%
mutate(outcome=gsub("\\b(^[a-z])","\\U\\1",outcome,perl=TRUE))
## ----fig.cap="Forest plots for MR results on TNFB", fig.align="left", fig.height=6, fig.width=9, results="hide"-------------------------------------
mr_forestplot(tnfb, colgap.forest.left="0.05cm", fontsize=14, leftlabs=c("Outcome","b","SE"),
common=FALSE, random=FALSE, print.I2=FALSE, print.pval.Q=FALSE, print.tau2=FALSE,
spacing=1.6,digits.TE=2,digits.se=2,xlab="Effect size",type.study="square",col.inside="black",col.square="black")
## ----fig.cap="Forest plots for MR results on TNFB (no summary statistics)", fig.align="left", fig.height=6, fig.width=9, results="hide"-------------
mr_forestplot(tnfb, colgap.forest.left="0.05cm", fontsize=14,
leftcols="studlab", leftlabs="Outcome", plotwidth="3inch", sm="OR", rightlabs="ci",
sortvar=tnfb[["Effect"]],
common=FALSE, random=FALSE, print.I2=FALSE, print.pval.Q=FALSE, print.tau2=FALSE,
backtransf=TRUE, spacing=1.6,type.study="square",col.inside="black",col.square="black")
## ----fig.cap="Forest plots for MR results on TNFB (with P values)", fig.align="left", fig.height=6, fig.width=9, results="hide"---------------------
mr_forestplot(tnfb,colgap.forest.left="0.05cm", fontsize=14,
leftcols=c("studlab"), leftlabs=c("Outcome"),
plotwidth="3inch", sm="OR", sortvar=tnfb[["Effect"]],
rightcols=c("effect","ci","pval"), rightlabs=c("OR","95%CI","P"),
digits=3, digits.pval=2, scientific.pval=TRUE,
common=FALSE, random=FALSE, print.I2=FALSE, print.pval.Q=FALSE, print.tau2=FALSE,
addrow=TRUE, backtransf=TRUE, spacing=1.6,type.study="square",col.inside="black",col.square="black")
## ---------------------------------------------------------------------------------------------------------------------------------------------------
require(gap)
s <- chr_pos_a1_a2(1,c(123,321),letters[1:2],letters[2:1])
s
inv_chr_pos_a1_a2(s)
inv_chr_pos_a1_a2("chr1:123-A_B",seps=c(":","-","_"))
## ---------------------------------------------------------------------------------------------------------------------------------------------------
example(ci2ms)
## ---------------------------------------------------------------------------------------------------------------------------------------------------
gc.lambda <- function(x, logscale=FALSE, z=FALSE) {
v <- x[!is.na(x)]
n <- length(v)
if (z) {
obs <- v^2
exp <- qchisq(log(1:n/n),1,lower.tail=FALSE,log.p=TRUE)
} else {
if (!logscale)
{
obs <- qchisq(v,1,lower.tail=FALSE)
exp <- qchisq(1:n/n,1,lower.tail=FALSE)
} else {
obs <- qchisq(-log(10)*v,1,lower.tail=FALSE,log.p=TRUE)
exp <- qchisq(log(1:n/n),1,lower.tail=FALSE,log.p=TRUE)
}
}
lambda <- median(obs)/median(exp)
return(lambda)
}
# A simplified version is as follows,
# obs <- median(chisq)
# exp <- qchisq(0.5, 1) # 0.4549364
# lambda <- obs/exp
# see also estlambda from GenABEL and qq.chisq from snpStats
# A related function
lambda1000 <- function(lambda, ncases, ncontrols)
1 + (lambda - 1) * (1 / ncases + 1 / ncontrols)/( 1 / 1000 + 1 / 1000)
## ---------------------------------------------------------------------------------------------------------------------------------------------------
invnormal <- function(x) qnorm((rank(x,na.last="keep")-0.5)/sum(!is.na(x)))
## ---------------------------------------------------------------------------------------------------------------------------------------------------
set.seed(12345)
Ni <- rpois(50, lambda = 4); table(factor(Ni, 0:max(Ni)))
y <- invnormal(Ni)
sd(y)
mean(y)
Ni <- 1:50
y <- invnormal(Ni)
mean(y)
sd(y)
## ---------------------------------------------------------------------------------------------------------------------------------------------------
alleles <- c("a","c","G","t")
revStrand(alleles)
## ----echo=FALSE-------------------------------------------------------------------------------------------------------------------------------------
library(gap)
search()
lsf.str("package:gap")
data(package="gap")$results
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.