Nothing
## ---- echo = FALSE, message=FALSE---------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library("PhenotypeSimulator")
library("ggplot2")
## ---- eval=FALSE--------------------------------------------------------------
# # load kinship data, add small value to diagonal for numerical stability and
# # do an eigendecomposition
# indir <- "~/data/hmeyer/Supplementary/CEU.0908.impute.files"
#
# kinship <- read.table(paste(indir,
# "/genotypes_genome_hapgen.controls.grm.rel",sep=""),
# sep="\t", header=FALSE)
#
# kinship <- as.matrix(kinship)
# diag(kinship) <- diag(kinship) + 1e-4
#
# kinship_decomposed <- eigen(kinship)
# write.table(kinship,
# paste(indir, "/genotypes_genome_hapgen.controls.grm.rel",
# sep=""),
# sep="\t", col.names=FALSE, row.names=FALSE)
#
# write.table(kinship_decomposed$values,
# paste(indir, "/genotypes_eigenvalues",
# sep=""),
# sep="\t", col.names=FALSE, row.names=FALSE)
#
# write.table(kinship_decomposed$vectors,
# paste(indir, "/genotypes_eigenvectors",
# sep=""),
# sep="\t", col.names=FALSE, row.names=FALSE)
## ---- eval=FALSE--------------------------------------------------------------
# indir <- "/homes/hannah/data/hmeyer/Supplementary/CEU.0908.impute.files"
#
# # specify directory to save data; if it doesn't exist yet, create i.e. needs
# # writing permissions
# datadir <- '/homes/hannah/tmp/phenotypeSimulator'
# if (!dir.exists(datadir)) dir.create(datadir, recursive=TRUE)
#
# # specify filenames and parameters
# totalGeneticVar <- 0.4
# totalSNPeffect <- 0.1
# h2s <- totalSNPeffect/totalGeneticVar
# kinshipfile <- paste(indir, "/genotypes_genome_hapgen.controls.grm.rel",
# sep="")
#
# genoFilePrefix <- paste(indir, "/genotypes_", sep="")
# genoFileSuffix <- "_hapgen.controls.gen"
#
# # simulate phenotype with three phenotype components
# simulation <- runSimulation(N = 1000, P = 3, cNrSNP=10, seed=43,
# kinshipfile = kinshipfile,
# format = "oxgen",
# genoFilePrefix = genoFilePrefix,
# genoFileSuffix = genoFileSuffix,
# chr = c(5,7,11),
# mBetaGenetic = 0, sdBetaGenetic = 0.2,
# theta=1,
# NrSNPsOnChromosome=c(533966, 503129, 428863),
# genVar = totalGeneticVar, h2s = h2s,
# phi = 0.6, delta = 0.2, rho=0.2,
# NrFixedEffects = 2, NrConfounders = c(2, 2),
# distConfounders = c("bin", "norm"),
# probConfounders = 0.2,
# genoDelimiter=" ",
# kinshipDelimiter="\t",
# kinshipHeader=FALSE,
# verbose = TRUE )
## ---- eval=FALSE, echo=FALSE--------------------------------------------------
# savedir <- paste(system.file("extdata", package="PhenotypeSimulator"),
# "/resultsSimulationAndLinearModel", sep="")
# saveRDS(simulation$phenoComponentsFinal,
# paste(savedir, "/simulation_phenoComponentsFinal.rds", sep=""))
## ---- echo=FALSE--------------------------------------------------------------
savedir <- paste(system.file("extdata", package="PhenotypeSimulator"),
"/resultsSimulationAndLinearModel", sep="")
simulation <- list(
phenoComponentsFinal=readRDS(paste(savedir,
"/simulation_phenoComponentsFinal.rds",
sep="")))
## ----heatmaps, fig.keep='all', fig.height=3, fig.width=7, eval=TRUE, fig.cap="\\label{fig:heatmaps}Heatmaps of the trait-by-trait correlation (Pearson correlation) of the simulated phenotype and its five phenotype components."----
cor_phenotype <- reshape2::melt(cor(simulation$phenoComponentsFinal$Y))
cor_phenotype$type <- "Y"
cor_genBg <- reshape2::melt(cor(simulation$phenoComponentsFinal$Y_genBg))
cor_genBg$type <- "U"
cor_genFixed <- reshape2::melt(cor(simulation$phenoComponentsFinal$Y_genFixed))
cor_genFixed$type <- "XB"
cor_noiseFixed <- reshape2::melt(cor(simulation$phenoComponentsFinal$Y_noiseFixed))
cor_noiseFixed$type <- "WA"
cor_noiseBg <- reshape2::melt(cor(simulation$phenoComponentsFinal$Y_noiseBg))
cor_noiseBg$type <- "Psi"
cor_correlatedBg <- reshape2::melt(cor(simulation$phenoComponentsFinal$Y_correlatedBg))
cor_correlatedBg$type <- "T"
cor_components <- rbind(cor_phenotype, cor_genBg, cor_genFixed, cor_noiseBg,
cor_noiseFixed, cor_correlatedBg)
cor_components$type <- factor(cor_components$type, levels=c("Y",
"WA",
"XB",
"U",
"T",
"Psi"),
labels=c("bold(Y)",
"bold(WA)",
"bold(XB)",
"bold(U)",
"bold(T)",
"bold(Psi)"))
colnames(cor_components) <- c("TraitA", "TraitB", "Correlation", "type")
# For prettier plotting
cor_components$TraitA <- gsub("_", " ", cor_components$TraitA)
cor_components$TraitB <- gsub("_", " ", cor_components$TraitB)
p_corr <- ggplot(data=cor_components, aes(x=TraitA, y=TraitB, fill=Correlation))
p_corr <- p_corr + geom_tile() +
facet_grid(~ type, labeller = label_parsed) +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient2(low="#F2AD00", high="#5BBCD6" , mid="white",
limits = c(-1,1),
breaks = c(-1, -0.5, 0, 0.5, 1),
labels = c(-1, -0.5, 0, 0.5, 1),
guide=guide_colorbar(direction = "horizontal",
title.position="top",
title.hjust =0.5)) +
xlab(" ") +
ylab(" ") +
theme_bw() +
coord_fixed() +
theme(axis.text.x = element_text(angle=90, vjust=0.5),
legend.position = "bottom",
strip.background = element_rect(fill="white", color="white"),
axis.text =element_text(size=8),
legend.title =element_text(size=8),
legend.text =element_text(size=8),
strip.text =element_text(size=8))
print(p_corr)
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# ggsave(plot=p_corr,
# filename=paste(savedir, "/correlation_phenotype.pdf", sep=""),
# units="mm", width=86, height=60)
## ---- eval=FALSE--------------------------------------------------------------
# # Save phenotypes, kinship and non-genetic covariates in csv and
# # GEMMA-specific format
# outdirectory <- savePheno(simulation, directory = datadir,
# intercept_gemma=TRUE, format=c("csv", "gemma"),
# saveIntermediate=TRUE)
## ---- eval=FALSE--------------------------------------------------------------
# datadir="/homes/hannah/tmp/phenotypeSimulator"
#
# causalSNPs <- fread(paste(datadir, "/SNP_NrSNP10.csv", sep=""),
# header=TRUE, sep=",", stringsAsFactors=FALSE,
# data.table=FALSE)
# causalSNPnames <- colnames(causalSNPs)
#
# LMM_mt <- fread(paste(datadir, "/GWAS_gemma_LMM_mt_pvalues.txt", sep=""),
# data.table=FALSE, header=FALSE, stringsAsFactors=FALSE)
# LMM_st_trait1 <- fread(paste(datadir, "/GWAS_gemma_LMM_st_trait1_pvalues.txt",
# sep=""),
# data.table=FALSE, header=FALSE, stringsAsFactors=FALSE)
# LMM_st_trait2 <- fread(paste(datadir, "/GWAS_gemma_LMM_st_trait2_pvalues.txt",
# sep=""),
# data.table=FALSE, header=FALSE, stringsAsFactors=FALSE)
# LMM_st_trait3 <- fread(paste(datadir, "/GWAS_gemma_LMM_st_trait3_pvalues.txt",
# sep=""),
# data.table=FALSE, header=FALSE, stringsAsFactors=FALSE)
#
# LMM_mt <- LMM_mt[order(LMM_mt[,2]),]
# LMM_mt$expected <- stats::ppoints(nrow(LMM_mt))
# LMM_mt$model <- "mvLMM"
# LMM_mt$trait <- "all"
# colnames(LMM_mt)[1:2] <- c("rsID", "observed")
#
# LMM_st_trait1 <- LMM_st_trait1[order(LMM_st_trait1[,2]),]
# LMM_st_trait1$expected <- LMM_mt$expected
# LMM_st_trait1$model <- "uvLMM"
# LMM_st_trait1$trait <- "trait1"
# colnames(LMM_st_trait1)[1:2] <- c("rsID", "observed")
#
# LMM_st_trait2 <- LMM_st_trait2[order(LMM_st_trait2[,2]),]
# LMM_st_trait2$expected <- LMM_mt$expected
# LMM_st_trait2$model <- "uvLMM"
# LMM_st_trait2$trait <- "trait2"
# colnames(LMM_st_trait2)[1:2] <- c("rsID", "observed")
#
# LMM_st_trait3 <- LMM_st_trait3[order(LMM_st_trait3[,2]),]
# LMM_st_trait3$expected <- LMM_mt$expected
# LMM_st_trait3$model <- "uvLMM"
# LMM_st_trait3$trait <- "trait3"
# colnames(LMM_st_trait3)[1:2] <- c("rsID", "observed")
#
# pvalues <- rbind(LMM_mt, LMM_st_trait1, LMM_st_trait2, LMM_st_trait3)
# pvalues$causal <- "all"
# pvalues$causal[which(pvalues$rsID %in% causalSNPnames)] <- "simulated causal"
# pvalues$causal <- factor(pvalues$causal, levels=c("all", "simulated causal"))
#
# p_qq <- ggplot(data=pvalues,
# aes(x=-log10(expected), y=-log10(observed)))
# p_qq <- p_qq + geom_point(aes(color=causal, shape=trait)) +
# scale_color_manual(values=c('darkgrey', '#1b9e77'),
# name="SNPs",
# guide=guide_legend(nrow = 2, title.position="top",
# title.hjust =0.5,
# override.aes = list(shape=21))) +
# scale_shape_manual(values=c(18, 15:17),
# name="Traits",
# guide=guide_legend(nrow = 2,
# override.aes = list(colour='darkgrey'),
# title.position="top",
# title.hjust =0.5)) +
# geom_abline(intercept=0, slope=1, col="black") +
# geom_point(data=dplyr::filter(pvalues, causal == "simulated causal"),
# color='#1b9e77', aes(shape=trait)) +
# xlab(expression(Expected~~-log[10](italic(p))) ) +
# ylab(expression(Observed~~-log[10](italic(p)))) +
# facet_grid(~ model) +
# theme_bw() +
# theme(strip.background = element_rect(fill="white"),
# axis.title = element_text(size=12),
# axis.text =element_text(size=10),
# legend.title =element_text(size=12),
# legend.text =element_text(size=10),
# strip.text =element_text(size=10),
# legend.position = "bottom",
# legend.direction = "horizontal")
## ----echo=FALSE, eval=FALSE, out.width='100%'---------------------------------
# ggsave(plot=p_qq,
# filename=paste(savedir, "/gwas_results_gemma_qqplot.png", sep=""),
# units="mm", width=86, height=100)
## ----qqplot, echo=FALSE, fig.cap="\\label{fig:qqplot}Quantile-quantile plots of p-values observed from the multivariate linear mixed model (mvLMM, traits:all) and the univariate linear mixed models (uvLMM, traits: trait1/trait2/trait3) fitted to each of about eight million genome-wide SNPs (grey), including the ten SNPs for which a phenotype effect was modelled (green)."----
#knitr::include_graphics("../docs/articles/Simulation-and-LinearModel_files/figure-html/qqplot-1.png")
knitr::include_graphics("gwas_results_gemma_qqplot.png")
## ---- eval=FALSE--------------------------------------------------------------
# sigSNPs <- dplyr::filter(pvalues, causal == "simulated causal",
# observed < 5*10^-8)
# sigSNPs$observed_adjusted <- sigSNPs$observed
# sigSNPs$observed_adjusted[which(sigSNPs$trait != "all")] <-
# 3*sigSNPs$observed[which(sigSNPs$trait != "all")]
# sigSNPs <- dplyr::filter(sigSNPs, observed_adjusted < 5*10^-8)
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# write.table(sigSNPs,
# paste(savedir, "/sigSNPs.csv", sep=""),
# col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
## ----sigSNPs, echo=FALSE------------------------------------------------------
sigSNPs <- read.table(paste(savedir, "/sigSNPs.csv", sep=""), header=TRUE,
sep=",")
knitr::kable(sigSNPs[, c(1,4,5,7)],
caption="\\label{tab:sigSNPs}P-values from multi-trait GWAS (model == mvLMM) and
single-trait GWAS (model == uvLMM)",
table.attr = "id=\"sigSNPs\"",
digits=40)
## ---- eval=FALSE--------------------------------------------------------------
# # Read the SNPs simulated to have an effect on the phenotype and their SNP
# # effects
# SNPs <- read.csv(paste(datadir,"/SNP_NrSNP10.csv", sep=""),
# row.names=1)
# SNPeffect <- read.csv(paste(datadir,"/SNP_effects_NrSNP10.csv", sep=""),
# row.names=1)
#
#
# # HapGen simulated SNPs might be encoded as 7-25481146 , which R converts to
# # X7.25481146 via make.names in read.table; substitute X and . to get original
# # names
# SNPnames <- gsub("\\.", "-", gsub("X", "", colnames(SNPs)))
# SNPnames <- SNPnames[order(SNPnames)]
# simulated_withEffect <- dplyr::filter(pvalues, rsID %in% SNPnames)
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# write.table(simulated_withEffect,
# paste(savedir,"/simulated_withEffect.csv", sep=""), col.names=TRUE,
# quote=FALSE,row.names=FALSE)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
SNPeffect <- read.csv(paste(savedir,"/SNP_effects_NrSNP10.csv", sep=""),
row.names=1)
SNPs <- read.csv(paste(savedir,"/SNP_NrSNP10.csv", sep=""),
row.names=1)
SNPnames <- gsub("\\.", "-", gsub("X", "", colnames(SNPs)))
SNPnames <- SNPnames[order(SNPnames)]
simulated_withEffect <- read.csv(paste(savedir,"/simulated_withEffect.csv", sep=""))
## ----freq-beta, fig.cap="\\label{fig:freq-beta}P-values, allele frequencies and simulated effect sizes of the ten SNPs simulated to have an effect on phenotype."----
# get the allele frequencies of the SNPs simulated to have an effect on the
# phenotype
freq <- apply(SNPs, 2, getAlleleFrequencies)
minor <- apply(freq, 2, min)
minor <- minor[order(gsub("X", "", names(minor)))]
# format the effect size table and combine with p-value and frequency
# information
effects <- reshape2::melt(SNPeffect, value.name="beta", variable.name="name")
effects$type <- gsub('\\..*', '', effects$name)
effects$rsID <- gsub("\\.", "-", gsub('.*_', '', effects$name))
effects$trait <- rep(paste("trait", 1:3, sep=""), 10)
effects <- effects[order(effects$rsID),]
effects <- dplyr::filter(effects, rsID %in% simulated_withEffect$rsID)
simulated_withEffect <- simulated_withEffect[order(simulated_withEffect[, 1]),]
simulated_withEffect$beta <- NA
simulated_withEffect$beta[simulated_withEffect$trait != "all"] <- effects$beta
simulated_withEffect$freq <- rep(minor[which(SNPnames %in% effects$rsID)], each=4)
# plot association p-values in relation to the absolute value of the simulated
# effect sizes and the SNPs allele frequencies
p <- ggplot(dplyr::filter(simulated_withEffect, trait != "all"))
p <- p +
geom_point(aes(x=freq, y=-log10(observed), color=abs(beta), shape=trait)) +
geom_hline(yintercept=-log10(5*10^(-8)), color="darkgrey") +
scale_color_gradientn(colours=c('#f0f9e8','#ccebc5','#a8ddb5','#7bccc4',
'#4eb3d3','#2b8cbe','#08589e'),
name=expression("| simulated" ~beta~ "|")) +
scale_shape_discrete(name="Phenotype in GWAS") +
xlab("Allele frequencies") +
ylab(expression(-log[10](p-value))) +
theme_bw()
print(p)
## ----echo=FALSE, eval=FALSE, out.width='100%'---------------------------------
# ggsave(plot=p,
# filename=paste(savedir, "/effectsizes-freq-pvalues.pdf", sep=""),
# units="mm", width=150, height=120)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.