Nothing
## ---- echo = FALSE, message=FALSE---------------------------------------------
library(knitr)
opts_chunk$set(collapse = TRUE, comment = "#>")
library("PhenotypeSimulator")
## -----------------------------------------------------------------------------
# Set parameters
genVar <- 0.4
noiseVar <- 1- genVar
shared <- 0.6
independent <- 1 - shared
# simulate simple bi-allelic genotypes and estimate kinship
genotypes <- simulateGenotypes(N = 100, NrSNP = 10000,
frequencies = c(0.05, 0.1, 0.3, 0.4),
verbose = FALSE)
genotypes_sd <- standardiseGenotypes(genotypes$genotypes)
kinship <- getKinship(N=100, X=genotypes_sd, verbose = FALSE)
# simulate phenotype components
genBg <- geneticBgEffects(N = 100, kinship = kinship, P = 15)
noiseBg <- noiseBgEffects(N = 100, P = 15)
# rescale phenotype components
genBg_shared_scaled <- rescaleVariance(genBg$shared, shared * genVar)
genBg_independent_scaled <- rescaleVariance(genBg$independent,
independent * genVar)
noiseBg_shared_scaled <- rescaleVariance(noiseBg$shared, shared * noiseVar)
noiseBg_independent_scaled <- rescaleVariance(noiseBg$independent,
independent *noiseVar)
# Total variance proportion shave to add up yo 1
total <- independent * noiseVar + independent * genVar +
shared * noiseVar + shared * genVar
total == 1
# combine components into final phenotype
Y <- scale(genBg_shared_scaled$component + noiseBg_shared_scaled$component +
genBg_independent_scaled$component + noiseBg_independent_scaled$component)
# transform phenotype non-linearly
Y_nl <- transformNonlinear(Y, alpha=0.7, method="exp")
## -----------------------------------------------------------------------------
# simulate phenotype with population structure and observational noise effects
# only
# genetic variance
genVar <- 0.4
# random genetic variance: h2b
phenotype <- runSimulation(N = 100, P = 15, tNrSNP = 10000,
SNPfrequencies = c(0.05, 0.1,0.3,0.4),
genVar = 0.4, h2bg = 1, phi = 1,
verbose = TRUE, nonlinear="exp",
proportionNonlinear = 0.7)
## -----------------------------------------------------------------------------
# read genotypes from external file
# use one of the sample genotype file provided in the
# extdata/genotypes/subfolders (e.g.extdata/genotypes/hapgen )
genotypefile <- system.file("extdata/genotypes/hapgen",
"genotypes_hapgen.controls.gen",
package = "PhenotypeSimulator")
# remove the .gen ending (oxgen specific endings .gen and .sample are added
# automatically )
genotypefile <- gsub("\\.gen","", genotypefile)
genotypes <- readStandardGenotypes(N=100, filename = genotypefile,
format="oxgen",
delimiter = ",", verbose=TRUE)
genotypes_sd <- standardiseGenotypes(genotypes$genotypes)
# kinship estimate based on standardised SNPs
kinship <- getKinship(N=100, X=genotypes_sd, verbose = FALSE)
# simulate 30 genetic variant effects (from non-standardised SNP genotypes)
causalSNPs <- getCausalSNPs(N=100, genotypes = genotypes$genotypes,
NrCausalSNPs = 30, verbose = FALSE)
genFixed <- geneticFixedEffects(N = 100, P = 15, X_causal = causalSNPs)
# simulate infinitesimal genetic effects
genBg <- geneticBgEffects(N=100, kinship = kinship, P = 15)
# simulate 4 different confounder effects:
# * 1 binomial covariate effect shared across all traits
# * 2 categorical (3 categories) independent covariate effects
# * 1 categorical (4 categories) independent covariate effect
# * 2 normally distributed independent and shared covariate effects
noiseFixed <- noiseFixedEffects(N = 100, P = 15, NrFixedEffects = 4,
NrConfounders = c(1, 2, 1, 2),
pIndependentConfounders = c(0, 1, 1, 0.5),
distConfounders = c("bin", "cat_norm",
"cat_unif", "norm"),
probConfounders = 0.2,
catConfounders = c(3, 4))
# simulate correlated effects with max correlation of 0.8
correlatedBg <- correlatedBgEffects(N = 100, P = 15, pcorr = 0.8)
# simulate observational noise effects
noiseBg <- noiseBgEffects(N = 100, P = 15)
# total SNP effect on phenotype: 0.01
genVar <- 0.6
noiseVar <- 1 - genVar
totalSNPeffect <- 0.01
h2s <- totalSNPeffect/genVar
phi <- 0.6
rho <- 0.1
delta <- 0.3
shared <- 0.8
independent <- 1 - shared
# rescale phenotype components
genFixed_shared_scaled <- rescaleVariance(genFixed$shared, shared * h2s *genVar)
genFixed_independent_scaled <- rescaleVariance(genFixed$independent,
independent * h2s *genVar)
genBg_shared_scaled <- rescaleVariance(genBg$shared, shared * (1-h2s) *genVar)
genBg_independent_scaled <- rescaleVariance(genBg$independent,
independent * (1-h2s) * genVar)
noiseBg_shared_scaled <- rescaleVariance(noiseBg$shared, shared * phi* noiseVar)
noiseBg_independent_scaled <- rescaleVariance(noiseBg$independent,
independent * phi* noiseVar)
correlatedBg_scaled <- rescaleVariance(correlatedBg$correlatedBg,
shared * rho * noiseVar)
noiseFixed_shared_scaled <- rescaleVariance(noiseFixed$shared, shared * delta *
noiseVar)
noiseFixed_independent_scaled <- rescaleVariance(noiseFixed$independent,
independent * delta * noiseVar)
# Total variance proportions have to add up yo 1
total <- shared * h2s *genVar + independent * h2s * genVar +
shared * (1-h2s) * genVar + independent * (1-h2s) * genVar +
shared * phi* noiseVar + independent * phi* noiseVar +
rho * noiseVar +
shared * delta * noiseVar + independent * delta * noiseVar
total == 1
# combine components into final phenotype
Y <- scale(genBg_shared_scaled$component + noiseBg_shared_scaled$component +
genBg_independent_scaled$component + noiseBg_independent_scaled$component +
genFixed_shared_scaled$component + noiseFixed_shared_scaled$component +
genFixed_independent_scaled$component + noiseFixed_independent_scaled$component +
correlatedBg_scaled$component)
## ---- tidy=TRUE, tidy.opts = list(width.cutoff = 60)--------------------------
# simulate phenotype with the same five phenotype components and settings as
# above; display progress via verbose=TRUE
phenotype <- runSimulation(N = 100, P = 15, genotypefile=genotypefile,
format ="oxgen",
cNrSNP=30,
genVar = genVar,
h2s = h2s, phi = 0.6, delta = 0.3,
distBetaGenetic = "unif", mBetaGenetic = 0.5,
sdBetaGenetic = 1,
NrFixedEffects = 4, NrConfounders = c(1, 2, 1, 2),
pIndependentConfounders = c(0, 1, 1, 0.5),
distConfounders = c("bin", "cat_norm",
"cat_unif", "norm"),
probConfounders = 0.2,
catConfounders = c(3, 4),
pcorr = 0.8,
verbose = TRUE )
## ---- echo=FALSE--------------------------------------------------------------
# show proportion of variance of the different phenotype components in the
# final phenotype
varGenFixed <- t(phenotype$varComponents
[grepl("var_genFix", names(phenotype$varComponents))])
varGenBg <- t(phenotype$varComponents
[grepl("var_genBg", names(phenotype$varComponents))])
varNoiseFixed <- t(phenotype$varComponents
[grepl("var_noiseFixed", names(phenotype$varComponents))])
varNoiseBg <- t(phenotype$varComponents
[grepl("var_noiseBg", names(phenotype$varComponents))])
varNoiseCorr <- t(phenotype$varComponents
[grepl("var_noiseCor", names(phenotype$varComponents))])
totalPropVariance <- as.matrix(t(data.frame(varGenFixed,
varGenBg,
varNoiseFixed,
varNoiseBg,
varNoiseCorr=c(varNoiseCorr, 0))))
totalPropVariance <- cbind(totalPropVariance, rowSums(totalPropVariance))
totalPropVariance <- rbind(totalPropVariance,
sumProportion=colSums(totalPropVariance))
colnames(totalPropVariance) <- c("shared effect", "independent effect",
"total effect")
knitr::kable(totalPropVariance, caption="Proportion of variance explained
by the different phenotype components")
## ---- fig.show='hold', fig.height=3.4, fig.width=3.4--------------------------
### show 'image' of phenotype and correlation between phenotypic traits
image(t(phenotype$phenoComponentsFinal$Y), main="Phenotype: [samples x traits]",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(cor(phenotype$phenoComponentsFinal$Y),
main="Correlation of traits [traits x traits]", axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Traits", line = 1)
mtext(side = 2, text = "Traits", line = 1)
## ---- eval = FALSE------------------------------------------------------------
# out <- savePheno(phenotype, directory="/tmp",
# outstring="test_simulation",
# format=c("csv", "plink"), verbose=FALSE)
## -----------------------------------------------------------------------------
## a) Draw cuasal SNPs from a simulated genotype matrix
# simulate 10,000 bi-allelic SNP genotypes for 100 samples with randomly drawn
# allele frequencies of 0.05, 0.1, 0.3 and 0.4.
genotypes <- simulateGenotypes(N = 100, NrSNP = 10000,
frequencies = c(0.05, 0.1, 0.3,0.4),
verbose = FALSE)
# draw 10 causal SNPs from the genotype matrix (use non-standardised allele
# codes i.e. (0,1,2))
causalSNPs <- getCausalSNPs(N=100, NrCausalSNPs = 10,
genotypes = genotypes$genotypes)
## -----------------------------------------------------------------------------
## b) Draw SNPs from external genotype files:
# read genotypes from external file
# use one of the sample genotype file provided in the
# extdata/genotypes/subfolders (e.g.extdata/genotypes/hapgen )
genotypefile <- system.file("extdata/genotypes/hapgen",
"genotypes_hapgen.controls.gen",
package = "PhenotypeSimulator")
# remove the .gen ending (oxbgen specific endings .gen and .sample are added
# automatically )
genotypefile <- gsub("\\.gen","", genotypefile)
genotypes <- readStandardGenotypes(N = 100, filename = genotypefile,
format="oxgen",
delimiter = ",", verbose=TRUE)
causalSNPsFromFile <- getCausalSNPs(N = 100, NrCausalSNPs = 10,
genotypes=genotypes$genotypes)
## -----------------------------------------------------------------------------
## c) draw 10 causal SNPs from external genotype files: sample 10 SNPs from
## chromosome 22
# use sample genotype file provided in the extdata/genotypes folder
genotypeFile <- system.file("extdata/genotypes/", "genotypes_chr22.csv",
package = "PhenotypeSimulator")
genoFilePrefix <- gsub("chr.*", "", genotypeFile)
genoFileSuffix <- ".csv"
causalSNPsSampledFromFile <- getCausalSNPs(N = 10, NrCausalSNPs = 10, chr = 22,
genoFilePrefix = genoFilePrefix,
genoFileSuffix = genoFileSuffix,
delimiter = ",", verbose=TRUE)
## ---- fig.show='hold'---------------------------------------------------------
# create genetic variant effects with 20% of SNPs having a specific effect,
# affecting 40% of all simulated traits
fixedGenetic <- geneticFixedEffects(X_causal = causalSNPsFromFile,
N = 100, P = 10,
pIndependentGenetic = 0.2,
pTraitIndependentGenetic = 0.4)
## ---- fig.show='hold', echo=FALSE, fig.height=3.4, fig.width=3.4--------------
image(fixedGenetic$shared, main="Shared genetic variant effects", axes=FALSE,
cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(fixedGenetic$independent, main="Independent genetic variant effects",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
## ---- fig.show='hold'---------------------------------------------------------
## a) Estimate kinship from simulated genotypes
genotypes <- simulateGenotypes(N = 100, NrSNP = 10000,
frequencies = c(0.05, 0.1, 0.3,0.4),
verbose = FALSE)
genotypes_sd <- standardiseGenotypes(genotypes$genotypes)
kinship <- getKinship(N=100, X=genotypes_sd, verbose = FALSE)
## b) Read kinship from external kinship file
kinshipFile <- system.file("extdata/kinship/", "kinship.csv",
package = "PhenotypeSimulator")
kinshipFromFile <- getKinship(N=50, kinshipfile = kinshipFile,
verbose = FALSE)
genBg <- geneticBgEffects(N=100, kinship = kinship, P = 15)
## ---- fig.show='hold', echo=FALSE, fig.height=3.4, fig.width=3.4--------------
image(genBg$shared, main="Shared infinitesimal genetic effects", axes=FALSE,
cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(genBg$independent, main="Independent infinitesimal genetic effects",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
## ---- fig.show='hold'---------------------------------------------------------
# create 1 non-genetic covariate effect affecting 30% of all simulated traits. The effect
# follows a uniform distribution between 30 and 40 (resembling for instance age
# in a study cohort).
fixedNoiseUniform <- noiseFixedEffects(N = 100, P = 10, NrConfounders = 1,
pIndependentConfounders = 1,
pTraitIndependentConfounders = 0.3,
distConfounders = "unif",
mConfounders = 35, sdConfounders = 5)
# create 2 non-genetic covariate effects with 1 specific confounder affecting
# 20% of all simulated traits. The effects follow a normal distribution
fixedNoiseNormal <- noiseFixedEffects(N = 100, P = 10, NrConfounders = 2,
pIndependentConfounders = 0.5,
pTraitIndependentConfounders = 0.2,
distConfounders = "norm",
mConfounders = 0, sdConfounders = 1)
# create 1 non-genetic covariate effects affecting all simulated traits. The
# effect follows a binomial distribution with probability 0.5 (resembling for
# instance sex in a study cohort).
fixedNoiseBinomial <- noiseFixedEffects(N = 100, P = 10, NrConfounders = 1,
pIndependentConfounders = 0,
distConfounders = "bin",
probConfounders = 0.5)
## ---- fig.show='hold', echo=FALSE, fig.height=3.5, fig.width=4----------------
image(fixedNoiseUniform$independent,
main="Independent non-genetic covariate effects\n(uniform confounder dist)",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(fixedNoiseNormal$shared,
main="Shared non-genetic covariate effects\n(normal confounder dist)",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(fixedNoiseNormal$independent,
main="Independent non-genetic covariate effects\n(normal confounder dist)",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(fixedNoiseBinomial$shared,
main="Shared non-genetic covariate effects\n(binomial confounder dist)",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
## ---- fig.show='hold', fig.height=3.4, fig.width=3.4--------------------------
# simulate correlated noise effect for 10 traits with top-level
# correlation of 0.8
correlatedNoise <- correlatedBgEffects(N = 100, P = 10, pcorr = 0.8 )
# correlation structure of the traits: strong the closer to the diagonal,
# little correlation at the furthest distance to the diagonal
furthestDistCorr <- 0.4^(10-1)
pairs(correlatedNoise$correlatedBg, pch = ".",
main=paste("Correlation at furthest distance to diagonal:\n",
furthestDistCorr), cex.main=0.8)
image(correlatedNoise$correlatedBg, main="Correlated effects", axes=FALSE,
cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
## ----fig.show='hold'----------------------------------------------------------
# simulate a noise random effect for 10 traits
noiseBg <- noiseBgEffects(N = 100, P = 10, mean = 0, sd = 1)
## ---- fig.show='hold', echo=FALSE, fig.height=3.4, fig.width=3.4--------------
image(noiseBg$shared, main="Shared observational noise effects", axes=FALSE,
cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
image(noiseBg$independent, main="Independent observational noise effects",
axes=FALSE, cex.main=0.8)
mtext(side = 1, text = "Samples", line = 1)
mtext(side = 2, text = "Traits", line = 1)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.