knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
<<<<<<< Updated upstream:vignettes/documentation.Rmd
======= The goal of this document is to get you up and running with `multiLocalFDR` as quickly as possible. # Installation ```r library(devtools) install_github("JungiinChoi/multiLocalFDR")
You can install multiLocalFDR from the Github directory. For Mac users, you need to install Xcode before the installation. (If Xcode is not installed, R automatically redirects you to install XCode.)
library(copula) library(mvtnorm) library(ggplot2) library(scatterplot3d) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd library(multiLocalFDR)
SPMix <- function(z, tol = 5e-6, p_value = FALSE, alternative = "greater", max_iter = 30, mono = TRUE, thre_z = 0.9, Uthre_gam = 0.9, Lthre_gam = 0.01 )
plotSPMix <- function(z, p0, mu0, sig0, f, f1, localFDR, p_value = FALSE, alternative = "greater", thre_localFDR = 0.2, testing = TRUE, xlab = "x", ylab = "y", coord_legend = c(8, -5, 0.2))
In this section, we will use SPMix() to estimate mixture density and compute local-FDR for generated data. Also, plotSPMix() can visualize the fitted results for both univariate and 2-dimensional data.
library(copula) library(mclust) library(mvtnorm) library(ggplot2) library(scatterplot3d)
Let's consider randomly generated 1000 data from the density below.
$$f = 0.8 \cdot N(0,1) + 0.2 \cdot N(3.5, 0.5^2)$$
<<<<<<< Updated upstream:vignettes/documentation.Rmd
n=10000 p0=0.8 ======= ```r # set seed set.seed(1) n <- 500 p0 <- 0.8 >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd n0 <- rbinom(1, n, p0) n1 <- n - n0 z0 <- rnorm(n0) z1 <- rnorm(n1, mean = 3, sd = 0.5) z_NN1d <- c(z0, z1) <<<<<<< Updated upstream:vignettes/documentation.Rmd NN1d <- SPMix(z_NN1d, tol = 1e-10, thre_z = 0.99, Uthre_gam = 0.9) summary(NN1d) ======= NN1d <- SPMix(z_NN1d, min_iter = 10) str(NN1d) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
SPMix() returns null probability($p0$), null parameters($\mu_0, \sig_0$), estimated density and nonnull density($f, f_1$) and estimated local-FDR value.
# Hypothesis testing plotSPMix(NN1d)
plotSPMix() can be used for both hypothesis testing and density estimation. If testing is TRUE (default), it gives density estimation for both normal and nonparametric parts. Also, threshold of given data by given local-FDR threshold is marked with dashed line.
# Density Estimation plotSPMix(NN1d, testing = FALSE)
If testing is FALSE, it gives density estimation and classifies two parts(normal/nonparametric) by local-FDR value (local-FDR $> 0.5$?).
What if alternative distribution is not normal?
$$f = 0.8 \cdot N(0,1) + 0.2 \cdot Gamma(shape = 12, rate = 4)$$
z1_gamma <- rgamma(n1, shape = 12, rate = 4) z_NG1d <- c(z0, z1_gamma) <<<<<<< Updated upstream:vignettes/documentation.Rmd NG1d <- SPMix(z_NG1d, tol = 1e-10, thre_z = 0.99) ======= NG1d <- SPMix(z_NG1d, min_iter = 10) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
# Hypothesis testing plotSPMix(NG1d) # Density Estimation plotSPMix(NG1d, testing = FALSE)
<<<<<<< Updated upstream:vignettes/documentation.Rmd n=3000 ======= p0 <- 0.8 n <- 100 >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd n0 <- rbinom(1, n, p0) n1 <- n - n0 sig0 <- matrix(c(1, 0.25, 0.25, 1), ncol = 2, byrow = T) V <- rCopula(n1, ellipCopula(family = "normal", param = 0.5, dim = 2)) z0 <- rmvnorm(n0, sigma = sig0) z1 <- qnorm(V, mean = 2.0, sd = .5) z_NN2d <- data.frame(rbind(z0, z1)) ggplot(z_NN2d, aes(x = z_NN2d[,1], y = z_NN2d[,2])) + geom_point(aes(color = factor(c(rep(1,n0), rep(2,n1)), labels = c("Normal(Null)", "Gaussian Copula(Alternative)")))) + scale_color_manual(values = c("#999999", "#E69F00"), name="") + labs(title = "Random Generated Mixture density data", x = "x", y = "y") + theme_classic()
NN2d <- SPMix(z_NN2d, tol = 1e-10) <<<<<<< Updated upstream:vignettes/documentation.Rmd plotSPMix(z_NN2d, NN2d$p0, NN2d$mu0, NN2d$sig0, NN2d$f, NN2d$f1, NN2d$localFDR, xlab = "x", ylab = "y") ======= plotSPMix(NN2d, coord_legend = c(4, -1, 0)) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
plotSPMix() also provides visualization of 2-dimensional data. You can set name for x-axis and y-axis using xlab, ylab.
Let's consider nonnull distribution as Frank copula with marginal gamma distribution.
<<<<<<< Updated upstream:vignettes/documentation.Rmd z1_gamma <- qgamma(V, shape = 12, rate = 4) z_NG <- data.frame(rbind(z0, z1_gamma)) ======= z1_gamma <- qgamma(V, shape = 10, rate = 4) z_NG2d <- data.frame(rbind(z0, z1_gamma)) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd ggplot(z_NG, aes(x = z_NG[,1], y = z_NG[,2])) + geom_point(aes(color = factor(c(rep(1,n0), rep(2,n1)), labels = c("Normal", "Gamma")))) + scale_color_manual(values = c("#999999", "#E69F00"), name="") + labs(title = "Random Generated Normal/Gamma Mixture", x="x", y = "y") + theme_classic()
NG <- SPMix(z_NG, tol = 1e-10) <<<<<<< Updated upstream:vignettes/documentation.Rmd plotSPMix(z_NG, NG$p0, NG$mu0, NG$sig0, NG$f, NG$f1, NG$localFDR, xlab = "x", ylab = "y") ======= plotSPMix(NG2d) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
Let's consider null distribution and multivariate normal density and nonnull distribution as Gaussian copula with marginal normal distribution.
library(scatterplot3d) Sigma0 <- matrix(c(1, 0.25, 0.25, 0.25, 1, 0.25, 0.25,0.25,1), ncol = 3) n0 <- rbinom(1, n, p0) n1 <- n - n0 V <- rCopula(n1, ellipCopula(family = "normal", param = 0.5, dim = 3)) z0_NN3d <- rmvnorm(n0, sigma = Sigma0) z1_NN3d <- qnorm(V, mean = 2.0, sd = .5) z_NN3d <- rbind(z0_NN3d, z1_NN3d) NN3d <- SPMix(z_NN3d, 1e-10)
<<<<<<< Updated upstream:vignettes/documentation.Rmd
colors <- c("#999999", "#E69F00") colors <- colors[as.numeric(NN3d$localFDR <= 0.1)+1] scatterplot_NN<- scatterplot3d(z_NN3d, pch = 16, color=colors, xlab = "x", ylab = "y", zlab = "z") legend_pathway <- factor((NN3d$localFDR <= 0.1), levels = c(FALSE, TRUE), labels = c("Nonsignificant", "Significant")) legend(scatterplot_NN$xyz.convert(6, -5, 0), legend = levels(legend_pathway), col = c("#999999", "#E69F00"), pch = 16) ======= ```r NN3d <- SPMix(z_NN3d) plotSPMix(NN3d, thre_localFDR = 0.1) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
Orange dots are given data with lower local-FDR (localFDR $< 0.1$).
V_NG <- rCopula(n1, frankCopula(param = 0.5, dim = 3)) z0_NG3d <- rmvnorm(n0, sigma = Sigma0) z1_NG3d <- qgamma(V_NG, shape = 10, rate = 4) z_NG3d <- rbind(z0_NG3d, z1_NG3d) NG3d <- SPMix(z_NG3d, 1e-10)
<<<<<<< Updated upstream:vignettes/documentation.Rmd
colors <- c("#999999", "#E69F00") colors <- colors[as.numeric(NG3d$localFDR <= 0.1)+1] scatterplot_NG<- scatterplot3d(z_NG3d, pch = 16, color=colors, xlab = "x", ylab = "y", zlab = "z") legend_pathway <- factor((NG3d$localFDR <= 0.2), levels = c(FALSE, TRUE), labels = c("Nonsignificant", "Significant")) legend(scatterplot_NG$xyz.convert(6, -5, 0), legend = levels(legend_pathway), col = c("#999999", "#E69F00"), pch = 16) ======= ```r NG3d <- SPMix(z_NG3d) plotSPMix(NG3d, thre_localFDR = 0.1) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
Internal data 'galaxy' is a data frame including radial velocities of globular clusters of M104 and Milky Way stars. It's known that Milky Way stars have much lower radial velocities.
data("galaxy", package = "multiLocalFDR") head(galaxy)
z_galaxy <- galaxy$velocity x_galaxy <- SPMix(z_galaxy, alternative = "less")
plotSPMix(x_galaxy, testing = FALSE, xlab = "velocity(km/s)")
Internal data 'pathways' is a list of significantly upregulated genes in three published studies: (1) peripheral leukocytes(L), (2) orbital inflammatory disease(O), and (3) sinus brushings(S) compared to healthy controls.
data("pathways", package = "multiLocalFDR") head(pathways)
## SPMix for each pathway x_L <- SPMix(pathways[,2], p_value = TRUE) x_O <- SPMix(pathways[,3], p_value = TRUE) x_S <- SPMix(pathways[,4], p_value = TRUE) ## Plot results for each pathway plotSPMix(x_L, xlab = "Peripheral Leukocytes") plotSPMix(x_O, xlab = "Orbital Tissue") plotSPMix(x_S, xlab = "Sinus Brushings")
## SpMix for 3-dimensional data <<<<<<< Updated upstream:vignettes/documentation.Rmd params_LOS <- SPMix(pathways[,2:4], p_value = TRUE, tol = 1e-10) ## Pathways which md-fdr <= 0.01 head(pathways[params_LOS$localFDR <= 0.01,]$Gene.Set) length(pathways[params_LOS$localFDR <= 0.01,]$Gene.Set) # md-fdr dataframe pathways_df <- pathways pathways_df$localFDR <- params_LOS$localFDR pathways_df$sgnf <- (params_LOS$localFDR <= 0.01) head(pathways_df) ======= x_LOS <- SPMix(pathways[,2:4], p_value = TRUE) ## Pathways which md-fdr <= 0.01 head(pathways[x_LOS$localFDR <= 0.01,]$Gene.Set) length(pathways[x_LOS$localFDR <= 0.01,]$Gene.Set) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
<<<<<<< Updated upstream:vignettes/documentation.Rmd colors <- c("#999999", "#E69F00") colors <- colors[as.numeric(pathways_df$sgnf)+1] scatterplot<- scatterplot3d(qnorm(1-as.matrix(pathways_df[,2:4])), pch = 16, color=colors, xlab = "Peripheral Leukocytes", ylab = "Orbital Tissue", zlab = "Sinus Brushings") legend_pathway <- factor(pathways_df$sgnf, levels = c(FALSE, TRUE), labels = c("Nonsignificant", "Significant")) legend(scatterplot$xyz.convert(6, -5, 0), legend = levels(legend_pathway), col = c("#999999", "#E69F00"), pch = 16) ======= plotSPMix(x_LOS, thre_localFDR = 0.01, xlab = "Peripheral Leukocytes", ylab = "Orbital Tissue", zlab = "Sinus Brushings", coord_legend = c(6, -5, 0)) >>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
Orange dots are pathways with md-fdr $< 0.01$.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.