knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

<<<<<<< Updated upstream:vignettes/documentation.Rmd

=======
The goal of this document is to get you up and running with `multiLocalFDR` as quickly as possible. 

# Installation

```r
library(devtools)
install_github("JungiinChoi/multiLocalFDR")

You can install multiLocalFDR from the Github directory. For Mac users, you need to install Xcode before the installation. (If Xcode is not installed, R automatically redirects you to install XCode.)

required libraries

library(copula)
library(mvtnorm)
library(ggplot2)
library(scatterplot3d)

>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
library(multiLocalFDR)

Main Functions

SPMix()

SPMix <- function(z, tol = 5e-6, p_value = FALSE, alternative = "greater", max_iter = 30, mono = TRUE, thre_z = 0.9, Uthre_gam = 0.9, Lthre_gam = 0.01 )

plotSPMix()

plotSPMix <- function(z, p0, mu0, sig0, f, f1, localFDR, p_value = FALSE,
                      alternative = "greater", thre_localFDR = 0.2, testing = TRUE,
                      xlab = "x", ylab = "y", coord_legend = c(8, -5, 0.2))

Simulations

In this section, we will use SPMix() to estimate mixture density and compute local-FDR for generated data. Also, plotSPMix() can visualize the fitted results for both univariate and 2-dimensional data.

required libraries

library(copula)
library(mclust)
library(mvtnorm)
library(ggplot2)
library(scatterplot3d)

Univaraite

Normal + Normal

Let's consider randomly generated 1000 data from the density below.

$$f = 0.8 \cdot N(0,1) + 0.2 \cdot N(3.5, 0.5^2)$$

<<<<<<< Updated upstream:vignettes/documentation.Rmd

n=10000
p0=0.8
=======
```r
# set seed
set.seed(1)

n <- 500
p0 <- 0.8
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
n0 <- rbinom(1, n, p0)
n1 <- n - n0
z0 <- rnorm(n0)
z1 <- rnorm(n1, mean = 3, sd = 0.5)
z_NN1d <- c(z0, z1)

<<<<<<< Updated upstream:vignettes/documentation.Rmd
NN1d <- SPMix(z_NN1d, tol = 1e-10, thre_z = 0.99, Uthre_gam = 0.9)
summary(NN1d)
=======
NN1d <- SPMix(z_NN1d, min_iter = 10)
str(NN1d)
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

SPMix() returns null probability($p0$), null parameters($\mu_0, \sig_0$), estimated density and nonnull density($f, f_1$) and estimated local-FDR value.

# Hypothesis testing 
plotSPMix(NN1d)

plotSPMix() can be used for both hypothesis testing and density estimation. If testing is TRUE (default), it gives density estimation for both normal and nonparametric parts. Also, threshold of given data by given local-FDR threshold is marked with dashed line.

# Density Estimation
plotSPMix(NN1d, testing = FALSE)

If testing is FALSE, it gives density estimation and classifies two parts(normal/nonparametric) by local-FDR value (local-FDR $> 0.5$?).

Normal + Gamma

What if alternative distribution is not normal?

$$f = 0.8 \cdot N(0,1) + 0.2 \cdot Gamma(shape = 12, rate = 4)$$

z1_gamma <- rgamma(n1, shape = 12, rate = 4)
z_NG1d <- c(z0, z1_gamma)
<<<<<<< Updated upstream:vignettes/documentation.Rmd

NG1d <- SPMix(z_NG1d, tol = 1e-10, thre_z = 0.99)
=======
NG1d <- SPMix(z_NG1d, min_iter = 10)
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
# Hypothesis testing 
plotSPMix(NG1d)

# Density Estimation
plotSPMix(NG1d, testing = FALSE)

2-dimensional data

Normal + Gaussian Copula

<<<<<<< Updated upstream:vignettes/documentation.Rmd
n=3000
=======
p0 <- 0.8
n <- 100
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd
n0 <- rbinom(1, n, p0)
n1 <- n - n0
sig0 <- matrix(c(1, 0.25, 0.25, 1), ncol = 2, byrow = T)
V <- rCopula(n1, ellipCopula(family = "normal", param = 0.5, dim = 2))
z0 <- rmvnorm(n0, sigma = sig0)
z1 <- qnorm(V, mean = 2.0, sd = .5)
z_NN2d <- data.frame(rbind(z0, z1))

ggplot(z_NN2d, aes(x = z_NN2d[,1], y = z_NN2d[,2])) +
  geom_point(aes(color = factor(c(rep(1,n0), rep(2,n1)), labels = c("Normal(Null)", "Gaussian Copula(Alternative)")))) +
  scale_color_manual(values = c("#999999", "#E69F00"), name="")  +
  labs(title = "Random Generated Mixture density data", x = "x", y = "y") +
  theme_classic()
NN2d <- SPMix(z_NN2d, tol = 1e-10)

<<<<<<< Updated upstream:vignettes/documentation.Rmd
plotSPMix(z_NN2d, NN2d$p0, NN2d$mu0, NN2d$sig0, NN2d$f, NN2d$f1, NN2d$localFDR, 
          xlab = "x", ylab = "y")
=======
plotSPMix(NN2d, coord_legend = c(4, -1, 0))
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

plotSPMix() also provides visualization of 2-dimensional data. You can set name for x-axis and y-axis using xlab, ylab.

Normal + Frank Copula

Let's consider nonnull distribution as Frank copula with marginal gamma distribution.

<<<<<<< Updated upstream:vignettes/documentation.Rmd
z1_gamma <- qgamma(V, shape = 12, rate = 4)
z_NG <- data.frame(rbind(z0, z1_gamma))
=======
z1_gamma <- qgamma(V, shape = 10, rate = 4)
z_NG2d <- data.frame(rbind(z0, z1_gamma))
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

ggplot(z_NG, aes(x = z_NG[,1], y = z_NG[,2])) +
  geom_point(aes(color = factor(c(rep(1,n0), rep(2,n1)), labels = c("Normal", "Gamma")))) +
  scale_color_manual(values = c("#999999", "#E69F00"), name="")  +
  labs(title = "Random Generated Normal/Gamma Mixture", x="x", y = "y") +
  theme_classic()
NG <- SPMix(z_NG, tol = 1e-10)

<<<<<<< Updated upstream:vignettes/documentation.Rmd
plotSPMix(z_NG, NG$p0, NG$mu0, NG$sig0, NG$f, NG$f1, NG$localFDR, 
          xlab = "x", ylab = "y")
=======
plotSPMix(NG2d)
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

3-dimensional data

Normal + Gaussian Copula

Let's consider null distribution and multivariate normal density and nonnull distribution as Gaussian copula with marginal normal distribution.

library(scatterplot3d)

Sigma0 <- matrix(c(1, 0.25, 0.25, 0.25, 1, 0.25, 0.25,0.25,1), ncol = 3)
n0 <- rbinom(1, n, p0)
n1 <- n - n0
V <- rCopula(n1, ellipCopula(family = "normal", param = 0.5, dim = 3))
z0_NN3d <- rmvnorm(n0, sigma = Sigma0)
z1_NN3d <- qnorm(V, mean = 2.0, sd = .5)
z_NN3d <- rbind(z0_NN3d, z1_NN3d)

NN3d <- SPMix(z_NN3d, 1e-10)

<<<<<<< Updated upstream:vignettes/documentation.Rmd

colors <- c("#999999", "#E69F00")
colors <- colors[as.numeric(NN3d$localFDR <= 0.1)+1]
scatterplot_NN<- scatterplot3d(z_NN3d, pch = 16, color=colors,
                            xlab = "x", ylab = "y", zlab = "z")
legend_pathway <- factor((NN3d$localFDR <= 0.1), levels = c(FALSE, TRUE), 
                         labels = c("Nonsignificant", "Significant"))
legend(scatterplot_NN$xyz.convert(6, -5, 0), legend = levels(legend_pathway),
       col = c("#999999", "#E69F00"), pch = 16)
=======
```r
NN3d <- SPMix(z_NN3d)

plotSPMix(NN3d, thre_localFDR = 0.1)
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

Orange dots are given data with lower local-FDR (localFDR $< 0.1$).

Normal + Frank Copula

V_NG <- rCopula(n1, frankCopula(param = 0.5, dim = 3))
z0_NG3d <- rmvnorm(n0, sigma = Sigma0)
z1_NG3d <- qgamma(V_NG, shape = 10, rate = 4)
z_NG3d <- rbind(z0_NG3d, z1_NG3d)

NG3d <- SPMix(z_NG3d, 1e-10)

<<<<<<< Updated upstream:vignettes/documentation.Rmd

colors <- c("#999999", "#E69F00")
colors <- colors[as.numeric(NG3d$localFDR <= 0.1)+1]
scatterplot_NG<- scatterplot3d(z_NG3d, pch = 16, color=colors,
                            xlab = "x", ylab = "y", zlab = "z")
legend_pathway <- factor((NG3d$localFDR <= 0.2), levels = c(FALSE, TRUE), 
                         labels = c("Nonsignificant", "Significant"))
legend(scatterplot_NG$xyz.convert(6, -5, 0), legend = levels(legend_pathway),
       col = c("#999999", "#E69F00"), pch = 16)
=======
```r
NG3d <- SPMix(z_NG3d)

plotSPMix(NG3d, thre_localFDR = 0.1)
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

Applications

Galaxy Velocity (Univariate)

Internal data 'galaxy' is a data frame including radial velocities of globular clusters of M104 and Milky Way stars. It's known that Milky Way stars have much lower radial velocities.

data("galaxy", package = "multiLocalFDR")
head(galaxy)
z_galaxy <- galaxy$velocity
x_galaxy <- SPMix(z_galaxy, alternative = "less")
plotSPMix(x_galaxy, testing = FALSE, xlab = "velocity(km/s)")

Pathways (Multivariate)

Internal data 'pathways' is a list of significantly upregulated genes in three published studies: (1) peripheral leukocytes(L), (2) orbital inflammatory disease(O), and (3) sinus brushings(S) compared to healthy controls.

data("pathways", package = "multiLocalFDR")
head(pathways)

Univariate analysis

## SPMix for each pathway
x_L <- SPMix(pathways[,2], p_value = TRUE)
x_O <- SPMix(pathways[,3], p_value = TRUE)
x_S <- SPMix(pathways[,4], p_value = TRUE)

## Plot results for each pathway
plotSPMix(x_L, xlab = "Peripheral Leukocytes")
plotSPMix(x_O, xlab = "Orbital Tissue")
plotSPMix(x_S, xlab = "Sinus Brushings")

Multivariate Analysis

## SpMix for 3-dimensional data
<<<<<<< Updated upstream:vignettes/documentation.Rmd
params_LOS <- SPMix(pathways[,2:4], p_value = TRUE, tol = 1e-10)

## Pathways which md-fdr <= 0.01
head(pathways[params_LOS$localFDR <= 0.01,]$Gene.Set)
length(pathways[params_LOS$localFDR <= 0.01,]$Gene.Set)

# md-fdr dataframe
pathways_df <- pathways
pathways_df$localFDR <- params_LOS$localFDR
pathways_df$sgnf <- (params_LOS$localFDR <= 0.01)
head(pathways_df)
=======
x_LOS <- SPMix(pathways[,2:4], p_value = TRUE)

## Pathways which md-fdr <= 0.01
head(pathways[x_LOS$localFDR <= 0.01,]$Gene.Set)
length(pathways[x_LOS$localFDR <= 0.01,]$Gene.Set)
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

3d scatter plot

<<<<<<< Updated upstream:vignettes/documentation.Rmd
colors <- c("#999999", "#E69F00")
colors <- colors[as.numeric(pathways_df$sgnf)+1]
scatterplot<- scatterplot3d(qnorm(1-as.matrix(pathways_df[,2:4])), pch = 16, color=colors,
              xlab = "Peripheral Leukocytes",
              ylab = "Orbital Tissue",
              zlab = "Sinus Brushings")

legend_pathway <- factor(pathways_df$sgnf, levels = c(FALSE, TRUE), labels = c("Nonsignificant", "Significant"))
legend(scatterplot$xyz.convert(6, -5, 0), legend = levels(legend_pathway),
       col = c("#999999", "#E69F00"), pch = 16)
=======
plotSPMix(x_LOS, thre_localFDR = 0.01, xlab = "Peripheral Leukocytes",
          ylab = "Orbital Tissue", zlab = "Sinus Brushings", coord_legend = c(6, -5, 0))
>>>>>>> Stashed changes:vignettes/multiLocalFDR_vignettes.Rmd

Orange dots are pathways with md-fdr $< 0.01$.



JungiinChoi/multiLocalFDR documentation built on Aug. 15, 2024, 1:04 a.m.