Introduction_to_pathwayPCA.R
In pathwayPCA: Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE,
                      cache = FALSE,
                      comment = "#>")

## ----packLoad, message=FALSE--------------------------------------------------
library(tidyverse)
library(pathwayPCA)

## ----loadAssay----------------------------------------------------------------
gitHubPath_char <- "https://raw.githubusercontent.com/lizhongliu1996/pathwayPCAdata/master/"
ovSurv_df <- readRDS(
  url(paste0(gitHubPath_char, "ovarian_PNNL_survival.RDS"))
)

## ----printAssay, eval=TRUE----------------------------------------------------
ovSurv_df [1:5, 1:5]

## ----rWikiPathways, eval=FALSE------------------------------------------------
#  library(rWikiPathways)
#  # library(XML) # necessary if you encounter an error with readHTMLTable
#  downloadPathwayArchive(
#    organism = "Homo sapiens", format = "gmt"
#  )

## ----printWPgmtpath, echo=FALSE-----------------------------------------------
print("wikipathways-20190110-gmt-Homo_sapiens.gmt")

## ----ourWikiPWC---------------------------------------------------------------
dataDir_path <- system.file(
  "extdata", package = "pathwayPCA", mustWork = TRUE
)
wikipathways_PC <- read_gmt(
  paste0(dataDir_path, "/wikipathways_human_symbol.gmt"),
  description = TRUE
)

## ----createOmics, comment=""--------------------------------------------------
ov_OmicsSurv <- CreateOmics(
  # protein expression data
  assayData_df = ovSurv_df[, -(2:3)], 
  # pathway collection
  pathwayCollection_ls = wikipathways_PC,
  # survival phenotypes
  response = ovSurv_df[, 1:3],
  # phenotype is survival data
  respType = "survival",
  # retain pathways with > 5 proteins
  minPathSize = 5  
)

## ----inspectOmicsObj----------------------------------------------------------
ov_OmicsSurv

## ----aespcaCall, comment=""---------------------------------------------------
ovarian_aespcOut <- AESPCA_pVals(
  # The Omics data container
  object = ov_OmicsSurv,
  # One principal component per pathway
  numPCs = 1,
  # Use parallel computing with 2 cores
  parallel = TRUE,
  numCores = 2,
  # # Use serial computing
  # parallel = FALSE,
  # Estimate the p-values parametrically
  numReps = 0,
  # Control FDR via Benjamini-Hochberg
  adjustment = "BH"
)

## ----inspectAESPCAout---------------------------------------------------------
names(ovarian_aespcOut)

## ----headPVals----------------------------------------------------------------
getPathpVals(ovarian_aespcOut, numPaths = 10)

## ----subset_top_genes---------------------------------------------------------
ovOutGather_df <- getPathpVals(ovarian_aespcOut, score = TRUE)

## ----pathway_bar_plots, fig.height = 5, fig.width = 8-------------------------
ggplot(ovOutGather_df) +
  # set overall appearance of the plot
  theme_bw() +
  # Define the dependent and independent variables
  aes(x = reorder(terms, score), y = score) +
  # From the defined variables, create a vertical bar chart
  geom_col(position = "dodge", fill = "#66FFFF", width = 0.7) +
  # Add pathway labels
  geom_text(
    aes(
      x = reorder(terms, score),
      label = reorder(description, score),
      y = 0.1
    ),
    color = "black",
    size = 2, 
    hjust = 0
  ) +
  # Set main and axis titles
  ggtitle("AES-PCA Significant Pathways: Ovarian Cancer") +
  xlab("Pathways") +
  ylab("Negative LN (p-Value)") +
  # Add a line showing the alpha = 0.01 level
  geom_hline(yintercept = -log(0.01), size = 1, color = "blue") +
  # Flip the x and y axes
  coord_flip()

## ----getWP195pcl--------------------------------------------------------------
wp195PCLs_ls <- getPathPCLs(ovarian_aespcOut, "WP195")
wp195PCLs_ls

## ----WP195loads---------------------------------------------------------------
wp195Loadings_df <- 
  wp195PCLs_ls$Loadings %>% 
  filter(PC1 != 0)
wp195Loadings_df

## ----loadDirections-----------------------------------------------------------
wp195Loadings_df <- 
  wp195Loadings_df %>% 
  # Sort Loading from Strongest to Weakest
  arrange(desc(abs(PC1))) %>% 
  # Order the Genes by Loading Strength
  mutate(featureID = factor(featureID, levels = featureID)) %>% 
  # Add Directional Indicator (for Colour)
  mutate(Direction = factor(ifelse(PC1 > 0, "Up", "Down"))) 

## ----graphLoads, fig.height = 3.6, fig.width = 6.5----------------------------
ggplot(data = wp195Loadings_df) +
  # Set overall appearance
  theme_bw() +
  # Define the dependent and independent variables
  aes(x = featureID, y = PC1, fill = Direction) +
  # From the defined variables, create a vertical bar chart
  geom_col(width = 0.5, fill = "#005030", color = "#f47321") +
  # Set main and axis titles
  labs(
    title = "Gene Loadings on IL-1 Signaling Pathway",
    x = "Protein",
    y = "Loadings of PC1"
  ) +
  # Remove the legend
  guides(fill = FALSE)

## ----wp195PCs, fig.height = 3.6, fig.width = 6.5------------------------------
ggplot(data = wp195PCLs_ls$PCs) +
  # Set overall appearance
  theme_classic() + 
  # Define the independent variable
  aes(x = V1) +
  # Add the histogram layer
  geom_histogram(bins = 10, color = "#005030", fill = "#f47321") +
  # Set main and axis titles
  labs(
    title = "Distribution of Sample-specific Estimate of Pathway Activities",
    subtitle = paste0(wp195PCLs_ls$pathway, ": ", wp195PCLs_ls$description),
    x = "PC1 Value for Each Sample",
    y = "Count"
  )

## ----subsetWP195data----------------------------------------------------------
wp195Data_df <- SubsetPathwayData(ov_OmicsSurv, "WP195")
wp195Data_df

## ----gatherWP195--------------------------------------------------------------
wp195gather_df <- wp195Data_df %>% 
  arrange(EventTime) %>% 
  select(-EventTime, -EventObs) %>% 
  gather(protein, value, -sampleID)

## ----heatmapWP195, fig.width=8, fig.asp=0.62----------------------------------
ggplot(wp195gather_df, aes(x = protein, y = sampleID)) +
  geom_tile(aes(fill = value)) +
  scale_fill_gradient2(low = "red", mid = "black", high = "green") +
  labs(x = "Proteins", y = "Subjects", fill = "Protein level") +
  theme(axis.text.x  = element_text(angle = 90)) +
  coord_flip()

## ----dataSubsetMod------------------------------------------------------------
library(survival)
NFKB1_df <- 
  wp195Data_df %>% 
  select(EventTime, EventObs, NFKB1)

wp195_mod <- coxph(
  Surv(EventTime, EventObs) ~ NFKB1,
  data = NFKB1_df
)

summary(wp195_mod)

## ----NFKB1Surv----------------------------------------------------------------
# Add the direction
NFKB1_df <- 
  NFKB1_df %>% 
  # Group subjects by gene expression
  mutate(NFKB1_Expr = ifelse(NFKB1 > median(NFKB1), "High", "Low")) %>% 
  # Re-code time to years
  mutate(EventTime = EventTime / 365.25) %>% 
  # Ignore any events past 10 years
  filter(EventTime <= 10) 

# Fit the survival model
NFKB1_fit <- survfit(
  Surv(EventTime, EventObs) ~ NFKB1_Expr,
  data = NFKB1_df
)

## ----NFKB1SurvPlot, message=FALSE, fig.height = 4, fig.width = 6--------------
library(survminer)

ggsurvplot(
  NFKB1_fit,
  conf.int = FALSE, pval = TRUE,
  xlab = "Time in Years",
  palette = c("#f47321", "#005030"),
  xlim = c(0, 10)
)

## ----CNVsetup-----------------------------------------------------------------
copyNumberClean_df <- readRDS(
  url(paste0(gitHubPath_char, "OV_surv_x_CNV2.RDS"))
)

## ----CNVomics, eval=FALSE-----------------------------------------------------
#  ovCNV_Surv <- CreateOmics(
#    assayData_df = copyNumberClean_df[, -(2:3)],
#    pathwayCollection_ls = wikipathways_PC,
#    response = copyNumberClean_df[, 1:3],
#    respType = "survival",
#    minPathSize = 5
#  )

## ----CNV_aespca, eval=FALSE---------------------------------------------------
#  ovCNV_aespcOut <- AESPCA_pVals(
#    object = ovCNV_Surv,
#    numPCs = 1,
#    parallel = TRUE,
#    numCores = 20,
#    numReps = 0,
#    adjustment = "BH"
#  )

## ----CNV_aespca_read, echo=FALSE----------------------------------------------
ovCNV_aespcOut <- readRDS(
  url(paste0(gitHubPath_char, "ovarian_copyNum_aespcOut.RDS"))
)

## ----multiOmicsPvals----------------------------------------------------------
# Copy Number
CNVpvals_df <- 
  getPathpVals(ovCNV_aespcOut, alpha = 0.05) %>% 
  mutate(rawp_CNV = rawp) %>% 
  select(description, rawp_CNV)

# Proteomics
PROTpvals_df <- 
  getPathpVals(ovarian_aespcOut, alpha = 0.05) %>% 
  mutate(rawp_PROT = rawp) %>% 
  select(description, rawp_PROT)


# Intersection
SigBoth_df <- inner_join(PROTpvals_df, CNVpvals_df, by = "description")
# WnT Signaling Pathway is listed as WP363 and WP428

## ----intersectPaths-----------------------------------------------------------
SigBoth_df

## ----wp195--------------------------------------------------------------------
# Copy Number Loadings
CNVwp195_ls <- getPathPCLs(ovCNV_aespcOut, "WP195")
CNV195load_df <- 
  CNVwp195_ls$Loadings %>% 
  filter(abs(PC1) > 0) %>% 
  rename(PC1_CNV = PC1)

# Protein Loadings
PROTwp195_ls <- getPathPCLs(ovarian_aespcOut, "WP195")
PROT195load_df <- 
  PROTwp195_ls$Loadings %>% 
  filter(abs(PC1) > 0) %>% 
  rename(PC1_PROT = PC1)

# Intersection
inner_join(CNV195load_df, PROT195load_df)

## ----mergeGene----------------------------------------------------------------
NFKB1data_df <- inner_join(
  copyNumberClean_df %>% 
    select(Sample, NFKB1) %>% 
    rename(CNV_NFKB1 = NFKB1),
  ovSurv_df %>% 
    select(Sample, NFKB1) %>% 
    rename(PROT_NFKB1 = NFKB1)
)

## ----NFKB1cor, comment=""-----------------------------------------------------
NFKB1_cor <- cor.test(NFKB1data_df$CNV_NFKB1, NFKB1data_df$PROT_NFKB1)
NFKB1_cor

## ----NFKB1plot, fig.height = 4, fig.width = 4---------------------------------
ggplot(data = NFKB1data_df) +
  # Set overall appearance
  theme_bw() +
  # Define the dependent and independent variables
  aes(x = CNV_NFKB1, y = PROT_NFKB1) +
  # Add the scatterplot
  geom_point(size = 2) +
  # Add a trendline
  geom_smooth(method = lm, se = FALSE, size = 1) +
  # Set main and axis titles
  labs(
    title = "NFKB1 Expressions in Multi-Omics Data",
    x = "Copy Number Variation (Centred and Scaled)",
    y = "Protein Expression (Centred and Scaled)"
  ) +
  # Include the correlation on the graph
  annotate(
    geom = "text", x = 0.5, y = -0.6,
    label = paste("rho = ", round(NFKB1_cor$estimate, 3))
  ) +
  annotate(
    geom = "text", x = 0.5, y = -0.7,
    label = "p-val < 10^-5"
  )

## ----kidneyData---------------------------------------------------------------
kidney_df <- readRDS(
  url(paste0(gitHubPath_char, "KIRP_Surv_RNAseq_inner.RDS"))
)

## ----kidneyOmics--------------------------------------------------------------
# Create Omics Container
kidney_Omics <- CreateOmics(
  assayData_df = kidney_df[, -(2:4)],
  pathwayCollection_ls = wikipathways_PC,
  response = kidney_df[, 1:3],
  respType = "surv",
  minPathSize = 5
)

## ----kidneyAESPCA-------------------------------------------------------------
# AESPCA
kidney_aespcOut <- AESPCA_pVals(
  object = kidney_Omics,
  numPCs = 1,
  parallel = TRUE,
  numCores = 2,
  numReps = 0,
  adjustment = "BH"
)

## ----sexInteractFun-----------------------------------------------------------
TestIntxn <- function(pathway, pcaOut, resp_df){
  
  # For the given pathway, extract the PCs and loadings from the pcaOut list
  PCL_ls <- getPathPCLs(pcaOut, pathway)
  # Select and rename the PC
  PC_df <- PCL_ls$PCs %>% select(PC1 = V1)
  # Bind this PC to the phenotype data
  data_df <- bind_cols(resp_df, PC_df)
  
  # Construct a survival model with sex interaction
  sex_mod <- coxph(
    Surv(time, status) ~ PC1 + male + PC1 * male, data = data_df
  )
  # Extract the model fit statistics for the interaction
  modStats_mat <- t(
    coef(summary(sex_mod))["PC1:maleTRUE", ]
  )
 
  # Return a data frame of the pathway and model statistics
  list(
    statistics = data.frame(
      terms = pathway,
      description = PCL_ls$description,
      modStats_mat,
      stringsAsFactors = FALSE
    ),
    model = sex_mod
  )

}

## ----testNewFun---------------------------------------------------------------
TestIntxn("WP195", kidney_aespcOut, kidney_df[, 2:4])$model

## ----applyTestSexFun----------------------------------------------------------
paths_char <- kidney_aespcOut$pVals_df$terms
interactions_ls <- lapply(
    paths_char,
    FUN = TestIntxn,
    pcaOut = kidney_aespcOut,
    resp_df = kidney_df[, 2:4]
)
names(interactions_ls) <- paths_char

interactions_df <- 
  # Take list of interactions
  interactions_ls %>% 
  # select the first element (the data frame of model stats)
  lapply(`[[`, 1) %>% 
  # stack these data frames
  bind_rows() %>% 
  as.tibble() %>% 
  # sort the rows by significance
  arrange(`Pr...z..`)

interactions_df %>% 
  filter(`Pr...z..` < 0.05)

## ----modelRes-----------------------------------------------------------------
summary(interactions_ls[["WP1559"]]$model)

## ----kidneySurvPlotData-------------------------------------------------------
# Bind the Pheno Data to WP1559's First PC
kidneyWP1559_df <- bind_cols(
  kidney_df[, 2:4],
  getPathPCLs(kidney_aespcOut, "WP1559")$PCs %>% select(PC1 = V1)
)

# Add Grouping Feature
kidneySurvWP1559grouped_df <- 
  kidneyWP1559_df %>% 
  # add strength indicator
  mutate(direction = ifelse(PC1 > median(PC1), "High", "Low")) %>% 
  # group by interaction of sex and strength on PC
  mutate(group = paste0(direction, ifelse(male, "_M", "_F"))) %>% 
  # recode time in years
  mutate(time = time / 365.25) %>% 
  # remove summarized columns
  select(-male, -PC1, -direction)

## ----plotKidneyGroupedSurv, message=FALSE, fig.height = 5, fig.width = 7------
# Fit the survival model
fit <- survfit(
  Surv(time, status) ~ group,
  data = kidneySurvWP1559grouped_df
)

ggsurvplot(
  fit, conf.int = FALSE,
  xlab = "Time in Years",
  xlim = c(0, 10),
  ylim = c(0.4, 1)
)

## ----sessionDetails-----------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
pathwayPCA documentation built on Dec. 15, 2020, 6:14 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
pathwayPCA
Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection

inst/doc/Introduction_to_pathwayPCA.R
In pathwayPCA: Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection

Try the pathwayPCA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pathwayPCA Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection

inst/doc/Introduction_to_pathwayPCA.R In pathwayPCA: Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection

Try the pathwayPCA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pathwayPCA
Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection

inst/doc/Introduction_to_pathwayPCA.R
In pathwayPCA: Integrative Pathway Analysis with Modern PCA Methodology and Gene Selection