I. Introduction {.tabset}

Report Overview

This report compares the data of our case population (r phipmake::getparam(config, "proj_id"), n = r phipmake::getparam(config, "case_names") %>% strsplit(phipmake::getparam(config, "delimiter")) %>% unlist %>% length) against that of a control population (r phipmake::getparam(config, "ctrl_id"), n = r phipmake::getparam(config, "ctrl_names") %>% strsplit(phipmake::getparam(config, "delimiter")) %>% unlist %>% length) to generate a list of candidate antigens from the r phipmake::getparam(config, "library") library.

For each of these samples, we performed PhIP-Seq to measure antibody response against our libraries of phage displayed peptides.

Types of summary statistics used in this analysis

Navigating this Document

Config Documentation

These are the configuration parameters that were fed into drake::make(phipcc::define_plan_case_control).

knitr::kable(config) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), fixed_thead = TRUE) %>% 
  kableExtra::column_spec(2, width = "10em")

Technical Documentation

sessionInfo()

II. Candidate Antigen Tables {.tabset}

[Top]

After standard post-processing of the assay sequencing data to generate enrichment scores, we perform Fisher's exact test to compare the scores of cases vs. controls for each peptide & protein. The p values from this Fisher's test are corrected using Benjamini & Hochberg method (BH).

We now take a subset of peptides/proteins for which:

Annotated

This table represents candidate antigens with known associations in our database. (e.g. autoantigens, surface proteins). Click to the next tab to see all hits.

border <- "1px solid gray"
scroll_height <- "1000px"
use_scrollbox <- FALSE

flag_table <- params$candidate_table_flagged
if(nrow(flag_table) > 17) use_scrollbox <- TRUE

if(nrow(flag_table) > 0){
  flag_table <- flag_table %>%
      knitr::kable(
        format = "html", escape = FALSE,
        caption = "Annotated Candidate Antigens", row.names = FALSE) %>%
      kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), fixed_thead = TRUE) %>% 
      kableExtra::add_header_above(
        c(" " = 4, "Hit Freq" = 2,"Median Hit Score" = 2," " = 1)) %>%
       kableExtra::column_spec(1,border_left = border) %>%
       kableExtra::column_spec(ncol(flag_table), border_right = border) %>%
       kableExtra::column_spec(c(1:4,6,8), border_right = border) 

  if(use_scrollbox){
    flag_table <- flag_table %>% kableExtra::scroll_box(height = scroll_height)
  }

flag_table

}

All Hits

full_table <- params$candidate_table_full
if(nrow(full_table) > 17) use_scrollbox <- TRUE

if(nrow(full_table) > 0){
  full_table <- full_table %>%
      knitr::kable(
        format = "html", escape = FALSE,
        caption = "All Candidate Antigens", row.names = FALSE) %>%
      kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), fixed_thead = TRUE) %>% 
      kableExtra::add_header_above(
        c(" " = 4, "Hit Freq" = 2,"Median Hit Score" = 2," " = 1)) %>%
       kableExtra::column_spec(1,border_left = border) %>%
       kableExtra::column_spec(ncol(full_table), border_right = border) %>%
       kableExtra::column_spec(c(1:4,6,8), border_right = border)

  if(use_scrollbox){
    full_table <- full_table %>% kableExtra::scroll_box(height = scroll_height)
  }

  full_table

}

AVARDA Hits

AVARDA_table <- params$AVARDA_candidate_table
if(!is.na(AVARDA_table[[1]][1])){
  use_scrollbox <- FALSE
  if(nrow(AVARDA_table) > 17) use_scrollbox <- TRUE

  if(nrow(AVARDA_table) > 0){
    AVARDA_table <- AVARDA_table %>%
      knitr::kable(
        format = "html", escape = FALSE,
        caption = "AVARDA Candidate Virus Antigens", row.names = FALSE) %>%
      kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), fixed_thead = TRUE) %>%
      kableExtra::add_header_above(
        c(" " = 1, "Seropos Frequency" = 2, "Median Seropos Score" = 2, 
          " " = 1, "Seropos Breadth" = 2,
          #"Breadth Hit Frequency" = 2, "Median Hit Breadth" = 2,
          " " = 1)) %>%
      kableExtra::column_spec(1, border_left = border) %>%
      kableExtra::column_spec(ncol(AVARDA_table), border_right = border) %>% 
      kableExtra::column_spec(c(1, 3, 5, 6, 8, 9), border_right = border)

    if(use_scrollbox){
      AVARDA_table <- AVARDA_table %>% kableExtra::scroll_box(height = scroll_height)
    }

    AVARDA_table
  }


}

III. Exploratory Analysis {.tabset}

[Top]

We can continue to explore this data via some graphs. Note these plots are interactive. You can click on the legend to enable/disable different traces, and you can drag a box around regions of the graph to zoom in.

Hit Frequency vs. Hit Score in Cases

We define a "hit" as scoring above the 95th percentile of the control population (RCP>0.95). Among our case samples, we plot the hit frequency among cases against the median score of these hits.

plotly::ggplotly(graphs[[1]])

Median Hit Score, Cases vs. Controls

Based on our definition of hit, there are always ~5% of controls that also score as a hit for any given peptide or protein. Here we plot the median score of hits among cases against the median score of hits among controls.

plotly::ggplotly(graphs[[2]])

P-Value vs. Median Case Hit Score

As mentioned earlier, we perform a Fisher's exact test comparing the number of hits in cases vs. controls and perform a Benjamini-Hochberg correction of the p values from this test. Here we plot the negative log base 10 of these corrected p values against the median score of hits among cases.

plotly::ggplotly(graphs[[3]])

IV. Hit Clustering {.tabset}

[Top]

Clustergram

I produce a clustergram of these hits using Ward's method for agglomeration. Each row corresponds to a peptide or protein from our filtered hit table and each column corresponds to a case sample. Squares shaded in black indicate an RCP hit, meaning that the sample scored above the 95th percentile of the control population for that peptide or protein. The dendrograms from this clustering are displayed on the following tabs.

if(!is.na(params$cluster1[[1]][1])){
  clustergram_sorted <- params$cluster1[[3]]
  htext <- params$cluster1[[2]]
  ncolors <- 2
  my_palette <- colorRampPalette(c("white","black"))(n = ncolors)


 heatmaply::heatmaply(
    clustergram_sorted, custom_hovertext = htext,
    main = paste("Candidate Antigen Clustergram"),
    colors = my_palette,
    fontsize_row = 8,
    fontsize_col = 8,
    column_text_angle = 90,
    margins = c(65, 120),
    trace = "both",
    tracecol = "grey",
    Rowv = FALSE,
    Colv = FALSE,
    grid_gap = 1,
    hide_colorbar = TRUE)
}

Row (Peptide) Dendrogram

if(!is.na(params$cluster1[[1]][1])){
  row_dendrogram <- params$cluster1[[1]]$rowDendrogram
  library(plotly)

  g <- ggdendro::ggdendrogram(row_dendrogram, rotate = TRUE) + 
    ggplot2::theme(axis.text.y = ggplot2::element_text(size = 8))
  # plotly::ggplotly(g)
  g
}

Column (Sample) Dendrogram

if(!is.na(params$cluster1[[1]][1])){
  col_dendrogram <- params$cluster1[[1]]$colDendrogram

  g <- ggdendro::ggdendrogram(col_dendrogram, rotate = TRUE) + 
    ggplot2::theme(axis.text.y = ggplot2::element_text(size = 8))
  # plotly::ggplotly(g)
  g
}

V. Epitope Clustering

[Top]

epitopefindr Motifs {.tabset}

Now we call epitopefindr, an R pipeline designed to look for BLAST alignments among these filtered hit peptides. Peptides that share sequences that might be cross-reactively recognized by a single antibody are grouped, and these groups are reclustered along with any peptides that lacked significant alignments. We find evidence for the following motifs.

#print precomputed motifs
# msa_path <- "data/epitopefindr/intermediate_files/msa/"
msa_path <- params$motifs
msa_name <- paste0(msa_path, "msa.pdf")

if(file.exists(msa_name)){
  num_page <- pdftools::pdf_info(msa_name)$pages
  num_length <- nchar(num_page)

  for(i in 1:num_page){
    num_padded <- formatC(i, width = num_length, flag = "0")
    png_name <- paste0(msa_path, "msa-", num_padded, ".png")
    pdf_name <- paste0(msa_path, "msa-", num_padded, ".pdf")

    if(!file.exists(png_name)){
          pdftools::pdf_convert(pdf_name, format = "png", filenames = png_name,
                          dpi = 300,verbose = FALSE)

    }

            cat("  \n### MSA",  i, "  \n")
        for(j in 1:length(png_name)){
            cat("![](",png_name[j],")",sep="")
          # png_name[j] %>% (EBImage::readImage) %>% (EBImage::display)
        }
        cat("  \n  \n")


  }

}

Epitope-Collapsed Hit Clustering {.tabset}

Epitope Clustergram

We re-cluster, collapsing peptides for which epitopefindr detected possible cross-reactivity into single rows.

if(!is.na(params$cluster2[[1]][1])){
  clustergram_sorted <- params$cluster2[[3]]
  htext <- params$cluster2[[2]]
  ncolors <- 5
  my_palette <- colorRampPalette(c("white","black"))(n = ncolors)


   heatmaply::heatmaply(
      clustergram_sorted, custom_hovertext = htext,
      main = paste("Candidate Antigen Clustergram (Epitope-Collapsed)"),
      colors = my_palette,
      fontsize_row = 8,
      fontsize_col = 8,
      column_text_angle = 90,
      margins = c(65, 120),
      trace = "both",
      tracecol = "grey",
      Rowv = FALSE,
      Colv = FALSE,
      grid_gap = 1,
      hide_colorbar = TRUE)

}

Row (Peptide/Epitope) Clustergram

if(!is.na(params$cluster2[[1]][1])){
  row_dendrogram <- params$cluster2[[1]]$rowDendrogram

  g <- ggdendro::ggdendrogram(row_dendrogram, rotate = TRUE) + 
    ggplot2::theme(axis.text.y = ggplot2::element_text(size = 8)) 
  # plotly::ggplotly(g)
  g
}

Column (Sample) Clustergram

if(!is.na(params$cluster2[[1]][1])){
  col_dendrogram <- params$cluster2[[1]]$colDendrogram

  g <- ggdendro::ggdendrogram(col_dendrogram, rotate = TRUE) + 
    ggplot2::theme(axis.text.y = ggplot2::element_text(size = 8)) 
  # plotly::ggplotly(g)
  g
}


brandonsie/phipcc documentation built on June 2, 2020, 6:19 a.m.