R/utils_Analysis.R

Defines functions process_for_download calculate_pca_data calculate_var_table_data render_pca_scree_graph render_pca_biplot_graph render_pca_summary_graph render_cor_graph render_var_graph render_timepoint_graph render_dilutions_graph render_var_table render_samples_graph render_standards_cv_graph render_standards_graph render_data_table render_std_table process_data process_std

Documented in render_standards_cv_graph

#' Processes standards
#'
#' @description A utils function
#'
#' @return table with all stuff
#' @export
#'
#' @noRd
process_std <- function(data) {
  data %>%
    group_by(Sample, Cytokine) %>%
    drop_na(Value) %>%
    summarise(
      Average = mean(Value) %>% round(., digits = 2),
      CV = 100*sd(Value)/mean(Value) %>% round(., digits = 2)
    ) %>%
    ungroup()
}

#' Processes data
#'
#' @description A utils function
#'
#' @return table with all stuff
#' @export
#'
#' @noRd
process_data <- function(data) {
  data %>%
    group_by(Sample, Sample_day, Cytokine) %>%
    drop_na(Value) %>%
    mutate(
      Average = mean(Value) %>% round(., digits = 2),
      CV = 100*sd(Value)/mean(Value) %>% round(., digits = 2)
    ) %>%
    ungroup() %>%
    select(-Location, -Sample, -Sample_day)
}

#' Renders standards
#'
#' @description A utils function
#'
#' @param data the standards
#' @param height size of the scrollbox
#'
#' @return knitr table with all stuff
#' @export
#'
#' @noRd
render_std_table <- function(data, height = NULL) {
  data %>%
    arrange(Cytokine) %>%
    mutate(CV = ifelse( ((CV >= 25) | (CV == 0)), kableExtra::cell_spec(round(CV, 2), bold = TRUE, background = "yellow", color = "black"), round(CV, 2))) %>%
    kableExtra::kbl(digits = 2, format = "html", align = "c", escape = FALSE, caption = "Standards Table") %>%
    kableExtra::kable_styling(position = "center", bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE) %>%
    kableExtra::scroll_box(height = height)
}

#' Renders data
#'
#' @description A utils function
#'
#' @return knitr table with all stuff
#' @export
#'
#' @noRd
render_data_table <- function(data) {
  data %>%
    arrange(Name, Day, Cytokine, Dilution) %>%
    kableExtra::kbl(digits = 2, format = "html", align = "c", caption = "Data Table") %>%
    kableExtra::kable_styling(position = "center", bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE)
}


#' Renders Standards Plot
#'
#' @param data standards
#' @export
#'
#' @return standards plot
#' @export
#'
#' @noRd
render_standards_graph <- function(data) {
  ggplot(
    data,
    aes(x = Cytokine, y = Value, color = Sample)
  ) +
    geom_boxplot() +
    scale_color_viridis_d(name = "Standard:", end = .85) +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, vjust = 1),
      legend.position = "bottom"
    ) +
    ggtitle(label = "Summary of the Standards Used") +
    xlab("Cytokine") +
    ylab("Value")
}


#' Renders CV for standards tilemap
#'
#' @param data the standards data
#'
#' @return the graph of CV tiles
#' @export
#'
#' @importFrom shadowtext geom_shadowtext
render_standards_cv_graph <- function(data) {
  mutate(data, CV = round(CV, 1)) %>%
    ggplot(
      aes(x = Cytokine, y = forcats::fct_rev(Sample), fill = CV, label = CV)
    ) +
    geom_tile() +
    theme_classic() +
    scale_fill_viridis_c(name = "CV (%):", end = .8, limits = c(0, NA)) +
    ggtitle(label = "Summary of the Standards CV%") +
    theme(
      axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, vjust = 1),
      axis.title.y = element_blank(),
      axis.title.x = element_blank(),
      legend.position = "bottom"
    ) +
    geom_shadowtext(color = "white", bg.colour = "gray50", size = 2.5) +
    ylab("Standard")
}

#' Renders Samples
#'
#' @param data standards
#'
#' @return
#' @export
#'
#' @noRd
render_samples_graph <- function(data) {
  ggplot(
    data,
    aes(x = Cytokine, y = Value, color = Sample_day)
  ) +
    geom_boxplot() +
    scale_color_viridis_d(name = "Sample:", end = .85) +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, vjust = 1),
      legend.position = "bottom"
    )
}



#' Renders variations
#'
#' @description A utils function
#'
#' @param data the data
#' @param height the height of the scrollbox
#'
#' @return knitr table with all stuff
#' @export
#'
#' @noRd
render_var_table <- function(data, height = NULL) {
  data %>%
    arrange(Name, Day, Cytokine, Dilution) %>%
    select(Name, Day, Cytokine, Dilution, Average = AVG, CV) %>%
    mutate(
      CV = ifelse(
        CV > 25,
        kableExtra::cell_spec(CV, format = "html", bold = TRUE, background = "yellow", color = "black"),
        ifelse(
          CV == 0,
          kableExtra::cell_spec(CV, format = "html", bold = FALSE, background = "lightred", color = "darkred"),
          as.character(CV)
        )
      )
    ) %>%
    kableExtra::kbl(digits = 2, format = "html", escape = FALSE, align = "c", caption = "Variations Table") %>%
    kableExtra::kable_styling(position = "center", bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE) %>%
    kableExtra::collapse_rows(1:4) %>%
    kableExtra::scroll_box(height = height)
}


#' Renders a nice dilutions graph that's totally not cluttered af
#'
#' @param data
#'
#' @return a graph
#' @export
#'
#' @noRd
render_dilutions_graph <- function(data) {
  ggplot(
    data,
    aes(
      x = Sample_day,
      y = Value,
      shape = Dilution,
      color = Cytokine
    )
  ) +
    geom_point(position = position_jitterdodge(dodge.width = .7, jitter.width = .2, jitter.height = 0)) +
    geom_boxplot(
      aes(
        x = Sample_day,
        y = Value,
        fill = Dilution
      ),
      size = .2,
      color = "black",
      alpha = 1/5,
      position = position_dodge2(width = 1),
      outlier.alpha = 0
    ) +
    scale_color_viridis_d(name = "Cytokine:", end = .9) +
    scale_fill_brewer(type = "seq", palette = "Dark2") +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, vjust = 1),
      legend.position = "bottom"
    )
}

#' Renders a nice timepoint graph that's totally not useless without >=3 timepoints
#'
#' @param data
#'
#' @return a graph
#' @export
#'
#' @noRd
render_timepoint_graph <- function(data) {
  data <- mutate(data, Day = as.numeric(as.character(Day)))

  i <- min(data$Day, na.rm = TRUE)
  j <- max(data$Day, na.rm = TRUE)

  if (is.finite(i) && is.finite(j)) {
    ggplot(
        data,
        aes(x = Day, y = Value, color = Cytokine, shape = Name)
    ) +
    geom_point(position = position_jitterdodge(dodge.width = .1, jitter.width = .1, jitter.height = 0), alpha = 3/4) +
    geom_smooth(aes(linetype = Name), method = function(formula, data, weights = weight) robustbase::lmrob(formula, data, weights = weight), se = FALSE) +
    scale_color_viridis_d(name = "Cytokine:", end = .9) +
    scale_shape_discrete(name = "Sample:") +
    scale_linetype_discrete(name = "Trendline:") +
    theme_classic() +
    scale_x_continuous(limits = c(i, j), breaks = i:j, labels = i:j) +
    theme(
      axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, vjust = 1),
      legend.position = "bottom"
    )
  } else {
    NULL
  }
}

#' Renders variations graph
#'
#' @param data
#'
#' @return a graph
#' @export
#'
#' @noRd
render_var_graph <- function(data) {
  group_by(data, Name, Day, Dilution, Cytokine) %>%
    summarise(
      CV = (sd(Value, na.rm = T) / mean(Value, na.rm = T) * 100) %>% round(., digits = 2)
    ) %>%
    ungroup() %>%
    mutate(
      Tag = paste0(Name,"_D", Day, "_1:", Dilution),
      Dilution = as.numeric(as.character(Dilution))
    ) %>%
    arrange(Name, Day, Dilution) %>%
    ggplot(
      aes(
        y = Cytokine,
        x = forcats::fct_inorder(Tag),
        fill = CV
      )
    ) +
    xlab("Sample Identifier") +
    ylab("Cytokine") +
    geom_tile() +
    theme_classic() +
    scale_fill_viridis_c(name = "CV%", limits = c(0, NA), option = "B") +
    theme(
      axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, vjust = 1),
      legend.position = "bottom"
    )
}

#' Renders Correlations graph
#'
#' @param data data
#' @param ... additional stuff to pass to cor() function
#'
#' @return correlations graph
#' @export
#'
#' @noRd
render_cor_graph <- function(data, ...) {
  mutate(data, Tag = paste0(Name,"_D", Day, "_1:", Dilution)) %>%
    select(Tag, Cytokine, Value) %>%
    pivot_wider(names_from = Cytokine, values_from = Value, values_fn = function(x) mean(x, na.rm = TRUE)) %>%
    select(-Tag) %>%
    cor(., ...) %>%
    ggcorrplot(
      title = "Correlation Matrix - ordered (hierarchical clustering)",
      type = "lower",
      lab = TRUE,
      lab_size = 2,
      digits = 2,
      ggtheme = "theme_classic",
      hc.order = TRUE,
      legend.title = "Correlation\nCoefficient"
    )
}


#' Renders PCA Individuals Graph
#'
#' @param data
#'
#' @return correlations graph
#' @export
#'
#' @noRd
render_pca_summary_graph <- function(data) {
  factoextra::fviz_pca_ind(data, col.ind = "cos2", repel = TRUE) +
    theme_classic() +
    labs(title = "PCA Plot - Samples", subtitle = "Scaled/Centered Data, using prcomp()", color = "Quality\n(cos2)")
}

#' Renders PCA Biplot Graph
#'
#' @param data
#'
#' @return correlations graph
#' @export
#'
#' @noRd
render_pca_biplot_graph <- function(data) {
  factoextra::fviz_pca_biplot(data, col.ind = "cos2", repel = TRUE, label = "var") +
    theme_classic() +
    labs(title = "PCA Biplot", subtitle = "Scaled/Centered Data, using prcomp()", color = "Quality\n(cos2)")
}

#' Renders PCA Scree plot
#'
#' @param data
#'
#' @return correlations graph
#' @export
#'
#' @noRd
render_pca_scree_graph <- function(data) {
  factoextra::fviz_eig(data, addlabels = TRUE) +
    theme_classic() +
    labs(title = "PCA Scree Plot", subtitle = "Scaled/Centered Data, using prcomp()")
}



#' Calculates variations in the data
#'
#' @param data sample data
#'
#' @return variations
#' @export
#'
#' @noRd
calculate_var_table_data <- function(data) {
  group_by(data, Name, Day, Dilution, Cytokine) %>%
    summarise(
      AVG = mean(Value, na.rm = T) %>% round(., digits = 2),
      CV  = (sd(Value, na.rm = T) / AVG * 100) %>% round(., digits = 2)
    )
}

#' PCA/SVD
#' @details before you jump on me for not using princomp because spectral decomposition might be better for investigating cov/cor btw variables than SVD which prcomp() does to look at the cov/cor btw individuals: we don't have enough data for spectral decomposition most of the time.
#'
#' @param data
#'
#' @return PCA object
#' @export
#'
#' @noRd
calculate_pca_data <- function(data) {

  pca_data <- data %>%
    mutate(Tag = paste0(Name,"_D", Day, "_1:", Dilution)) %>%
    select(Tag, Cytokine, Value) %>%
    pivot_wider(names_from = Cytokine, values_from = Value, values_fn = function(x) mean(x, na.rm = TRUE))

  pca_names <- unlist(pca_data[, 1])

  pca_named_data <- as.data.frame(pca_data[, -1])
  rownames(pca_named_data) <- pca_names

  prcomp(pca_named_data, scale = TRUE, center = TRUE)
}


#' Prepare data for download
#'
#' @param data list containing standards/samples data
#'
#' @return a list containing processed information based on the input
#'
#' @noRd
process_for_download <- function(data) {

  std_raw <- data$standards %>%
    set_names(
      c("Location", "Standard", "Cytokine", "Value", "Validity")
    )

  std_summary <- data$standards %>%
    group_by(Cytokine, Sample) %>%
    summarise(
      n   = n(),
      AVG = mean(Value, na.rm = TRUE),
      SD  = sd(Value, na.rm = TRUE),
      CV  = 100 * SD / AVG
    ) %>%
    mutate(
      AVG = ifelse(is.nan(AVG), 0, AVG),
      across(SD:CV, ~ ifelse(is.na(.x), 0, .x))
    ) %>%
    ungroup() %>%
    mutate(across(AVG:CV, ~ round(.x, 2))) %>%
    set_names(
      c("Cytokine", "Standard", "Data Points", "Average", "Std. Dev", "CV (%)")
    )

  data_raw <- data$data %>%
    set_names(
      c("Location", "Sample", "Cytokine", "Value", "Validity", "Dilution", "Day", "Parsed Name", "Parsed Tag")
    )

  data_summary <- data$data %>%
    group_by(Cytokine, Sample) %>%
    mutate(
      n   = n(),
      AVG = mean(Value, na.rm = TRUE),
      SD  = sd(Value, na.rm = TRUE),
      CV  = 100 * SD / AVG,
      AVG = ifelse(is.nan(AVG), 0, AVG),
      across(SD:CV, ~ ifelse(is.na(.x), 0, .x))
    ) %>%
    select(-Value, -Location, -Valid) %>%
    distinct_all() %>%
    ungroup() %>%
    mutate(across(AVG:CV, ~ round(.x, 2))) %>%
    set_names(
      c("Sample", "Cytokine", "Dilution", "Day", "Parsed Name", "Parsed Tag", "Data Points", "Average", "Std. Dev", "CV (%)")
    )

  return(
    list(
      std_raw = std_raw,
      std_summary = std_summary,
      data_raw = data_raw,
      data_summary= data_summary
    )
  )

}
cgtc/RebelAnalysis documentation built on Feb. 21, 2022, 5:28 p.m.