knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 7, fig.align = "center", echo = FALSE)
library(JLmisc) pctEmpty = function(x) scales::percent((sum(is.na(x))/sum(length(x)))) m = pctEmpty(dystonia$`Biomarker Level`) n = sum(!is.na(dystonia$`Biomarker Level`)) nMiss = sum(is.na(dystonia$`Biomarker Level`))
All data in these analyses were taken from the dystonia
dataset bundled with the JLmisc R package.
This dataset was assembled from .csv files provided by Laura Scorr, M.D.
Analyses considered measurements of r length(unique(dystonia$Assay))
unique biomarkers in r length(unique(dystonia$Sample))
individuals.
Biomarkers were measured in duplicate.
The initial sample available for analysis included r n
unique measurements.
In addition, N=r nMiss
additional measurements were missing, and were assummed to be missing at random for purposes of analysis.
Biomarker data were inspected visually to confirm the absence of outlier measurements.
Duplicate observations were then averaged so that the level of each biomarker for each participant was represented by a single measurement.^[
In this dataset, r m
of the individual readings were missing. Biomarkers for which only one observation of a given patient was available were flagged to enable post-hoc sensitivity analyses if appropriate.
In the dataset, the data are saved two ways.
In both cases, the average of the two replicates is used if possible.
If only one replicate is available, `Biomarker Level`
takes that value; `Biomarker Level (Duplication Required)`
is empty.
]
Descriptive statistics for each biomarker were calculated for each biomarker stratified in two ways: 1) stratification by Cervical Dystonia (CD) vs. Control, and, 2) stratification by Cervical Dystonia + Hypothyroidism (CD/Hypo) vs. CD vs. Control.
Variation in average expression of each biomarker with the presence of CD were assessed with linear models.^[function lm
in R software]
Main models included CD as a dichotomous variable.
No formal adjustment for multiple comparisons was performed in primary analyses.
P values adjusted for False Discovery Rate are presented for reference.
Addtional linear models were calculated incorporating an interaction term coding for the additive effect of hypothyroid disorder in addition to that of CD.
library(tidyverse) library(modelr) library(tidyverse) library(broom) `%contains%` = function(s, pattern) grepl(pattern, s) `%omits%` = function(s, pattern) !grepl(pattern, s) dys = left_join( dystonia %>% group_by(CD, Group, Assay, Sample) %>% summarize(`Biomarker Level` = mean(`Biomarker Level`, na.rm = TRUE)) %>% ungroup(), dystonia %>% group_by(CD, Group, Assay, Sample) %>% summarize(`Biomarker Level (Duplication Required)` = mean(`Biomarker Level`, na.rm = FALSE)) %>% ungroup() ) %>% mutate(`Assay Name` = make.names(Assay)) dysWide = dys %>% select(CD, Group, Sample, `Assay Name`, `Biomarker Level`) %>% group_by(CD, Group, Sample) %>% pivot_wider(names_from = `Assay Name`, values_from = `Biomarker Level`) biomarkerVariableNames = names(dysWide)[-1:-3] # this model takes a dataset and returns the p value of a t-test comparing any CD to control. cdVsControlModel = function(d = dys %>% filter(biomarker == "IL.10")){ # add call to relevel to make the reference level first mod = lm(`Biomarker Level` ~ CD, data = d %>% ungroup() %>% mutate(CD = relevel(CD, "Control"))) %>% tidy() mod %>% pull(p.value) %>% tail(1) } # this model takes a dataset and returns two p values: any CD to control, and an interaction term for presence of hypothyroid. interactionModel = function(d = dys %>% filter(biomarker == "IL.10")){ # create variables for any CD and an interaction term for hypothyroid. d = d %>% ungroup() %>% mutate(CD = Group %contains% "CD") %>% mutate(Hypo = Group %contains% "Hypo") %>% mutate(Group = relevel(Group, "Control")) # threeGroupModel = lm(`Biomarker Level` ~ Group, data = d) %>% tidy() mod = lm(`Biomarker Level` ~ CD + Hypo, data = d) %>% tidy() mod %>% pull(p.value) %>% tail(2) } # create a nested data structure byBiomarker = dys %>% group_by(`Assay Name`) %>% nest() # apply the linear model to each biomarker (this is a two-sample t-test) byBiomarker = byBiomarker %>% mutate(cdVsControlPValue = purrr::map(data, cdVsControlModel)) %>% select(`Assay Name`, cdVsControlPValue, everything()) %>% mutate(cdVsControlPValue = unlist(cdVsControlPValue)) # Adjust the p values with FDR byBiomarker$cdVsControlPValueFDR = p.adjust(byBiomarker$cdVsControlPValue, method = "fdr") # rename the p values to be more descriptive byBiomarker = byBiomarker %>% rename(pTTest = cdVsControlPValue) %>% rename(pTTestFDR = cdVsControlPValueFDR) # apply the interaction model to each biomarker (this is a two-sample t-test) byBiomarker = byBiomarker %>% mutate(interactionPValue = purrr::map(data, interactionModel)) %>% mutate(pLinearModelCD = interactionPValue[[1]][1]) %>% mutate(pLinearModelHypo = interactionPValue[[1]][2]) # sort and rename variables results = byBiomarker %>% select(`Assay Name`, pTTest, pTTestFDR, pLinearModelCD, pLinearModelHypo) %>% arrange(pTTest) %>% rename( `CD vs. Control` = pTTest, `CD vs. Control, FDR` = pTTestFDR, `CD vs. Control, interaction model` = pLinearModelCD, `CD/Hypothyroid vs. CD, interaction model` = pLinearModelHypo )
# calculate which group is higher for each biomarker groupLevels = dys %>% group_by(`Assay Name`, CD) %>% summarize(maxVal = max(`Biomarker Level`, na.rm = TRUE)) %>% ungroup() %>% group_by(`Assay Name`) %>% summarize(greaterVal = max(maxVal), lesserGroup = CD[which.min(maxVal)]) %>% mutate(CD = factor(lesserGroup)) %>% rename(`Biomarker Level` = greaterVal) %>% full_join(results %>% mutate(pStr = paste0( "pt=",round(`CD vs. Control`,3),"\n", "pCD=",round(`CD vs. Control, interaction model`,3),"\n", "pHyp=",round(`CD/Hypothyroid vs. CD, interaction model`,3)) ) %>% select(`Assay Name`, pStr) %>% rename(label = pStr)) twoGroupPlot = function(biomarkersToPlot = head(results$`Assay Name`, 6)){ dataset = dys %>% filter(`Assay Name` %in% biomarkersToPlot) %>% mutate(`Assay Name` = ordered(`Assay Name`, levels = biomarkersToPlot)) labels = groupLevels %>% filter(`Assay Name` %in% biomarkersToPlot) %>% mutate(`Assay Name` = ordered(`Assay Name`, levels = biomarkersToPlot)) # %>% mutate(biomarker = factor(biomarker, levels = biomarkersToPlot, ordered = TRUE)) ggplot( dataset, aes(x = CD, y = `Biomarker Level`, color = CD)) + geom_boxplot(outlier.shape = NA, na.rm = TRUE) + geom_jitter(width = 0.2, na.rm = TRUE, mapping = aes(shape = Group)) + facet_wrap(~`Assay Name`, scales = "free") + theme_minimal() + theme(axis.text.x = element_blank()) + geom_text(aes(label = label), data = labels, size = 2, show.legend = FALSE, color = "black", hjust = "right", vjust = "top", position = position_nudge(x = +0.4)) } twoGroupPlot()
library(kableExtra) results %>% mutate_if(is.numeric, function(x) round(x,3)) %>% kable() %>% kable_styling() %>% add_header_above(c(" " = 1, "P Values, Two-Level Model" = 2, "P Values, Interaction Model" = 2))
suppressMessages(library(table1)) table1Formula = paste(paste("~", paste(biomarkerVariableNames, collapse = " + ")),"| factor(CD)") %>% as.formula() table1(table1Formula, data = dysWide, overall = NULL)
suppressMessages(library(table1)) table1Formula = paste(paste("~", paste(biomarkerVariableNames, collapse = " + ")),"| factor(Group)") %>% as.formula() table1(table1Formula, data = dysWide, overall = NULL)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.