View source: R/jj_get_pval_df.R
jj_get_pval_df | R Documentation |
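Get a data.frame of p-values which can be passed to ggpubr to include in a barplot/boxplot. Each donor can have multiple samples. Each sample should belong to exactly one group; a donor may contribute samples to more than one group when paired comparisons are requested (e.g. one sample at diagnosis and one at relapse).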
jj_get_pval_df(
df,
sample_column,
donor_column,
group_column,
score_column,
comparisons_paired,
comparisons_unpaired,
p_adj_method = "BH",
p_adj_round = 3,
return_per_sample_df = FALSE
)
sample_column |
column containing annotation for the sample from which the observations are derived. number of columns |
donor_column |
column specifying the donor from which the observations are derived. If all samples are unpaired, it is the same as sample_column |
group_column |
column specifying the groups that should be used for the statistical comparison |
score_column |
column containing the values that are used for the comparison between groups (wilcox.test) |
comparisons_paired |
list of vectors of length 2 specifying the desired paired comparisons between levels in the group_column |
comparisons_unpaired |
list of vectors of length 2 specifying the desired unpaired comparisons between levels in the group_column |
Returns a tibble of p-values, or the per-sample data.frame if return_per_sample_df = TRUE
# Example data: 10 samples from 4 healthy donors and 3 patients. For each
# patient, values are available from two timepoints: diagnosis and relapse.
# We want to quantify the difference in a `Disease_score` between the Healthy,
# Diagnosis and Relapse states on the sample level (not on the single-cell
# level). Both paired (Diagnosis - Relapse) and unpaired (Healthy - Diagnosis,
# Healthy - Relapse) comparisons need to be performed.

# Fix the RNG so the simulated scores, the plot and the p-values are
# reproducible between runs.
set.seed(1)

# 10 observations per sample; healthy donors have one sample each, patients
# have two (10 Diagnosis observations followed by 10 Relapse observations).
data_df <- data.frame(
  Sample = rep(1:10, each = 10),
  Donor = c(
    rep(c("Healthy1", "Healthy2", "Healthy3", "Healthy4"), each = 10),
    rep(c("Disease1", "Disease2", "Disease3"), each = 20)
  ),
  Group = c(
    rep("Healthy", 40),
    rep(rep(c("Diagnosis", "Relapse"), each = 10), 3)
  ),
  Disease_score = c(
    rnorm(40, mean = 3, sd = 0.5), rnorm(20, mean = 4, sd = 0.1),
    rnorm(10, mean = 6, sd = 0.1), rnorm(10, mean = 4.5, sd = 0.1),
    rnorm(10, mean = 4.5, sd = 0.1), rnorm(10, mean = 3, sd = 0.1)
  )
)

# Plot the data per group as violins.
gg <- jj_plot_numeric_by_group(
  data_df,
  feature_column = "Disease_score",
  group_column = "Group"
)
gg

# Get the mean score per sample.
# NOTE(review): `comparisons_paired` / `comparisons_unpaired` are omitted here;
# presumably they are not needed when `return_per_sample_df = TRUE` - confirm
# against the function source.
sample_df <- jj_get_pval_df(
  data_df,
  sample_column = "Sample",
  donor_column = "Donor",
  group_column = "Group",
  score_column = "Disease_score",
  return_per_sample_df = TRUE
)

# These are the kind of comparisons that are calculated inside the function
# `jj_get_pval_df`.
wilcox.test(
  x = sample_df$score[sample_df$group == "Healthy"],
  y = sample_df$score[sample_df$group == "Diagnosis"],
  paired = FALSE
)
# NOTE(review): a paired test matches x and y by position, so this assumes the
# Relapse and Diagnosis rows of `sample_df` are in the same donor order.
wilcox.test(
  x = sample_df$score[sample_df$group == "Relapse"],
  y = sample_df$score[sample_df$group == "Diagnosis"],
  paired = TRUE
)

# Get the full p-value tibble.
pval_df <- jj_get_pval_df(
  data_df,
  sample_column = "Sample",
  donor_column = "Donor",
  group_column = "Group",
  score_column = "Disease_score",
  comparisons_paired = list(c("Diagnosis", "Relapse")),
  comparisons_unpaired = list(c("Healthy", "Diagnosis"), c("Healthy", "Relapse"))
)
pval_df

# Add suitable y positions for the p-value brackets in the violin plot
# (one value per requested comparison, three in total).
pval_df$ypos <- c(7, 6.5, 6.75)

# Plot everything together, using the ggpubr package to show the p-values.
# ggplot2 functions are namespace-qualified so the example does not depend on
# ggplot2 being attached.
gg +
  ggplot2::geom_point(
    data = sample_df,
    mapping = ggplot2::aes(x = group, y = score, colour = donor),
    size = 3
  ) +
  ggplot2::scale_colour_manual(values = jj_get_jj_colours(sample_df$donor)) +
  ggpubr::stat_pvalue_manual(
    data = pval_df,
    label = "p_val_adj",
    tip.length = 0.01,
    y.position = "ypos"
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.