Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
message = FALSE,
warning = FALSE,
cache = FALSE
)
## ----eval=FALSE---------------------------------------------------------------
# remotes::install_github("Donadelnal/RQdeltaCT")
# library(RQdeltaCT)
## ----echo=FALSE, out.width="650px", dpi = 600, fig.align="center", warning=FALSE, message=FALSE, cache=FALSE----
knitr::include_graphics("figure1ok.png")
## ----message=FALSE, cache=FALSE-----------------------------------------------
# Set path to file:
path <- system.file("extdata",
"data_Ct_long.txt",
package = "RQdeltaCT")
# Import file using path; remember to specify proper separator, decimal character, and number of necessary columns:
library(RQdeltaCT)
library(tidyverse)
data.Ct <- read_Ct_long(path = path,
sep = "\t",
dec = ".",
skip = 0,
add.column.Flag = TRUE,
column.Sample = 1,
column.Gene = 2,
column.Ct = 5,
column.Group = 9,
column.Flag = 4)
## ----cache=FALSE--------------------------------------------------------------
str(data.Ct)
## ----cache=FALSE--------------------------------------------------------------
library(tidyverse)
data.Ct <- mutate(data.Ct,
Flag = ifelse(Flag < 1, "Undetermined", "OK"))
str(data.Ct)
## ----cache=FALSE--------------------------------------------------------------
# Set paths to required files:
path.Ct.file <- system.file("extdata",
"data_Ct_wide.txt",
package = "RQdeltaCT")
path.design.file <- system.file("extdata",
"data_design.txt",
package = "RQdeltaCT")
# Import files:
library(tidyverse)
data.Ct <- read_Ct_wide(path.Ct.file = path.Ct.file,
path.design.file = path.design.file,
sep ="\t",
dec = ".")
# Look at the structure:
str(data.Ct)
## ----cache=FALSE--------------------------------------------------------------
# Import file, be aware of specifying parameters that fit the imported data:
data.Ct.wide <- read.csv(file = "data/data.Ct.wide.vign.txt",
header = TRUE,
sep = ",")
str(data.Ct.wide)
# The imported table is now transformed into a long-format structure.
library(tidyverse)
data.Ct <- data.Ct.wide %>%
select(-X) %>% # The "X" column is unnecessary and is removed.
mutate(across(everything(), as.character)) %>% # All variables also are converted to a character to unify the class of variables.
pivot_longer(cols = -c(Group, Sample), names_to = "Gene", values_to = "Ct")
str(data.Ct)
## ----cache=FALSE--------------------------------------------------------------
data(data.Ct)
str(data.Ct)
data(data.Ct.pairwise)
str(data.Ct.pairwise)
## ----fig.dim=c(7.1,7), cache=FALSE--------------------------------------------
sample.Ct.control <- control_Ct_barplot_sample(data = data.Ct,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
axis.title.size = 9,
axis.text.size = 7,
plot.title.size = 9,
legend.title.size = 9,
legend.text.size = 9)
## ----fig.dim=c(7.1,5.5), cache=FALSE------------------------------------------
gene.Ct.control <- control_Ct_barplot_gene(data = data.Ct,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
axis.title.size = 9,
axis.text.size = 9,
plot.title.size = 9,
legend.title.size = 9,
legend.text.size = 9)
## ----cache=FALSE--------------------------------------------------------------
head(sample.Ct.control[[2]])
## ----cache=FALSE--------------------------------------------------------------
head(gene.Ct.control[[2]])
## ----fig.dim=c(7.1,8), cache=FALSE--------------------------------------------
library(tidyverse)
library(pheatmap)
data(data.Ct)
# Vector of colors to fill the heatmap can be specified to fit the user's needs:
colors <- c("#4575B4","#FFFFBF","#C32B23")
control_heatmap(data.Ct,
sel.Gene = "all",
colors = colors,
show.colnames = TRUE,
show.rownames = TRUE,
fontsize = 9,
fontsize.row = 9,
angle.col = 45)
## ----cache=FALSE--------------------------------------------------------------
# Finding samples with more than half of the unreliable Ct values.
low.quality.samples <- filter(sample.Ct.control[[2]], Not.reliable.fraction > 0.5)$Sample
low.quality.samples <- as.vector(low.quality.samples)
low.quality.samples
## -----------------------------------------------------------------------------
# Finding genes with more than half of the unreliable Ct values in given group.
low.quality.genes <- filter(gene.Ct.control[[2]], Not.reliable.fraction > 0.5)$Gene
low.quality.genes <- unique(as.vector(low.quality.genes))
low.quality.genes
## ----cache=FALSE--------------------------------------------------------------
# Objects returned from the `low_quality_samples()` and `low_quality_genes()`functions can be used directly:
data.CtF <- filter_Ct(data = data.Ct,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
remove.Gene = low.quality.genes,
remove.Sample = low.quality.samples)
# Check dimensions of data before and after filtering:
dim(data.Ct)
dim(data.CtF)
## ----cache=FALSE--------------------------------------------------------------
# Without imputation:
data.CtF.ready <- make_Ct_ready(data = data.CtF,
imput.by.mean.within.groups = FALSE)
# A part of the data with missing values:
as.data.frame(data.CtF.ready)[19:25,]
# With imputation:
data.CtF.ready <- make_Ct_ready(data = data.CtF,
imput.by.mean.within.groups = TRUE)
# Missing values were imputed:
as.data.frame(data.CtF.ready)[19:25,]
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
library(ctrlGene)
# Remember that the number of colors in col parameter should be equal to the number of tested genes:
ref <- find_ref_gene(data = data.CtF.ready,
groups = c("AAA","Control"),
candidates = c("CCL5", "IL1B","GAPDH","TGFB","TNF", "VEGFA"),
col = c("#66c2a5", "#fc8d62","#6A6599", "#D62728", "#1F77B4", "black"),
angle = 60,
axis.text.size = 7,
norm.finder.score = TRUE,
genorm.score = TRUE)
ref[[2]]
## ----cache=FALSE--------------------------------------------------------------
# For 2^-dCt^ method:
data.dCt.exp <- delta_Ct(data = data.CtF.ready,
normalise = TRUE,
ref = "GAPDH",
transform = TRUE)
## ----cache=FALSE--------------------------------------------------------------
# For 2^-ddCt^ method:
data.dCt <- delta_Ct(data = data.CtF.ready,
normalise = TRUE,
ref = "GAPDH",
transform = FALSE)
## ----fig.dim=c(7.1,6), cache=FALSE--------------------------------------------
control_boxplot_sample <- control_boxplot_sample(data = data.dCt,
y.axis.title = "dCt",
axis.text.size = 7)
## ----fig.dim=c(7.1,4)---------------------------------------------------------
control_boxplot_gene <- control_boxplot_gene(data = data.dCt,
by.group = TRUE,
y.axis.title = "dCt",
axis.text.size = 10)
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
control_cluster_sample(data = data.dCt,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.6)
control_cluster_gene(data = data.dCt,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.8)
## ----fig.dim=c(4,4.5), fig.align='center', cache=FALSE------------------------
control.pca.sample <- control_pca_sample(data = data.dCt,
point.size = 3,
label.size = 2.5,
legend.position = "top")
## ----fig.dim=c(4,4), fig.align='center', cache=FALSE--------------------------
control.pca.gene <- control_pca_gene(data = data.dCt)
## ----cache=FALSE--------------------------------------------------------------
data.dCtF <- filter_transformed_data(data = data.dCt,
remove.Sample = c("Control11"))
## ----cache=FALSE--------------------------------------------------------------
data.dCt.exp <- delta_Ct(data = data.CtF.ready,
ref = "GAPDH",
transform = TRUE)
library(coin)
results.dCt <- RQ_dCt(data = data.dCt.exp,
do.tests = TRUE,
group.study = "AAA",
group.ref = "Control")
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
head(as.data.frame(arrange(results.dCt, MW_test_p)))
## ----cache=FALSE--------------------------------------------------------------
data.dCt <- delta_Ct(data = data.CtF.ready,
ref = "GAPDH",
transform = FALSE)
library(coin)
results.ddCt <- RQ_ddCt(data = data.dCt,
group.study = "AAA",
group.ref = "Control",
do.tests = TRUE)
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
head(as.data.frame(arrange(results.ddCt, MW_test_p)))
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
# Variant with p values depending on the normality of the data:
library(ggsignif)
# Specifying vector with significance labels:
signif.labels <- c("****",
"**",
"ns.",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
"***")
# Genes with p < 0.05 and 2-fold changed expression between compared groups are considered significant:
FCh.plot <- FCh_plot(data = results.ddCt,
use.p = TRUE,
mode = "depends",
p.threshold = 0.05,
use.FCh = TRUE,
FCh.threshold = 2,
signif.show = TRUE,
signif.labels = signif.labels,
angle = 20)
# Access the table with results:
head(as.data.frame(FCh.plot[[2]]))
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
user <- data.dCt %>%
pivot_longer(cols = -c(Group, Sample),
names_to = "Gene",
values_to = "dCt") %>%
group_by(Gene) %>%
summarise(MW_test_p = wilcox.test(dCt ~ Group)$p.value,
.groups = "keep")
# The stats::wilcox.test() functions is limited to cases without ties;
# therefore, a warning "cannot compute exact p-value with ties" will appear when ties occur.
FCh.plot <- FCh_plot(data = results.ddCt,
use.p = TRUE,
mode = "user",
p.threshold = 0.05,
use.FCh = TRUE,
FCh.threshold = 2,
signif.show = TRUE,
signif.labels = signif.labels,
angle = 30)
# Access the table with results (p.used column was changed):
head(as.data.frame(FCh.plot[[2]]))
## ----fig.dim=c(4,4.5), fig.align='center', cache=FALSE------------------------
# Genes with p < 0.05 and 2-fold changed expression between compared groups are considered significant:
volcano <- results_volcano(data = results.ddCt,
mode = "depends",
p.threshold = 0.05,
FCh.threshold = 2)
# Access the table with results:
head(as.data.frame(volcano[[2]]))
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_boxplot <- results_boxplot(data = data.dCtF,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
by.group = TRUE,
signif.show = TRUE,
signif.labels = c("****","**","***"),
signif.dist = 1.05,
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.1,
angle = 20,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_boxplot_no_fac <- results_boxplot(data = data.dCtF,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
by.group = TRUE,
signif.show = FALSE, # Disable significance labels
faceting = FALSE, # Disable faceting
y.axis.title = "dCt") +
theme(axis.text.x = element_text(size = 5, colour = "black"))
# Add x axis annotations and ticks:
final_boxplot_no_fac <- final_boxplot_no_fac +
theme(axis.text.x = element_text(size = 11, colour = "black", face="italic"), # Use italic font for human gene symbols
axis.ticks.x = element_line(colour = "black"))
final_boxplot_no_fac
## ----cache=FALSE--------------------------------------------------------------
data.label <- data.frame(matrix(nrow = 3, ncol = 4)) # Number of rows should be equal to number of genes
rownames(data.label) <- c("ANGPT1","IL8","VEGFB")
colnames(data.label) <- c("x", "xend", "y", "annotation")
data.label$Gene <- rownames(data.label)
data.label$y <- 1 + c(max(data.dCtF$ANGPT1), max(data.dCtF$IL8), max(data.dCtF$VEGFB))
data.label$x <- c(0.81,1.81,2.81)
data.label$xend <- c(1.19,2.19,3.19)
data.label$annotation <- c("****","**","***")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_boxplot_no_fac_ok <- final_boxplot_no_fac +
geom_signif(annotation = data.label$annotation,
y_position = data.label$y,
xmin = data.label$x,
xmax = data.label$xend,
tip_length = 0.01,
textsize = 5)
final_boxplot_no_fac_ok
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_boxplot_no_fac_ok <- final_boxplot_no_fac_ok +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) # Make room for the first label
final_boxplot_no_fac_ok
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
library(tidyverse)
colnames(data.dCtF)
data.dCtF.slim <- pivot_longer(data.dCtF, cols = ANGPT1:VEGFC, names_to = "gene", values_to = "exp")
# Select genes
data.dCtF.slim_sel <- data.dCtF.slim[data.dCtF.slim$gene %in% c("ANGPT1","IL8","VEGFB"), ]
# Change order of groups if needed
data.dCtF.slim_sel$Group <- factor(data.dCtF.slim_sel$Group, levels = c("Control","AAA"))
# Create plot
final_boxplot_no_colors <- ggplot(data.dCtF.slim_sel, aes(x = Group, y = exp)) +
geom_boxplot(outlier.shape = NA, coef = 2) +
theme_bw() +
ylab("dCt") +
xlab("") +
theme(axis.text = element_text(size = 10, color = "black")) +
theme(axis.title = element_text(size = 10, color = "black")) +
theme(panel.grid.major.x = element_blank()) +
facet_wrap(vars(gene), nrow = 1, dir = "h", scales = "free")
final_boxplot_no_colors
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
data.label <- data.frame(matrix(nrow = 3, ncol = 4)) # Number of rows is equal to number of genes
rownames(data.label) <- c("ANGPT1","IL8","VEGFB")
colnames(data.label) <- c("x", "xend", "y", "annotation")
data.label$gene <- rownames(data.label) # Name of column with gene symbols in this table must be
# the same as name of the column with gene symbols in data used for create the plot.
data.label$y <- 0.5 + c(max(data.dCtF$ANGPT1), max(data.dCtF$IL8), max(data.dCtF$VEGFB))
data.label$x <- c(1,1,1)
data.label$xend <- c(1.98,1.98,1.98)
data.label$annotation <- c("****","**","***")
final_boxplot_no_colors_labels <- final_boxplot_no_colors +
geom_signif(
stat = "identity",
data = data.label,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)))
final_boxplot_no_colors_labels
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
final_boxplot_no_colors_labels_points <- final_boxplot_no_colors_labels +
geom_point(position=position_jitter(w=0.1,h=0),
alpha = 0.7,
size = 1.5)
final_boxplot_no_colors_labels_points
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_barplot <- results_barplot(data = data.dCtF,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
signif.show = TRUE,
signif.labels = c("****","**","***"),
angle = 30,
signif.dist = 1.05,
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.1,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_barplot <- results_barplot(data = data.dCtF,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
signif.show = TRUE,
signif.labels = c("****","**","***"),
angle = 0,
signif.dist = 1.05,
faceting = FALSE,
y.exp.up = 0.1,
y.axis.title = "dCt")
# Add italic font to the x axis:
final_barplot <- final_barplot +
theme(axis.text.x = element_text(face="italic"))
final_barplot
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
library(tidyverse)
colnames(data.dCtF)
data.dCtF.slim <- pivot_longer(data.dCtF, cols = ANGPT1:VEGFC, names_to = "gene", values_to = "exp")
# Select genes
data.dCtF.slim_sel <- data.dCtF.slim[data.dCtF.slim$gene %in% c("ANGPT1","IL8","VEGFB"), ]
# Change order of groups if needed
data.dCtF.slim_sel$Group <- factor(data.dCtF.slim_sel$Group, levels = c("Control","AAA"))
data.mean <- data.dCtF.slim_sel %>%
group_by(Group, gene) %>%
summarise(mean = mean(exp, na.rm = TRUE), .groups = "keep")
data.sd <- data.dCtF.slim_sel %>%
group_by(Group, gene) %>%
summarise(sd = sd(exp, na.rm = TRUE), .groups = "keep")
data.mean$sd <- data.sd$sd
final_barplot_no_colors <- ggplot(data.mean, aes(x = Group, y = mean)) +
geom_errorbar(aes(group = Group,
y = mean,
ymin = ifelse(mean < 0, mean - abs(sd), mean),
ymax = ifelse(mean > 0, mean + abs(sd), mean)),
width = .2,
position = position_dodge(0.9)) +
geom_col(aes(group = Group),
position = position_dodge(0.9),
width = 0.7,
color = "black") +
xlab("") +
ylab("dCt") +
theme_bw() +
#theme(axis.text = element_text(size = axis.text.size, colour = "black")) +
#theme(axis.title = element_text(size = axis.title.size, colour = "black")) +
#theme(legend.text = element_text(size = legend.text.size, colour ="black")) +
#theme(legend.title = element_text(size = legend.title.size, colour = "black")) +
theme(panel.grid.major.x = element_blank()) +
facet_wrap(vars(gene), scales = "free", nrow = 1)
final_barplot_no_colors
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
data.label <- data.frame(matrix(nrow = 3, ncol = 4)) # Number of rows is equal to number of genes
rownames(data.label) <- c("ANGPT1","IL8","VEGFB")
colnames(data.label) <- c("x", "xend", "y", "annotation")
data.label$gene <- rownames(data.label) # Name of column with gene symbols in this table
# must be the same as name of the column with gene symbols in data used for create the plot.
data.mean <- data.mean %>%
mutate(max = mean + sd) %>%
group_by(gene) %>%
summarise(height = max(max, na.rm = TRUE), .groups = "keep")
data.label$y <- 0.5 + data.mean$height
data.label$x <- c(1,1,1)
data.label$xend <- c(1.98,1.98,1.98)
data.label$annotation <- c("****","**","***")
final_barplot_no_colors_labels <- final_barplot_no_colors +
geom_signif(
stat = "identity",
data = data.label,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation,
textsize = 5),
color = "black",
manual = TRUE) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)))
final_barplot_no_colors_labels
## ----fig.dim=c(7.1,5), cache=FALSE--------------------------------------------
# Create named list with colors for groups annotation:
colors.for.groups = list("Group" = c("AAA"="firebrick1","Control"="green3"))
# Vector of colors for heatmap can be also specified to fit the user needings:
colors <- c("navy","navy","#313695","#4575B4","#74ADD1","#ABD9E9",
"#E0F3F8","#FFFFBF","#FEE090","#FDAE61","#F46D43",
"#D73027","#C32B23","#A50026","#8B0000",
"#7E0202","#000000")
results_heatmap(data.dCt,
sel.Gene = "all",
col.groups = colors.for.groups,
colors = colors,
show.colnames = FALSE,
show.rownames = TRUE,
fontsize = 11,
fontsize.row = 11,
cellwidth = 4) # It avoids cropping the image on the right side.
## ----fig.dim=c(5,5.5), fig.align='center', cache=FALSE------------------------
pca.kmeans <- pca_kmeans(data.dCt,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
legend.position = "top")
# Access to the confusion matrix:
pca.kmeans[[2]]
## ----fig.dim=c(5,6), fig.align='center', cache=FALSE--------------------------
pca.kmeans[[1]] + theme(legend.box = "vertical")
## ----fig.dim=c(6.5,6.5), fig.align='center', cache=FALSE----------------------
library(Hmisc)
library(corrplot)
# To make the plot more readable, only part of the data was used:
corr.samples <- corr_sample(data = data.dCt[15:30, ],
method = "pearson",
order = "hclust",
size = 0.7,
p.adjust.method = "BH",
add.coef = "white")
## ----fig.dim=c(6.5,6.5), fig.align='center', cache=FALSE----------------------
library(Hmisc)
library(corrplot)
corr.genes <- corr_gene(data = data.dCt,
method = "spearman",
order = "FPC",
size = 0.7,
p.adjust.method = "BH")
## ----fig.dim=c(4.5,4.5), fig.align='center', cache=FALSE----------------------
library(ggpmisc)
AAA6_Control17 <- single_pair_sample(data = data.dCt,
x = "AAA6",
y = "Control17",
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = 0.05)
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
library(ggpmisc)
PDGFB_TGFB <- single_pair_gene(data.dCt,
x = "PDGFB",
y = "TGFB",
by.group = TRUE,
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = c(0.05),
label.position.y = c(1,0.95))
## ----cache=FALSE--------------------------------------------------------------
library(pROC)
# Remember to specify the numbers of rows (panels.row parameter) and columns (panels.col parameter)
# to be sufficient to arrange panels:
roc_parameters <- ROCh(data = data.dCt,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
groups = c("AAA","Control"),
panels.row = 2,
panels.col = 2)
roc_parameters
## ----echo=FALSE, out.width="500px", fig.align="center", warning=FALSE, message=FALSE, cache=FALSE----
knitr::include_graphics("ROC_plot_ind.png")
## ----fig.dim=c(5,4), fig.align='center', cache=FALSE--------------------------
library(oddsratio)
# Remember to set the increment parameter.
log.reg.results <- log_reg(data = data.dCt,
increment = 1,
sel.Gene = c("ANGPT1","IL8", "VEGFB"),
group.study = "AAA",
group.ref = "Control")
log.reg.results[[2]]
## ----fig.dim=c(5,4), fig.align='center', cache=FALSE--------------------------
log.reg.results.sorted <- log.reg.results[[1]] +
scale_y_discrete(limits = rev(sort(log.reg.results[[2]]$Gene)))
log.reg.results.sorted
## ----cache=FALSE--------------------------------------------------------------
data(data.Ct.pairwise)
str(data.Ct.pairwise)
## ----fig.dim=c(7.1,5), cache=FALSE--------------------------------------------
library(tidyverse)
sample.Ct.control.pairwise <- control_Ct_barplot_sample(data = data.Ct.pairwise,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
axis.title.size = 9,
axis.text.size = 9,
plot.title.size = 9,
legend.title.size = 9,
legend.text.size = 9)
## ----fig.dim=c(7.1,5.5), cache=FALSE------------------------------------------
gene.Ct.control.pairwise <- control_Ct_barplot_gene(data = data.Ct.pairwise,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
axis.title.size = 9,
axis.text.size = 9,
plot.title.size = 9,
legend.title.size = 9,
legend.text.size = 9)
## ----cache=FALSE--------------------------------------------------------------
head(sample.Ct.control.pairwise[[2]])
## ----cache=FALSE--------------------------------------------------------------
head(gene.Ct.control.pairwise[[2]], 10)
## ----cache=FALSE--------------------------------------------------------------
# Finding samples with more than half of the unreliable Ct values.
low.quality.samples.pairwise <- filter(sample.Ct.control.pairwise[[2]],
Not.reliable.fraction > 0.5)$Sample
low.quality.samples.pairwise <- as.vector(low.quality.samples.pairwise)
low.quality.samples.pairwise
## -----------------------------------------------------------------------------
# Finding genes with more than half of the unreliable Ct values in at least one group.
low.quality.genes.pairwise <- filter(gene.Ct.control.pairwise[[2]],
Not.reliable.fraction > 0.5)$Gene
low.quality.genes.pairwise <- unique(as.vector(low.quality.genes.pairwise))
low.quality.genes.pairwise
## ----cache=FALSE--------------------------------------------------------------
# Objects returned from the `low_quality_samples()` and
# `low_quality_genes()`functions can be used directly:
data.Ct.pairwiseF <- filter_Ct(data = data.Ct.pairwise,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
remove.Gene = low.quality.genes.pairwise,
remove.Sample = low.quality.samples.pairwise)
# Check dimensions of data before and after filtering:
dim(data.Ct.pairwise)
dim(data.Ct.pairwiseF)
## ----cache=FALSE--------------------------------------------------------------
# Without imputation:
data.Ct.pairwiseF.ready <- make_Ct_ready(data = data.Ct.pairwiseF,
imput.by.mean.within.groups = FALSE)
# A part of the data with missing values:
as.data.frame(data.Ct.pairwiseF.ready)[9:19,10:15]
# With imputation:
data.Ct.pairwiseF.ready <- make_Ct_ready(data = data.Ct.pairwiseF,
imput.by.mean.within.groups = TRUE)
# Missing values were imputed:
as.data.frame(data.Ct.pairwiseF.ready)[9:19,10:15]
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
library(ctrlGene)
# Remember that the number of colors in col parameter should be equal to the number of tested genes:
ref.pairwise <- find_ref_gene(data = data.Ct.pairwiseF.ready,
groups = c("After","Before"),
candidates = c("Gene4","Gene13","Gene20"),
col = c("#66c2a5", "#fc8d62","#6A6599"),
angle = 90,
axis.text.size = 7,
norm.finder.score = TRUE,
genorm.score = TRUE)
ref.pairwise[[2]]
## ----cache=FALSE--------------------------------------------------------------
data.dCt.pairwise <- delta_Ct(data = data.Ct.pairwiseF.ready,
ref = "Gene4",
transform = FALSE)
## ----cache=FALSE--------------------------------------------------------------
data.dCt.exp.pairwise <- delta_Ct(data = data.Ct.pairwiseF.ready,
ref = "Gene4",
transform = TRUE)
## ----cache=FALSE--------------------------------------------------------------
data.dCt.pairwise <- delta_Ct(data = data.Ct.pairwiseF.ready,
ref = "Gene4",
transform = FALSE)
library(coin)
results.dCt.pairwise <- RQ_dCt(data = data.dCt.pairwise,
do.tests = TRUE,
pairwise = TRUE,
group.study = "After",
group.ref = "Before")
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
results <- as.data.frame(arrange(results.dCt.pairwise[[1]], MW_test_p))
head(results)
# Access to the table with fold change values calculated individually for each pair of sampleS:
FCh <- results.dCt.pairwise[[2]]
head(FCh)
## ----cache=FALSE--------------------------------------------------------------
data.dCt.pairwise <- delta_Ct(data = data.Ct.pairwiseF.ready,
ref = "Gene4",
transform = FALSE)
library(coin)
# Remember to set pairwise = TRUE:
results.ddCt.pairwise <- RQ_ddCt(data = data.dCt.pairwise,
group.study = "After",
group.ref = "Before",
pairwise = TRUE,
do.tests = TRUE)
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
results <- as.data.frame(arrange(results.ddCt.pairwise[[1]], MW_test_p))
head(results)
# Access to the table with fold change values calculated individually for each pair of samples:
FCh <- results.ddCt.pairwise[[2]]
head(FCh)
## ----fig.dim=c(7.1,6), cache=FALSE--------------------------------------------
control_boxplot_sample <- control_boxplot_sample(data = data.dCt.pairwise,
axis.text.size = 9,
y.axis.title = "dCt")
## ----fig.dim=c(7.1,4)---------------------------------------------------------
control_boxplot_gene <- control_boxplot_gene(data = data.dCt.pairwise,
by.group = TRUE,
axis.text.size = 10,
y.axis.title = "dCt")
## ----fig.dim=c(7.1,6), cache=FALSE--------------------------------------------
# Remember to set pairwise.FCh to TRUE:
FCh <- results.dCt.pairwise[[2]]
control.boxplot.sample.pairwise <- control_boxplot_sample(data = FCh,
pairwise.FCh = TRUE,
axis.text.size = 9,
y.axis.title = "Fold change")
# There are some very high values, we can identify them using:
head(arrange(FCh, -FCh))
## ----fig.dim=c(7.1,4)---------------------------------------------------------
control.boxplot.gene.pairwise <- control_boxplot_gene(data = FCh,
by.group = FALSE,
pairwise.FCh = TRUE,
axis.text.size = 10,
y.axis.title = "Fold change")
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
# Hierarchical clustering of samples:
control_cluster_sample(data = data.dCt.pairwise,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.6)
# Hierarchical clustering of genes:
control_cluster_gene(data = data.dCt.pairwise,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.8)
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
# Remember to set pairwise.FCh = TRUE:
control_cluster_sample(data = FCh,
pairwise.FCh = TRUE,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.7)
control_cluster_gene(data = FCh,
pairwise.FCh = TRUE,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.8)
## ----fig.dim=c(5,5.5), fig.align='center', cache=FALSE------------------------
control.pca.sample.pairwise <- control_pca_sample(data = data.dCt.pairwise,
point.size = 3,
label.size = 2.5,
hjust = 0.5,
legend.position = "top")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
control.pca.gene.pairwise <- control_pca_gene(data = data.dCt.pairwise,
hjust = 0.5)
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
control.pca.sample.pairwise <- control_pca_sample(data = FCh,
pairwise.FCh = TRUE,
colors = "black",
point.size = 3,
label.size = 2.5,
hjust = 0.5)
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
control.pca.gene.pairwise <- control_pca_gene(data = FCh,
pairwise.FCh = TRUE,
color = "black",
hjust = 0.5)
## ----cache=FALSE--------------------------------------------------------------
data.dCt.pairwise.F <- filter_transformed_data(data = data.dCt.pairwise,
remove.Sample = c("Sample22",
"Sample23",
"Sample15",
"Sample03"))
dim(data.dCt.pairwise)
dim(data.dCt.pairwise.F)
## -----------------------------------------------------------------------------
# Remember to set pairwise = TRUE:
results.ddCt.pairwise <- RQ_ddCt(data = data.dCt.pairwise.F,
group.study = "After",
group.ref = "Before",
pairwise = TRUE,
do.tests = TRUE)
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
results <- as.data.frame(arrange(results.ddCt.pairwise[[1]], MW_test_p))
head(results)
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
library(ggsignif)
# Remember to use the first element of list object returned by `RQ_dCt()` or RQ_ddCt() function:
FCh.plot.pairwise <- FCh_plot(data = results.ddCt.pairwise[[1]],
use.p = TRUE,
mode = "depends.adj",
p.threshold = 0.05,
use.FCh = TRUE,
FCh.threshold = 1.5,
angle = 20)
# Access the table with results:
head(as.data.frame(FCh.plot.pairwise[[2]]))
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
# Firstly prepare a vector with significance labels specified according to the user needings:
signif.labels <- c("ns.",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
"**",
"***")
# Remember to set signif.show = TRUE:
FCh.plot.pairwise <- FCh_plot(data = results.ddCt.pairwise[[1]],
use.p = TRUE,
mode = "depends.adj",
p.threshold = 0.05,
use.FCh = TRUE,
FCh.threshold = 1.5,
use.sd = TRUE,
signif.show = TRUE,
signif.labels = signif.labels,
signif.dist = 0.2,
angle = 20)
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
# Variant with user p values - the used p values are calculated using the stats::wilcox.test() function:
user <- data.dCt.pairwise %>%
pivot_longer(cols = -c(Group, Sample),
names_to = "Gene",
values_to = "dCt") %>%
group_by(Gene) %>%
summarise(MW_test_p = wilcox.test(dCt ~ Group)$p.value,
.groups = "keep")
# The stats::wilcox.test() functions is limited to cases without ties; therefore,
# a warning "cannot compute exact p-value with ties" will appear when ties occur.
signif.labels <- c("ns.",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" ns. ",
" * ",
"***")
FCh.plot <- FCh_plot(data = results.ddCt.pairwise[[1]],
use.p = TRUE,
mode = "user",
p.threshold = 0.05,
use.FCh = TRUE,
FCh.threshold = 1.5,
use.sd = TRUE,
signif.show = TRUE,
signif.labels = signif.labels,
signif.dist = 0.4,
angle = 30)
## ----fig.dim=c(6,4.5), fig.align='center', cache=FALSE------------------------
# Genes with p < 0.05 and 2-fold changed expression between compared groups
# are considered significant. Remember to use the first element of list object
# returned by RQ_ddCt() function:
RQ.volcano.pairwise <- results_volcano(data = results.ddCt.pairwise[[1]],
mode = "depends.adj",
p.threshold = 0.05,
FCh.threshold = 1.5)
# Access the table with results:
head(as.data.frame(RQ.volcano.pairwise[[2]]))
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final.boxplot.pairwise <- results_boxplot(data = data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
by.group = TRUE,
signif.show = TRUE,
signif.labels = c("***","*"),
signif.dist = 1.2,
faceting = TRUE,
facet.row = 1,
facet.col = 2,
y.exp.up = 0.1,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
data.dCt.pairwise.F$Group <- factor(data.dCt.pairwise.F$Group, levels = c("Before", "After"))
final.boxplot.pairwise <- results_boxplot(data = data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
by.group = TRUE,
signif.show = TRUE,
signif.labels = c("***","*"),
signif.dist = 1.2,
faceting = TRUE,
facet.row = 1,
facet.col = 2,
y.exp.up = 0.1,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final.boxplot.pairwise <- results_boxplot(data = data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
by.group = TRUE,
signif.show = TRUE,
signif.labels = c("***","*"),
signif.dist = 1.2,
faceting = FALSE,
y.exp.up = 0.1,
y.axis.title = "dCt")
# Add x axis annotations and ticks:
final.boxplot.pairwise <- final.boxplot.pairwise +
theme(axis.text.x = element_text(size = 11, colour = "black", face="italic"),
axis.ticks.x = element_line(colour = "black"))
final.boxplot.pairwise
## ----fig.dim=c(4,3.5), fig.align='center', cache=FALSE------------------------
library(tidyverse)
colnames(data.dCt.pairwise.F)
data.dCt.pairwise.F.slim <- pivot_longer(data.dCt.pairwise.F,
cols = Gene10:Gene8,
names_to = "gene",
values_to = "exp")
# Select genes
data.dCt.pairwise.F.slim.sel <- data.dCt.pairwise.F.slim[data.dCt.pairwise.F.slim$gene %in% c("Gene19","Gene8"), ]
# Change order of groups if needed
data.dCt.pairwise.F.slim.sel$Group <- factor(data.dCt.pairwise.F.slim.sel$Group,
levels = c("Before","After"))
# Create plot
final_boxplot_no_colors <- ggplot(data.dCt.pairwise.F.slim.sel, aes(x = Group, y = exp)) +
geom_boxplot(outlier.shape = NA, coef = 2) +
theme_bw() +
ylab("dCt") +
xlab("") +
theme(axis.text = element_text(size = 8, color = "black")) +
theme(axis.title = element_text(size = 10, color = "black")) +
theme(panel.grid.major.x = element_blank()) +
facet_wrap(vars(gene), nrow = 1, dir = "h", scales = "free")
final_boxplot_no_colors
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
data.label <- data.frame(matrix(nrow = 2, ncol = 4)) # Number of rows is equal to number of genes
rownames(data.label) <- c("Gene19","Gene8")
colnames(data.label) <- c("x", "xend", "y", "annotation")
data.label$gene <- rownames(data.label) # Name of column with gene symbols in this table
# must be the same as name of the column with gene symbols in data used for create the plot.
data.label$y <- 0.5 + c(max(data.dCt.pairwise.F$Gene19), max(data.dCt.pairwise.F$Gene8))
data.label$x <- c(1,1)
data.label$xend <- c(1.98,1.98)
data.label$annotation <- c("***","*")
final_boxplot_no_colors_labels <- final_boxplot_no_colors +
geom_signif(
stat = "identity",
data = data.label,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)))
final_boxplot_no_colors_labels
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
final_boxplot_no_colors_labels_points <- final_boxplot_no_colors_labels +
geom_point(position=position_jitter(w=0.1,h=0), alpha = 0.7, size = 1.5)
final_boxplot_no_colors_labels_points
## ----fig.dim=c(4,4.5), fig.align='center', cache=FALSE------------------------
final.barplot.pairwise <- results_barplot(data = data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
signif.show = TRUE,
signif.labels = c("***","*"),
angle = 30,
signif.dist = 1.05,
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.1,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final.barplot.pairwise <- results_barplot(data = data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
signif.show = TRUE,
signif.labels = c("***","*"),
angle = 0,
signif.dist = 1.05,
faceting = FALSE,
y.exp.up = 0.1,
y.axis.title = "dCt")
# Add italic font to the x axis:
final.barplot.pairwise <- final.barplot.pairwise +
theme(axis.text.x = element_text(face="italic"))
final.barplot.pairwise
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
library(tidyverse)
colnames(data.dCt.pairwise.F)
data.dCt.pairwise.F.slim <- pivot_longer(data.dCt.pairwise.F,
cols = Gene10:Gene8,
names_to = "gene",
values_to = "exp")
# Select genes
data.dCt.pairwise.F.slim.sel <- data.dCt.pairwise.F.slim[data.dCt.pairwise.F.slim$gene %in% c("Gene19","Gene8"), ]
# Change order of groups if needed
data.dCt.pairwise.F.slim.sel$Group <- factor(data.dCt.pairwise.F.slim.sel$Group,
levels = c("Before","After"))
data.mean <- data.dCt.pairwise.F.slim.sel %>%
group_by(Group, gene) %>%
summarise(mean = mean(exp, na.rm = TRUE), .groups = "keep")
data.sd <- data.dCt.pairwise.F.slim.sel %>%
group_by(Group, gene) %>%
summarise(sd = sd(exp, na.rm = TRUE), .groups = "keep")
data.mean$sd <- data.sd$sd
final_barplot_no_colors <- ggplot(data.mean, aes(x = Group, y = mean)) +
geom_errorbar(aes(group = Group,
y = mean,
ymin = ifelse(mean < 0, mean - abs(sd), mean),
ymax = ifelse(mean > 0, mean + abs(sd), mean)),
width = .2,
position = position_dodge(0.9)) +
geom_col(aes(group = Group),
position = position_dodge(0.9),
width = 0.7,
color = "black") +
xlab("") +
ylab("dCt") +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_wrap(vars(gene), scales = "free", nrow = 1)
final_barplot_no_colors
## ----fig.dim=c(5,3.5), fig.align='center', cache=FALSE------------------------
data.label <- data.frame(matrix(nrow = 2, ncol = 4)) # Number of rows is equal to number of genes
rownames(data.label) <- c("Gene19","Gene8")
colnames(data.label) <- c("x", "xend", "y", "annotation")
data.label$gene <- rownames(data.label) # Name of column with gene symbols
# in this table must be the same as name of the column with gene symbols
# in data used for create the plot.
data.mean <- data.mean %>%
mutate(max = mean + sd) %>%
group_by(gene) %>%
summarise(height = max(max, na.rm = TRUE), .groups = "keep")
data.label$y <- 0.5 + data.mean$height
data.label$x <- c(1,1)
data.label$xend <- c(1.98,1.98)
data.label$annotation <- c("***","*")
final_barplot_no_colors_labels <- final_barplot_no_colors +
geom_signif(
stat = "identity",
data = data.label,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation,
textsize = 5),
color = "black",
manual = TRUE) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)))
final_barplot_no_colors_labels
## ----fig.dim=c(7.1,5), cache=FALSE--------------------------------------------
# Create named list with colors for groups annotation:
colors.for.groups = list("Group" = c("After"="firebrick1", "Before"="green3"))
# Vector of colors for heatmap can be also specified to fit the user needings:
colors <- c("navy","navy","#313695","#313695","#4575B4","#4575B4","#74ADD1","#74ADD1",
"#ABD9E9","#E0F3F8","#FFFFBF","#FEE090","#FDAE61","#F46D43",
"#D73027","#C32B23","#A50026","#8B0000",
"#7E0202","#000000")
library(pheatmap)
results_heatmap(data.dCt.pairwise.F,
sel.Gene = "all",
col.groups = colors.for.groups,
colors = colors,
show.colnames = TRUE,
show.rownames = TRUE,
fontsize = 11,
fontsize.row = 10,
cellwidth = 8,
angle.col = 90)
# Cellwidth parameter was set to 8 to avoid cropping the image on the right side.
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
parallel.plot <- parallel_plot(data = data.dCt.pairwise.F,
sel.Gene = c("Gene19","Gene8"),
order = c(4,3))
## ----fig.dim=c(5,5.5), fig.align='center', cache=FALSE------------------------
pca.kmeans <- pca_kmeans(data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
legend.position = "top")
## ----fig.dim=c(5,6), fig.align='center', cache=FALSE--------------------------
pca.kmeans[[1]] + theme(legend.box = "vertical")
## -----------------------------------------------------------------------------
pca.kmeans[[2]]
## ----fig.dim=c(6.5,6.5), fig.align='center', cache=FALSE----------------------
library(Hmisc)
library(corrplot)
# To make the plot more readable, only part of the data was used:
corr.samples <- corr_sample(data = data.dCt.pairwise.F[1:10, ],
method = "pearson",
order = "hclust",
size = 0.7,
p.adjust.method = "BH",
add.coef = "white")
## ----fig.dim=c(6.5,6.5), fig.align='center', cache=FALSE----------------------
library(Hmisc)
library(corrplot)
corr.genes <- corr_gene(data = data.dCt.pairwise.F,
method = "pearson",
order = "FPC",
size = 0.7,
p.adjust.method = "BH")
## ----fig.dim=c(7.1,5), fig.align='center', cache=FALSE------------------------
library(ggpmisc)
Sample08_Sample26 <- single_pair_sample(data = data.dCt.pairwise,
pairwise.data = TRUE,
by.group = TRUE,
x = "Sample08",
y = "Sample26",
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = 0.05,
label.position.y = c(1, 0.95))
## ----fig.dim=c(7.1,5), fig.align='center', cache=FALSE------------------------
library(ggpmisc)
Gene16_Gene17 <- single_pair_gene(data.dCt.pairwise.F,
x = "Gene16",
y = "Gene17",
by.group = TRUE,
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = c(0.05),
label.position.y = c(1,0.95))
## ----cache=FALSE--------------------------------------------------------------
library(pROC)
# Remember to specify the numbers of rows (panels.row parameter) and columns (panels.col parameter)
# to provide sufficient place to arrange panels:
roc_parameters <- ROCh(data = data.dCt.pairwise.F,
sel.Gene = c("Gene8","Gene19"),
groups = c("After","Before"),
panels.row = 1,
panels.col = 2)
# Access to calculated parameters:
roc_parameters
## ----echo=FALSE, out.width="500px", fig.align="center", warning=FALSE, message=FALSE, cache=FALSE----
knitr::include_graphics("ROC_plot.png")
## -----------------------------------------------------------------------------
# Filter data:
data <- data.dCt.pairwise.F[, colnames(data.dCt.pairwise.F) %in% c("Group", "Sample", "Gene19")]
# Perform analysis:
data_roc <- roc(response = data$Group,
predictor = as.data.frame(data)$Gene19,
levels = c("Before","After"),
smooth = FALSE,
auc = TRUE,
plot = FALSE,
ci = TRUE,
of = "auc",
quiet = TRUE)
# Gain parameters:
parameters <- coords(data_roc,
"best",
ret = c("threshold",
"specificity",
"sensitivity",
"accuracy",
"ppv",
"npv",
"youden"))
parameters
# Gain AUC
data_roc$auc
## ----fig.dim=c(5,4), fig.align='center', cache=FALSE--------------------------
library(oddsratio)
# Remember to set the increment parameter.
log.reg.results <- log_reg(data = data.dCt.pairwise.F,
increment = 1,
sel.Gene = c("Gene8","Gene19"),
group.study = "After",
group.ref = "Before",
log.axis = TRUE,
p.adjust = FALSE)
log.reg.results[[2]]
## ----fig.dim=c(5,4), fig.align='center', cache=FALSE--------------------------
log.reg.results.sorted <- log.reg.results[[1]] +
scale_y_discrete(limits = rev(sort(log.reg.results[[2]]$Gene)))
log.reg.results.sorted
## ----cache=FALSE--------------------------------------------------------------
data("data.Ct.3groups")
str(data.Ct.3groups)
table(data.Ct.3groups$Group)
## ----fig.dim=c(7.1,9), cache=FALSE--------------------------------------------
sample.Ct.control.3groups <- control_Ct_barplot_sample(data = data.Ct.3groups,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
axis.title.size = 9,
axis.text.size = 7,
plot.title.size = 9,
legend.title.size = 9,
legend.text.size = 9)
## ----fig.dim=c(7.1,5.5), cache=FALSE------------------------------------------
gene.Ct.control.3groups <- control_Ct_barplot_gene(data = data.Ct.3groups,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
axis.title.size = 9,
axis.text.size = 9,
plot.title.size = 9,
legend.title.size = 9,
legend.text.size = 9)
## ----cache=FALSE--------------------------------------------------------------
head(sample.Ct.control.3groups[[2]])
## ----cache=FALSE--------------------------------------------------------------
head(gene.Ct.control.3groups[[2]])
## ----fig.dim=c(7.1,9), cache=FALSE--------------------------------------------
library(tidyverse)
library(pheatmap)
data("data.Ct.3groups")
# Vector of colors to fill the heatmap can be specified to fit the user needs:
colors <- c("#4575B4","#FFFFBF","#C32B23")
control_heatmap(data.Ct.3groups,
sel.Gene = "all",
colors = colors,
show.colnames = TRUE,
show.rownames = TRUE,
fontsize = 9,
fontsize.row = 9,
angle.col = 45)
## ----cache=FALSE--------------------------------------------------------------
# Finding samples with more than half of the unreliable Ct values.
low.quality.samples.3groups <- filter(sample.Ct.control.3groups[[2]], Not.reliable.fraction > 0.5)$Sample
low.quality.samples.3groups <- as.vector(low.quality.samples.3groups)
low.quality.samples.3groups
## -----------------------------------------------------------------------------
# Finding genes with more than half of the unreliable Ct values in given group.
low.quality.genes.3groups <- filter(gene.Ct.control.3groups[[2]], Not.reliable.fraction > 0.5)$Gene
low.quality.genes.3groups <- unique(as.vector(low.quality.genes.3groups))
low.quality.genes.3groups
## ----cache=FALSE--------------------------------------------------------------
# Data filtering
data.CtF.3groups <- filter_Ct(data = data.Ct.3groups,
flag.Ct = "Undetermined",
maxCt = 35,
flag = c("Undetermined"),
remove.Gene = low.quality.genes.3groups,
remove.Sample = low.quality.samples.3groups)
# Collapsing technical replicates without imputation:
data.CtF.ready.3groups <- make_Ct_ready(data = data.CtF.3groups,
imput.by.mean.within.groups = FALSE)
# A part of the data with missing values:
as.data.frame(data.CtF.ready.3groups)[25:30,]
# Collapsing technical replicates with imputation:
data.CtF.ready.3groups <- make_Ct_ready(data = data.CtF.3groups,
imput.by.mean.within.groups = TRUE)
# Missing values were imputed:
as.data.frame(data.CtF.ready.3groups)[25:30,]
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
library(ctrlGene)
# Remember that the number of colors in col parameter should be equal to the number of tested genes:
ref.3groups <- find_ref_gene(data = data.CtF.ready.3groups,
groups = c("AAA","Control","VV"),
candidates = c("CCL5", "GAPDH","IL1B","TGFB", "VEGFA"),
col = c("#66c2a5", "#fc8d62","#6A6599", "#1F77B4", "black"),
angle = 60,
axis.text.size = 7,
norm.finder.score = TRUE,
genorm.score = TRUE)
ref.3groups[[2]]
## ----cache=FALSE--------------------------------------------------------------
# For 2-dCt method:
data.dCt.exp.3groups <- delta_Ct(data = data.CtF.ready.3groups,
normalise = TRUE,
ref = "VEGFA",
transform = TRUE)
# For 2-ddCt method:
data.dCt.3groups <- delta_Ct(data = data.CtF.ready.3groups,
normalise = TRUE,
ref = "VEGFA",
transform = FALSE)
## ----fig.dim=c(7.1,8), cache=FALSE--------------------------------------------
control_boxplot_sample_3groups <- control_boxplot_sample(data = data.dCt.3groups,
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
axis.text.size = 7)
## ----fig.dim=c(7.1,4)---------------------------------------------------------
control_boxplot_gene_3groups <- control_boxplot_gene(data = data.dCt.3groups,
by.group = TRUE,
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
axis.text.size = 10)
## ----fig.dim=c(5,5.5), fig.align='center', cache=FALSE------------------------
control.pca.sample.3groups <- control_pca_sample(data = data.dCt.3groups,
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
point.size = 3,
label.size = 2.5,
legend.position = "top")
## ----fig.dim=c(5,5), fig.align='center', cache=FALSE--------------------------
control.pca.gene.3groups <- control_pca_gene(data = data.dCt.3groups)
## ----fig.dim=c(7.1,4), cache=FALSE--------------------------------------------
control_cluster_sample(data = data.dCt.3groups,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.5)
control_cluster_gene(data = data.dCt.3groups,
method.dist = "euclidean",
method.clust = "average",
label.size = 0.5)
## ----cache=FALSE--------------------------------------------------------------
data.dCtF.3groups <- filter_transformed_data(data = data.dCt.3groups,
remove.Sample = c("AAA14"))
## ----cache=FALSE--------------------------------------------------------------
data.dCt.exp.3groups <- delta_Ct(data = data.CtF.ready.3groups,
ref = "VEGFA",
transform = TRUE)
library(coin)
results.dCt.3groups <- RQ_dCt(data = data.dCt.exp.3groups,
do.tests = TRUE,
group.study = "VV",
group.ref = "Control")
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
head(as.data.frame(arrange(results.dCt.3groups, MW_test_p)))
## ----cache=FALSE--------------------------------------------------------------
data.dCt.3groups <- delta_Ct(data = data.CtF.ready.3groups,
ref = "VEGFA",
transform = FALSE)
library(coin)
results.ddCt.3groups <- RQ_ddCt(data = data.dCt.3groups,
group.study = "VV",
group.ref = "Control",
do.tests = TRUE)
# Obtained table can be sorted by, e.g. p values from the Mann-Whitney U test:
head(as.data.frame(arrange(results.ddCt.3groups, MW_test_p)))
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_boxplot_3groups <- results_boxplot(data = data.dCtF.3groups,
sel.Gene = c("ANGPT1","VEGFB", "VEGFC"),
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
by.group = TRUE,
signif.show = TRUE,
signif.labels = c("****","***","*"),
signif.dist = 1.05,
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.1,
angle = 20,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_barplot_3groups <- results_barplot(data = data.dCtF.3groups,
sel.Gene = c("ANGPT1","VEGFB","VEGFC"),
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
signif.show = TRUE,
signif.labels = c("****","***","*"),
angle = 30,
signif.dist = 1.05,
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.1,
y.axis.title = "dCt")
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
# Draw plot without statistical significance labels:
final_boxplot_3groups <- results_boxplot(data = data.dCtF.3groups,
sel.Gene = c("ANGPT1","VEGFB", "VEGFC"),
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
by.group = TRUE,
signif.show = FALSE, # It avoids drawing labels
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.2,
angle = 20,
y.axis.title = "dCt")
## ----cache=FALSE--------------------------------------------------------------
# Prepare expression data:
data <- pivot_longer(data.dCtF.3groups,
!c(Sample, Group),
names_to = "Gene" ,
values_to = "value")
# filter for genes:
data <- filter(data, Gene %in% c("ANGPT1","VEGFB", "VEGFC"))
# Find maximum value in each group:
label.height <- data %>%
group_by(Gene) %>%
summarise(height = max(value), .groups = "keep")
# Prepare empty data frame:
data.label.empty <- data.frame(matrix(nrow = length(unique(label.height$Gene)), ncol = 4))
rownames(data.label.empty) <- label.height$Gene
colnames(data.label.empty) <- c("x", "xend", "y", "annotation")
data.label.empty$Gene <- rownames(data.label.empty)
# Fill a data frame with coordinates for right pair:
data.label.right <- data.label.empty
data.label.right$x <- rep(1.01, nrow(data.label.right))
data.label.right$xend <- rep(1.25, nrow(data.label.right))
data.label.right$y <- label.height$height + 0.5
data.label.right$annotation <- c("right1","right2","right3")
# Fill a data frame with coordinates for left pair:
data.label.left <- data.label.empty
data.label.left$x <- rep(0.98, nrow(data.label.left))
data.label.left$xend <- rep(0.75, nrow(data.label.left))
data.label.left$y <- label.height$height + 0.5
data.label.left$annotation <- c("left1","left2","left3")
# Fill a data frame with coordinates for edge pair:
data.label.edge <- data.label.empty
data.label.edge$x <- rep(0.75, nrow(data.label.edge))
data.label.edge$xend <- rep(1.25, nrow(data.label.edge))
data.label.edge$y <- label.height$height + 1.2
data.label.edge$annotation <- c("edge1","edge2","edge3")
## ----fig.dim=c(6.5,4.5), fig.align='center', cache=FALSE----------------------
final_boxplot_3groups +
geom_signif(
stat = "identity",
data = data.label.right,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
geom_signif(
stat = "identity",
data = data.label.left,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
geom_signif(
stat = "identity",
data = data.label.edge,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.12))) # it makes space for labels
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
final_barplot_3groups <- results_barplot(data = data.dCtF.3groups,
sel.Gene = c("ANGPT1","VEGFB","VEGFC"),
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"),
signif.show = FALSE,
angle = 30,
faceting = TRUE,
facet.row = 1,
facet.col = 4,
y.exp.up = 0.1,
y.axis.title = "dCt")
## ----cache=FALSE--------------------------------------------------------------
# Prepare expression data:
data <- pivot_longer(data.dCtF.3groups,
!c(Sample, Group),
names_to = "Gene" ,
values_to = "value")
# filter for genes:
data <- filter(data, Gene %in% c("ANGPT1","VEGFB", "VEGFC"))
# Calculate mean and standard deviation for each group:
data.mean <- data %>%
group_by(Group, Gene) %>%
summarise(mean = mean(value, na.rm = TRUE), .groups = "keep")
data.sd <- data %>%
group_by(Group, Gene) %>%
summarise(sd = sd(value, na.rm = TRUE), .groups = "keep")
data.mean$sd <- data.sd$sd
#Find the highest values:
label.height <- data.mean %>%
mutate(max = mean + sd) %>%
group_by(Gene) %>%
summarise(height = max(max, na.rm = TRUE), .groups = "keep")
# Prepare empty data frame:
data.label.empty <- data.frame(matrix(nrow = length(unique(data.mean$Gene)), ncol = 4))
rownames(data.label.empty) <- unique(data.mean$Gene)
colnames(data.label.empty) <- c("x", "xend", "y", "annotation")
data.label.empty$Gene <- rownames(data.label.empty)
# Fill a data frame with coordinates for left pair:
data.label.left <- data.label.empty
data.label.left$x <- rep(0.97, nrow(data.label.left))
data.label.left$xend <- rep(0.7, nrow(data.label.left))
data.label.left$y <- label.height$height + 0.3
data.label.left$annotation <- c("left1","left2","left3")
# Fill a data frame with coordinates for left pair:
data.label.right <- data.label.empty
data.label.right$x <- rep(1.01, nrow(data.label.right))
data.label.right$xend <- rep(1.28, nrow(data.label.right))
data.label.right$y <- label.height$height + 0.3
data.label.right$annotation <- c("right1","right2","right3")
# Fill a data frame with coordinates for edge pair:
data.label.edge <- data.label.empty
data.label.edge$x <- rep(0.7, nrow(data.label.edge))
data.label.edge$xend <- rep(1.28, nrow(data.label.edge))
data.label.edge$y <- label.height$height + 1
data.label.edge$annotation <- c("edge1","edge2","edge3")
## ----fig.dim=c(6,4.5), fig.align='center', cache=FALSE------------------------
final_barplot_3groups +
geom_signif(
stat = "identity",
data = data.label.left,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
geom_signif(
stat = "identity",
data = data.label.right,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
geom_signif(
stat = "identity",
data = data.label.edge,
aes(x = x,
xend = xend,
y = y,
yend = y,
annotation = annotation),
color = "black",
manual = TRUE) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.12)))
## ----fig.dim=c(9,5), cache=FALSE----------------------------------------------
# Create named list with colors for groups annotation:
colors.for.groups = list("Group" = c("AAA"="#f98517","Control"="#33b983", "VV"="#bf8cfc"))
# Vector of colors for heatmap:
colors <- c("navy","navy","#313695","#4575B4","#74ADD1","#ABD9E9",
"#E0F3F8","#FFFFBF","#FEE090","#FDAE61","#F46D43",
"#D73027","#C32B23","#A50026","#8B0000",
"#7E0202","#000000")
results_heatmap(data.dCtF.3groups,
sel.Gene = "all",
col.groups = colors.for.groups,
colors = colors,
show.colnames = FALSE,
show.rownames = TRUE,
fontsize = 11,
fontsize.row = 11,
cellwidth = 4)
# Cellwidth parameter was set to 4 to avoid cropping the image on the right side.
## ----fig.dim=c(5,5.5), fig.align='center', cache=FALSE------------------------
pca.kmeans <- pca_kmeans(data.dCtF.3groups,
sel.Gene = c("ANGPT1","VEGFB", "VEGFC"),
k.clust = 3,
clust.names = c("Cluster1", "Cluster2", "Cluster3"),
point.shape = c(19, 17, 18),
point.color = c("#66c2a5", "#fc8d62", "#8DA0CB"),
legend.position = "top")
# Access to the confusion matrix:
pca.kmeans[[2]]
## ----fig.dim=c(5,6), fig.align='center', cache=FALSE--------------------------
pca.kmeans[[1]] + theme(legend.box = "vertical")
## ----fig.dim=c(6.5,6.5), fig.align='center', cache=FALSE----------------------
library(Hmisc)
library(corrplot)
# To make the plot more readable, only part of the data was used:
corr.samples <- corr_sample(data = data.dCtF.3groups[15:30, ],
method = "pearson",
order = "hclust",
size = 0.7,
p.adjust.method = "BH",
add.coef = "white")
## ----fig.dim=c(6.5,6.5), fig.align='center', cache=FALSE----------------------
library(Hmisc)
library(corrplot)
corr.genes <- corr_gene(data = data.dCtF.3groups,
method = "spearman",
order = "FPC",
size = 0.7,
p.adjust.method = "BH")
## ----fig.dim=c(4.5,4.5), fig.align='center', cache=FALSE----------------------
library(ggpmisc)
AAA6_AAA43 <- single_pair_sample(data = data.dCtF.3groups,
x = "AAA6",
y = "AAA43",
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = 0.05)
## ----fig.dim=c(5,4.5), fig.align='center', cache=FALSE------------------------
library(ggpmisc)
# Without stratification by groups:
PDGFB_TGFB <- single_pair_gene(data.dCtF.3groups,
x = "PDGFB",
y = "TGFB",
by.group = FALSE,
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = c(0.05),
label.position.y = c(1,0.95))
## ----fig.dim=c(6,5.5), fig.align='center', cache=FALSE------------------------
library(ggpmisc)
# With stratification by groups:
PDGFB_TGFB <- single_pair_gene(data.dCtF.3groups,
x = "PDGFB",
y = "TGFB",
by.group = TRUE,
colors = c("#66c2a5", "#fc8d62", "#8DA0CB"), # Vector of colors
point.size = 3,
labels = TRUE,
label = c("eq", "R2", "p"),
label.position.x = c(0.05),
label.position.y = c(1,0.95,0.9)) # Labels position
## ----cache=FALSE--------------------------------------------------------------
library(pROC)
# Remember to specify the numbers of rows (panels.row parameter) and columns (panels.col parameter) to be sufficient to arrange panels:
roc_parameters <- ROCh(data = data.dCtF.3groups,
sel.Gene = c("ANGPT1","VEGFB", "VEGFC"),
groups = c("Control","VV"),
panels.row = 2,
panels.col = 2)
roc_parameters
## ----echo=FALSE, out.width="500px", fig.align="center", warning=FALSE, message=FALSE, cache=FALSE----
knitr::include_graphics("ROC_plot_3groups.png")
## ----fig.dim=c(5,4), fig.align='center', cache=FALSE--------------------------
library(oddsratio)
# Remember to set the increment parameter.
log.reg.results <- log_reg(data = data.dCtF.3groups,
increment = 1,
sel.Gene = c("ANGPT1","VEGFB", "VEGFC"),
group.study = "VV",
group.ref = "Control")
log.reg.results[[2]]
## ----fig.dim=c(5,4), fig.align='center', cache=FALSE--------------------------
log.reg.results.sorted <- log.reg.results[[1]] +
scale_y_discrete(limits = rev(sort(log.reg.results[[2]]$Gene)))
log.reg.results.sorted
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.