## SKG
## Jan. 22, 2020
## some EDA (non-map kind)
devtools::load_all()
library(ggplot2)
library(dplyr)
library(tidyr)
library(viridis)
data(clusts_tb)
## Long-format cluster table: one row per cluster x variable (smear / hiv)
## x status suffix, with the count held in pivot_longer()'s default `value`
## column.
clusts_smear <- clusts_tb %>%
  ## tag the bare n_* columns with "smear" so they follow the same
  ## n_<var>_<type> naming as the n_hiv_* columns
  rename(
    n_smear_pos = n_pos,
    n_smear_neg = n_neg,
    n_smear_unk = n_unk
  ) %>%
  ## this row order is reused below as the cluster ordering on the plot axis
  arrange(size, n_smear_neg, n_hiv_pos, n_hiv_neg) %>%
  pivot_longer(
    cols = n_smear_pos:n_hiv_unk,
    names_pattern = "n_?(.*)_(.*)",
    names_to = c("var", "type")
  )
## Horizontal stacked bars of the per-cluster counts coloured by status, one
## facet per variable (HIV / smear), restricted to clusters with size > 1.
clusts_smear %>%
  filter(size > 1) %>%
  ggplot(aes(
    ## pin the bar order to the row order of clusts_smear
    x = factor(PCR.Cluster, levels = unique(clusts_smear$PCR.Cluster)),
    y = value
  )) +
  geom_col(aes(fill = type)) +
  coord_flip() +
  facet_wrap(
    ~var,
    labeller = as_labeller(c(hiv = "HIV", smear = "Smear"))
  ) +
  ## colours / labels are positional, i.e. matched to the sorted `type` values
  scale_fill_manual(
    values = c("red", "blue", "gray30"),
    labels = c("-", "+", "Unknown")
  ) +
  labs(
    title = "Cluster size and HIV/Smear status",
    subtitle = "Cluster Size > 1",
    x = "Cluster ID",
    y = "Cluster Size",
    fill = "Value"
  ) +
  theme_bw(base_size = 12) +
  ## cluster ids are hidden: the tick labels are blanked on the flipped axis
  theme(
    legend.position = "bottom",
    axis.text.y = element_blank()
  )
## writes the most recently created plot
ggsave(filename = "hiv-smear.pdf", width = 7)
## Size and range
## Scatter of cluster size against the span between first and last detection
## (inf_range / 365, labelled as years) for clusters with size > 1, with a
## smoother overlaid. The most recently created plot is then written to
## duration-size.pdf.
ggplot(
  data = clusts_tb %>% filter(size > 1),
  ## NOTE(review): dividing by 365 assumes inf_range is in days -- confirm
  aes(x = size, y = as.numeric(inf_range) / 365)
) +
  geom_point(size = 2) +
  theme_bw(base_size = 12) +
  labs(
    x = "Cluster Size",
    y = "Time between first and last detection (years)",
    title = "Infection Duration vs. Cluster Size",
    subtitle = "With a Loess Smoother"
  ) +
  ## Ask for loess explicitly: the default method = "auto" only uses loess
  ## below 1,000 observations and otherwise switches to a GAM, which would
  ## contradict the subtitle. Spelling out the formula also silences the
  ## "geom_smooth() using method = ..." message.
  geom_smooth(method = "loess", formula = y ~ x)
ggsave("duration-size.pdf")
## Collapse HIV and smear status to three levels: "Positive", "Negative" and
## "Unknown" for everything else.
## %in% is used rather than == because a missing (NA) status compared with
## == yields NA, which ifelse() passes through; such rows would then be NA
## instead of "Unknown" and would be dropped by table() further down.
tb_clean$hiv <- ifelse(
  tb_clean$hivstatus %in% c("Positive", "Negative"),
  as.character(tb_clean$hivstatus),
  "Unknown"
)
tb_clean$smear <- ifelse(
  tb_clean$spsmear %in% c("Positive", "Negative"),
  as.character(tb_clean$spsmear),
  "Unknown"
)
## Per-cluster summary: number of records, plus the smear status of the
## record with the earliest and with the latest INIT_REGIMEN_START_DATE.
## Records with an empty PCR.Cluster are excluded.
clusts_first <- tb_clean %>%
  filter(PCR.Cluster != "") %>%
  group_by(PCR.Cluster) %>%
  summarise(
    size = n(),
    ## order() puts NA dates last in both directions, so a record with a
    ## missing date is only picked when no dated record exists in the cluster
    first_smear = smear[order(INIT_REGIMEN_START_DATE)[1]],
    last_smear = smear[order(INIT_REGIMEN_START_DATE, decreasing = TRUE)[1]]
  )
## Cross-tabulate clusters by first / last smear status against cluster size.
tab <- table(clusts_first[["first_smear"]], clusts_first[["size"]])
tab2 <- table(clusts_first[["last_smear"]], clusts_first[["size"]])
## Long form of each table: Var1 = smear status, Var2 = cluster size,
## Freq = number of clusters.
df <- data.frame(tab)
df2 <- data.frame(tab2)
## Stacked bars of cluster counts by size, filled by last smear status.
ggplot(df2, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col()
## HIV x smear contingency table over records that have a non-empty
## PCR.Cluster, followed by a chi-squared test of association.
df3 <- filter(tb_clean, PCR.Cluster != "")
tab3 <- table(df3$hiv, df3$smear, dnn = c("HIV", "Smear"))
## p-value is simulated; no seed is set here, so it varies between runs
chisq.test(tab3, simulate.p.value = TRUE)
## Mosaic and association plots of HIV status against smear status.
library(ggmosaic)
## ggplot2 flavour; note this one is drawn from every row of tb_clean,
## not from the cluster-only subset behind tab3
ggplot(tb_clean) +
  geom_mosaic(aes(x = product(smear, hiv), fill = smear))
library(vcd)
## vcd flavour, drawn from the tab3 contingency table with shading enabled
vcd::mosaic(tab3, shade = TRUE, legend = TRUE)
vcd::assoc(tab3, shade = TRUE)
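## NOTE: the next two lines are not R code (they read like leftover
## embed-snippet text from a web page); kept as comments so the script parses.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.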