Load vsearch output

Philly 2017 pollen

trnL_PH_pollen_2017 <- load_blast6("~/vsearchr/inst/extdata/trnL_vsearch_philly_2017.output/pollen/")
ITS1_PH_pollen_2017 <- load_blast6("~/vsearchr/inst/extdata/ITS1_vsearch_philly_2017.output/pollen/")
ITS2_PH_pollen_2017 <- load_blast6("~/vsearchr/inst/extdata/ITS2_vsearch_philly_2017.output/pollen/")
write_csv(trnL_PH_pollen_2017, "trnL_PH_pollen_2017.csv")
write_csv(ITS1_PH_pollen_2017, "ITS1_PH_pollen_2017.csv")
write_csv(ITS2_PH_pollen_2017, "ITS2_PH_pollen_2017.csv")
trnL_PH_pollen_2017 <- read_csv("~/vsearchr/inst/extdata/trnL_PH_pollen_2017.csv", col_names = TRUE)
ITS1_PH_pollen_2017 <- read_csv("~/vsearchr/inst/extdata/ITS1_PH_pollen_2017.csv", col_names = TRUE)
ITS2_PH_pollen_2017 <- read_csv("~/vsearchr/inst/extdata/ITS2_PH_pollen_2017.csv", col_names = TRUE)

Load taxonomy tables

Philly 2017

trnL_PH_tax <- load_MTXA("~/vsearchr/inst/extdata/trnL_PH_Amplicons2.tax")
ITS1_PH_tax <- load_MTXA("~/vsearchr/inst/extdata/ITS1_PH_Amplicons.tax")
ITS2_PH_tax <- load_MTXA("~/vsearchr/inst/extdata/ITS2_PH_Amplicons.tax")

Join vsearch output to taxonomy tables

Philly 2017 pollen

trnL_PH_pollen_2017_join <- tax_join(trnL_PH_pollen_2017, trnL_PH_tax, min_id = 97.0, min_len = 150)
ITS1_PH_pollen_2017_join <- tax_join(ITS1_PH_pollen_2017, ITS1_PH_tax, min_id = 95.0, min_len = 300)
ITS2_PH_pollen_2017_join <- tax_join(ITS2_PH_pollen_2017, ITS2_PH_tax, min_id = 95.0, min_len = 300)
write_csv(trnL_PH_pollen_2017_join, "~/vsearchr/inst/extdata/trnL_PH_pollen_join_2017.csv")
write_csv(ITS1_PH_pollen_2017_join, "~/vsearchr/inst/extdata/ITS1_PH_pollen_join_2017.csv")
write_csv(ITS2_PH_pollen_2017_join, "~/vsearchr/inst/extdata/ITS2_PH_pollen_join_2017.csv")
trnL_PH_pollen_2017_join <- read_csv("~/vsearchr/inst/extdata/trnL_PH_pollen_join_2017.csv", col_names = TRUE)
ITS1_PH_pollen_2017_join <- read_csv("~/vsearchr/inst/extdata/ITS1_PH_pollen_join_2017.csv", col_names = TRUE)
ITS2_PH_pollen_2017_join <- read_csv("~/vsearchr/inst/extdata/ITS2_PH_pollen_join_2017.csv", col_names = TRUE)

Evaluate coverage

trnL_cov <- trnL_PH_pollen_2017 %>%
  group_by(sample) %>%
  summarize(reads = n())

ITS1_cov <- ITS1_PH_pollen_2017 %>%
  group_by(sample) %>%
  summarize(reads = n())

ITS2_cov <- ITS2_PH_pollen_2017 %>%
  group_by(sample) %>%
  summarize(reads = n())

cov <- full_join(trnL_cov, ITS1_cov, by = "sample") %>%
  full_join(ITS2_cov, by = "sample") %>%
  dplyr::select(sample, trnL = 2, ITS1 = 3, ITS2 = 4)

write_csv(cov, "~/vsearchr/inst/extdata/output_2017/pollen_2017_coverage")

Tally genera by sample

Philly 2017 pollen

trnL_PH_pollen_tally <- tally_gen(trnL_PH_pollen_2017_join)
ITS1_PH_pollen_tally <- tally_gen(ITS1_PH_pollen_2017_join)
ITS2_PH_pollen_tally <- tally_gen(ITS2_PH_pollen_2017_join)

Add sample metadata

Philly 2017 pollen

trnL_PH_pollen_final <- add_meta(trnL_PH_pollen_tally, "~/vsearchr/inst/extdata/PH_2017_key.csv")
ITS1_PH_pollen_final <- add_meta(ITS1_PH_pollen_tally, "~/vsearchr/inst/extdata/PH_2017_key.csv")
ITS2_PH_pollen_final <- add_meta(ITS2_PH_pollen_tally, "~/vsearchr/inst/extdata/PH_2017_key.csv")

Create consensus data sets, create genus summary fields

PH_pollen_consensus <- consensus_xyz_gen(trnL_PH_pollen_final, ITS1_PH_pollen_final, ITS2_PH_pollen_final,
                                         min_prop = 0.0005) %>%
  add_meta("~/vsearchr/inst/extdata/PH_2017_key.csv") %>%
  mutate(period = case_when(date > "2017-05-20" & date < "2017-06-15" ~ "June",
                            date > "2017-06-20" & date < "2017-07-15" ~ "July",
                            date > "2017-07-20" & date < "2017-08-15" ~ "Aug",
                            date > "2017-08-20" & date < "2017-09-15" ~ "Sept",
                            is.na(date) & site == "Nicetown" ~ "July"))

PH_pollen_consensus_June <- filter(PH_pollen_consensus, period == "June")
PH_pollen_consensus_July <- filter(PH_pollen_consensus, period == "July")
PH_pollen_consensus_Aug <- filter(PH_pollen_consensus, period == "Aug")
PH_pollen_consensus_Sept <- filter(PH_pollen_consensus, period == "Sept")


PH_pollen_genus_summary <- PH_pollen_consensus %>%
  group_by(genus) %>%
  summarize(gen_freq = n(),
            max_prop = max(scaled_prop))

PH_pollen_genus_summary_June <- PH_pollen_consensus_June %>%
  group_by(genus) %>%
  summarize(gen_freq = n(),
            mean_prop = sum(scaled_prop)/8) # We only had 8 sites for the MayJune sample; we had 12 sites for all others

PH_pollen_genus_summary_July <- PH_pollen_consensus_July %>%
  group_by(genus) %>%
  summarize(gen_freq = n(),
            mean_prop = sum(scaled_prop)/12)

PH_pollen_genus_summary_Aug <- PH_pollen_consensus_Aug %>%
  group_by(genus) %>%
  summarize(gen_freq = n(),
            mean_prop = sum(scaled_prop)/12)

PH_pollen_genus_summary_Sept <- PH_pollen_consensus_Sept %>%
  group_by(genus) %>%
  summarize(gen_freq = n(),
            mean_prop = sum(scaled_prop)/12)


PH_pollen_final <- left_join(PH_pollen_consensus, PH_pollen_genus_summary, by = "genus")
PH_pollen_final_June <- left_join(PH_pollen_consensus_June, PH_pollen_genus_summary_June, by = "genus")
PH_pollen_final_July <- left_join(PH_pollen_consensus_July, PH_pollen_genus_summary_July, by = "genus")
PH_pollen_final_Aug <- left_join(PH_pollen_consensus_Aug, PH_pollen_genus_summary_Aug, by = "genus")
PH_pollen_final_Sept <- left_join(PH_pollen_consensus_Sept, PH_pollen_genus_summary_Sept, by = "genus")

write_csv(PH_pollen_final, "~/vsearchr/inst/extdata/output_2017/PH_2017_pollen.csv")

Plot data

Philly 2017 pollen

trnL only
ggplot(filter(trnL_PH_pollen_final, gen_prop >= 0.01), aes(x = date, y = reorder(genus, gen_freq), fill = gen_prop)) +
  geom_tile(width = 32, color = "white") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Sample") +
  labs(fill = "Proportional\nabundance") +
  facet_grid(~site) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ITS1 only
ggplot(filter(ITS1_PH_pollen_final, gen_prop >= 0.01), aes(x = date, y = reorder(genus, gen_prop), fill = gen_prop)) +
  geom_tile(width = 12, color = "white") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Sample") +
  labs(fill = "Proportional\nabundance") +
  facet_grid(~site) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ITS2 only
ggplot(filter(ITS2_PH_pollen_final, gen_prop >= 0.01), aes(x = date, y = reorder(genus, gen_prop), fill = gen_prop)) +
  geom_tile(width = 12, color = "white") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Sample") +
  labs(fill = "Proportional\nabundance") +
  facet_grid(~site) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
2-of-3 consensus
ggplot(PH_pollen_final, aes(x = date, y = reorder(genus, gen_freq), fill = scaled_prop)) +
  geom_tile(width = 30, color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(4) +
  ylab("Genus") +
  xlab("Sample") +
  labs(fill = "Proportional\nabundance") +
  facet_grid(~site) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("~/vsearchr/inst/extdata/output_2017/pollen_2017.pdf")

ggplot(filter(PH_pollen_final_MayJune, period == "May/June"), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("~/vsearchr/inst/extdata/output_2017/pollen_2017_mayjune.pdf", width = 6, height = 8)

ggplot(filter(PH_pollen_final_JuneJuly, period == "June/July"), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("~/vsearchr/inst/extdata/output_2017/pollen_2017_junejuly.pdf", width = 6, height = 8)

ggplot(filter(PH_pollen_final_Aug, period == "Aug"), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("~/vsearchr/inst/extdata/output_2017/pollen_2017_aug.pdf", width = 6, height = 8)

ggplot(filter(PH_pollen_final_Sept, period == "Sept"), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("~/vsearchr/inst/extdata/output_2017/pollen_2017_sept.pdf", width = 6, height = 8)
# pdf("~/vsearchr/inst/extdata/figures/PH_final_period.pdf", height = 5, width = 8.5)
# ggplot(filter(PH_pollen_final_join, !is.na(date)), 
#        aes(x = reorder(period, date), y = reorder(genus, gen_freq), fill = scaled_prop)) +
#   geom_tile(width = 0.5, color = "white") +
#   scale_fill_gradient(low = "gray95", high = "purple") +
#   theme_bw(8) +
#   ylab("Genus") +
#   xlab("Sample") +
#   labs(fill = "Proportional\nabundance") +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
# dev.off()


############ Trimmed figures for presentation #################

# pdf("~/vsearchr/inst/extdata/figures/PH_final.pdf", height = 5, width = 8.5)
# ggplot(PH_pollen_final, aes(x = date, y = reorder(genus, gen_freq), fill = scaled_prop)) +
#   geom_tile(width = 30, color = "gray40") +
#   scale_fill_gradient(low = "gray95", high = "purple") +
#   theme_bw(4) +
#   ylab("Genus") +
#   xlab("Sample") +
#   labs(fill = "Proportional\nabundance") +
#   facet_grid(~site) +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
# dev.off()

pdf("~/vsearchr/inst/extdata/figures/PH_final_mayjune_PresTrim.pdf", height = 6, width = 6.5)
ggplot(filter(PH_pollen_final_MayJune, period == "May/June", gen_freq > 1 | mean_prop >= 0.05), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
dev.off()

pdf("~/vsearchr/inst/extdata/figures/PH_final_junejuly_PresTrim.pdf", height = 6, width = 6.5)
ggplot(filter(PH_pollen_final_JuneJuly, period == "June/July", gen_freq > 1 | mean_prop >= 0.05), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
dev.off()

pdf("~/vsearchr/inst/extdata/figures/PH_final_aug_PresTrim.pdf", height = 5, width = 6.5)
ggplot(filter(PH_pollen_final_Aug, period == "Aug", gen_freq > 1 | mean_prop >= 0.05), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
dev.off()

pdf("~/vsearchr/inst/extdata/figures/PH_final_sept_PresTrim.pdf", height = 5, width = 6.5)
ggplot(filter(PH_pollen_final_Sept, period == "Sept", gen_freq > 1 | mean_prop >= 0.05), aes(x = site, y = reorder(genus, mean_prop), fill = scaled_prop)) +
  geom_tile(color = "gray40") +
  scale_fill_gradient(low = "gray95", high = "purple") +
  theme_bw(12) +
  ylab("Genus") +
  xlab("Site") +
  labs(fill = "Proportional\nabundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
dev.off()

# pdf("~/vsearchr/inst/extdata/figures/PH_final_period.pdf", height = 5, width = 8.5)
# ggplot(filter(PH_pollen_final_join, !is.na(date)), 
#        aes(x = reorder(period, date), y = reorder(genus, gen_freq), fill = scaled_prop)) +
#   geom_tile(width = 0.5, color = "white") +
#   scale_fill_gradient(low = "gray95", high = "purple") +
#   theme_bw(8) +
#   ylab("Genus") +
#   xlab("Sample") +
#   labs(fill = "Proportional\nabundance") +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
# dev.off()


sponslerdb/vsearchR documentation built on June 19, 2020, 10:01 a.m.