knitr::opts_chunk$set( cache = TRUE, message = FALSE, warning = FALSE )
library(nychanesmicrobiome) library(dplyr) scale_palette <- c("#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00") theme_transparent <- ggplot2::theme( axis.title = ggplot2::element_text(colour = "white"), axis.text = ggplot2::element_text(colour = "white"), title = ggplot2::element_text(colour = "white"), legend.text = ggplot2::element_text(colour = "white"), axis.ticks = ggplot2::element_line(colour = "white"), panel.border = ggplot2::element_rect(colour = "white"), panel.background = ggplot2::element_rect(fill = "transparent",colour = NA), plot.background = ggplot2::element_rect(fill = "transparent",colour = NA), legend.background = ggplot2::element_rect(fill = "transparent",colour = NA), legend.key = ggplot2::element_rect(fill = "transparent",colour = NA), panel.grid = ggplot2::element_blank() )
NYC_HANES <- loadQiimeData() %>% annotateFactors() # Extract alternative smokers who also smoke cigarettes and remove them alt_smokers_cigarettes <- NYC_HANES |> sample_data() |> data.frame() |> dplyr::filter(smokingstatus == 'Alternative smoker') |> dplyr::filter(CIGARETTES == 'Yes') |> dplyr::select(Burklab_ID) |> t() NYC_HANES <- prune_samples(!(sample_names(NYC_HANES) %in% alt_smokers_cigarettes), NYC_HANES) sample_data(NYC_HANES)$smokingstatus <- relevel(sample_data(NYC_HANES)$smokingstatus,"Never smoker") # Subset alternative smokers NYC_HANES.alt <- subset_samples(NYC_HANES, smokingstatus %in% c("Never smoker", "Alternative smoker")) levels(sample_data(NYC_HANES.alt)$smokingstatus) <- c('Never','Alternative') biosis.tsv <- system.file("extdata","biosis.tsv", package="nychanesmicrobiome", mustWork = TRUE) biosis <- read.csv(biosis.tsv,header = TRUE, sep = '\t')
metadata <- data.frame(sample_data(NYC_HANES)) variablesToLook <- c("GENDER","RACE","EDU4CAT","SPAGE", "AGEGRP5C","DBTS_NEW","SR_ACTIVE","INC25KMOD","COTININE","OHQ_3") factorVars <- c("AGEGRP5C", "RACE","EDU4CAT","DBTS_NEW","SR_ACTIVE","INC25KMOD", "GENDER") table1 <- tableone::CreateTableOne(vars = variablesToLook, data = metadata, factorVars = factorVars, strata = "smokingstatus", includeNA = TRUE, test = FALSE) alternative_habits <- metadata[metadata$smokingstatus == 'Alternative smoker', c('E_CIGARETTES', 'HOOKAH_PIPE', 'CIGARS_CIGARILLOS')] %>% dplyr::mutate_all(dplyr::funs(as.numeric)) %>% colSums(na.rm = TRUE) x <- print(table1, explain = TRUE, printToggle = FALSE,nonnormal = c('COTININE')) knitr::kable(x)
NYC_HANES.genus <- tax_glom(NYC_HANES, taxrank = "Genus") NYC_HANES.genus.relab <- transform_sample_counts(NYC_HANES.genus, function(x) x / sum(x)) NYC_HANES.relab <- transform_sample_counts(NYC_HANES, function(x) x / sum(x)) NYC_HANES.genus.relab <- merge_taxa(NYC_HANES.genus, taxa_names(filter_taxa(NYC_HANES.genus.relab, function(x) mean(x) < 2e-4, TRUE))) tax_table(NYC_HANES.genus.relab)[is.na(tax_table(NYC_HANES.genus.relab)[, "Genus"]), "Genus"] <- "Other" NYC_HANES.genus.relab <- tax_glom(transform_sample_counts(NYC_HANES.genus.relab, function(x) x / sum(x)), taxrank = "Genus") NYC_HANES.phylum <- tax_glom(NYC_HANES, taxrank = "Phylum") NYC_HANES.phylum.relab <- transform_sample_counts(NYC_HANES.phylum, function(x) x / sum(x))
plot( plot_abundance( NYC_HANES, top_n = 8, prop = "total", taxrank = "Genus" ) + labs(x = "296 oral rinse samples") )
alphadiv <- estimate_richness(NYC_HANES, measures = c("Observed", "Chao1", "Shannon", "Simpson"))[, -3] alphadiv <- cbind(alphadiv, sample_data(NYC_HANES)$smokingstatus) colnames(alphadiv)[5] <- c("smokingstatus") alphadiv.melted <- reshape2::melt(alphadiv, id.vars = c("smokingstatus"))
ggplot(alphadiv.melted, aes(smokingstatus,value)) + geom_boxplot(aes(fill=smokingstatus))+ facet_grid(variable ~., scales = "free", switch = 'y') + scale_x_discrete(labels = c("Never\nsmokers","Cigarette\nsmokers","Former\nsmokers","Alternative\nsmokers","Secondhand\nsmoke"))+ scale_y_continuous(position = 'right') + scale_fill_manual(values = scale_palette) + guides(colour="none") + ggtitle("Smoking categories") + theme_bw() + theme(strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180), axis.text = element_text(size = 13), axis.title = element_text(size = 12), legend.text = element_text(size=12), legend.title = element_text(size=12), legend.position = 'bottom')
aovSMOKINGSTATUS <- aov(Shannon ~ smokingstatus, alphadiv) summary(aovSMOKINGSTATUS) aovSMOKINGSTATUS <- aov(Observed ~ smokingstatus, alphadiv) summary(aovSMOKINGSTATUS) aovSMOKINGSTATUS <- aov(Chao1 ~ smokingstatus, alphadiv) summary(aovSMOKINGSTATUS)
ggplot(metadata, aes(smokingstatus, COTININE, fill = smokingstatus)) + stat_boxplot() + scale_fill_manual(values = scale_palette) + theme_bw() + scale_x_discrete(labels = c("Never\nsmokers","Cigarette\nsmokers", "Former\nsmokers","Alternative\nsmokers","Secondhand\nsmoke")) + xlab('Reported smoking status') + ylab('Serum Cotinine (ng/ml)') + guides(fill = FALSE) + theme(strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180), axis.text = element_text(size = 12), axis.title = element_text(size = 12), legend.text = element_text(size=12), legend.title = element_text(size=12), legend.position = 'bottom')
distwu <- phyloseq::distance(NYC_HANES, "wunifrac") ordwu <- ordinate(NYC_HANES, method = "MDS", distance = distwu, weighted = TRUE) smoker.wu.data <- plot_ordination(NYC_HANES, ordwu, justDF = TRUE) %>% dplyr::arrange() smoker.wu.data$weeksquit <- as.numeric(smoker.wu.data[, "SMQ_4"]) * as.numeric(smoker.wu.data[, "SMQ_4UNIT"]) smoker.wu.data[!smoker.wu.data$SMOKER3CAT %in% "Former smoker", "weeksquit"] <- 5 smoker.wu.data$CIGARETTES[is.na(smoker.wu.data$CIGARETTES)] <- "No" smoker.wu.data %<>% bind_cols(otu_table(NYC_HANES.phylum.relab) %>% t %>% data.frame) proteobacteria <- tax_table(NYC_HANES.phylum.relab)[tax_table(NYC_HANES.phylum.relab)[, "Phylum"] == "Proteobacteria"] %>% rownames
plot( ggplot(subset( smoker.wu.data, smokingstatus %in% c("Cigarette", "Never smoker") )) + geom_point( aes(Axis.1, Axis.2, color = smokingstatus), size = 3, alpha = 0.7 ) + stat_ellipse(aes(Axis.1, Axis.2, color = smokingstatus), type = 't') + theme_bw() + theme( axis.text = element_text(size = 13), axis.title = element_text(size = 12), legend.text = element_text(size = 12), legend.title = element_text(size = 12), legend.position = 'bottom' ) + scale_color_manual(name = 'Smoking status', values = scale_palette[1:2]) + scale_y_continuous(limits = c(-0.14, 0.13)) + scale_x_continuous(limits = c(-0.18, 0.20)) )
plot( ggplot(subset( smoker.wu.data, smokingstatus %in% c("Cigarette", "Never smoker", "Former smoker") )) + geom_point( aes(Axis.1, Axis.2, color = smokingstatus, size = weeksquit), alpha = 0.7 ) + theme_bw() + theme( axis.text = element_text(size = 13), axis.title = element_text(size = 12), legend.text = element_text(size = 12), legend.title = element_text(size = 12), legend.position = 'bottom', legend.box = 'vertical', legend.margin = margin(-10, 0, 0, 0) ) + scale_color_manual(name = 'Smoking status', values = scale_palette[c(1:3)]) + scale_y_continuous(limits = c(-0.14, 0.13)) + scale_x_continuous(limits = c(-0.18, 0.20)) + scale_size(range = c(2, 8)) )
plot( ggplot(subset( smoker.wu.data, smokingstatus %in% c("Cigarette", "Never smoker", "Former smoker", "Secondhand") )) + geom_point( aes(Axis.1, Axis.2, color = smokingstatus, size = weeksquit), alpha = 0.7 ) + theme_bw() + scale_color_manual(name = 'Smoking status', values = scale_palette[c(1:4)]) + scale_y_continuous(limits = c(-0.14, 0.13)) + scale_x_continuous(limits = c(-0.18, 0.20)) + scale_size(range = c(2, 8)) )
plot( ggplot(smoker.wu.data) + geom_point( aes(Axis.1, Axis.2, color = smokingstatus, size = weeksquit), alpha = 0.7 ) + theme_bw() + scale_color_manual(name = 'Smoking status', values = scale_palette) + scale_y_continuous(limits = c(-0.14, 0.13)) + scale_x_continuous(limits = c(-0.18, 0.20)) + scale_size(range = c(2, 8)) )
plot( ggplot() + xlab("Axis.1") + ylab("Axis.2") + theme_bw() + geom_point( data = smoker.wu.data[smoker.wu.data$COTININE < 1, ], aes(Axis.1, Axis.2), size = 3, alpha = 1, color = "black" ) + geom_point( data = smoker.wu.data[smoker.wu.data$COTININE > 1, ], aes(Axis.1, Axis.2, color = COTININE), size = 3, alpha = 0.7 ) + scale_y_continuous(limits = c(-0.14, 0.13)) + scale_x_continuous(limits = c(-0.18, 0.20)) + scale_color_gradient(low = "blue", high = "red", name = "Cotinine (ng/ml)") + scale_size(range = c(2, 8)) )
metadata <- as(sample_data(NYC_HANES), "data.frame") permanova.res <- data.frame() for (s in combn(levels(metadata$smokingstatus), 2, simplify = FALSE)) { metadata_2cat <- metadata[metadata$smokingstatus %in% s, ] distwu_2cat <- phyloseq::distance( subset_samples(NYC_HANES, smokingstatus %in% s), "wunifrac" ) permanova <- vegan::adonis2( formula = distwu_2cat ~ smokingstatus + OHQ_3 + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW, data = metadata_2cat ) permanova.res %<>% bind_rows( data.frame( contrast = stringr::str_c(s, collapse = " vs "), r2 = permanova[1, 'R2'], pvalue = permanova[1, 'Pr(>F)'] ) ) %>% arrange(pvalue) } knitr::kable(permanova.res)
s <- c("Cigarette", "Never smoker") metadata <- as(sample_data(NYC_HANES), 'data.frame') metadata_2cat <- metadata[metadata$smokingstatus %in% s, ] distwu_2cat <- phyloseq::distance(subset_samples(NYC_HANES, smokingstatus %in% s), "wunifrac") permanova <- vegan::adonis2(data = metadata_2cat, distwu_2cat ~ COTININE) permanova
metadata_3cat <- metadata[metadata$smokingstatus %in% c("Cigarette", "Never smoker", "Former smoker"), ] distwu_3cat <- phyloseq::distance(subset_samples( NYC_HANES, smokingstatus %in% c("Cigarette", "Never smoker", "Former smoker") ), "wunifrac") vegan::adonis2( distwu_3cat ~ smokingstatus + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW, metadata_3cat )
vegan::adonis2(distwu ~ smokingstatus, metadata)
DESeq2 is presented as sensitivity analysis. Main results are calculated by edgeR.
threshold <- 0.05 dds <- DESeq2::DESeq(phyloseq_to_deseq2(NYC_HANES, design = ~ smokingstatus), parallel = TRUE) ## here "Current smoker" is the numerator, "Never smoker" is the denominator in fold-change calculation res.crude <- DESeq2::results(dds, contrast = c("smokingstatus", "Cigarette", "Never smoker")) res.filtered <- res.crude[!is.na(res.crude$padj), ] res.filtered <- res.filtered[res.filtered$padj < threshold, ] ## Create a table with the taxonomy of the significant OTUs res.tax <- cbind(as.data.frame(res.filtered), as.matrix(tax_table(NYC_HANES)[rownames(res.filtered), ])) res.maxfold <- tapply(res.tax$log2FoldChange, res.tax$Genus, function(x) max(x)) res.maxfold <- sort(res.maxfold, T) # res.tax["New.ReferenceOTU179","Genus"] <- res.tax["New.ReferenceOTU179","Phylum"] %>% as.character res.tax$Genus <- factor(as.character(res.tax$Genus), levels = names(res.maxfold)) res.tax$type <- "Current/Never" res.tax <- res.tax[order(res.tax$log2FoldChange), ]
plot( ggplot(right_join(biosis, res.tax, by = c("X1" = "Genus")), aes(log2FoldChange, X1)) + geom_vline( xintercept = 0.0, color = "gray", size = 0.5 ) + geom_point(size = 3, aes(color = X2)) + facet_grid( Phylum ~ ., scales = "free", space = "free_y", switch = "y", as.table = TRUE ) + theme_bw() + scale_y_discrete(position = "right") + scale_color_discrete(name = "Oxygen requirement") + # guides(color=FALSE) + theme( axis.text = element_text( hjust = 0, vjust = 0.5, size = 11 ), strip.text = element_text(size = 9), axis.title = element_text(size = 9), axis.title.y = element_blank(), strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180) ) )
## Counfounders added in the design's formula dds.conf <- DESeq2::DESeq( phyloseq_to_deseq2( NYC_HANES, design = ~ AGEGRP4C + GENDER + RACE + SR_ACTIVE + EDU3CAT + DBTS_NEW + smokingstatus ) , parallel = TRUE, quiet = TRUE ) ##Use two out of three categories in the contrast variable res.conf <- DESeq2::results( dds.conf, contrast = c("smokingstatus", "Cigarette", "Never smoker"), pAdjustMethod = "BH" ) res.filtered.conf <- res.conf[!is.na(res.conf$padj), ] res.filtered.conf <- res.filtered.conf[res.filtered.conf$padj < threshold, ] #Create result table with DA OTUs tt <- tax_table(NYC_HANES)[rownames(res.filtered.conf), ] if (all.equal(rownames(res.filtered.conf), rownames(tt))) { res.tax.conf <- cbind(as.data.frame(res.filtered.conf), as.matrix(tt)) } else{ stop("rowname mismatch for res.tax.conf") } res.tax.conf <- res.tax.conf[!is.na(res.tax.conf$Family), ] res.maxfold.conf <- tapply(res.tax.conf$log2FoldChange, res.tax.conf$Genus, function(x) max(x)) res.maxfold.conf <- sort(res.maxfold.conf, T) res.tax.conf$Genus <- as.character(res.tax.conf$Genus) res.tax.conf["New.ReferenceOTU148", "Genus"] <- "Neisseriaceae" res.tax.conf["New.ReferenceOTU137", "Genus"] <- "Neisseriaceae" res.tax.conf["New.ReferenceOTU192", "Genus"] <- "Pasteurellaceae" res.tax.conf$Genus <- factor(as.character(res.tax.conf$Genus), levels = names(res.maxfold.conf)) res.tax.conf$type <- "Cigarette/Never + conf"
plot( ggplot( right_join(biosis, res.tax.conf, by = c("X1" = "Genus")), aes(log2FoldChange, X1) ) + geom_vline( xintercept = 0.0, color = "gray", size = 0.5 ) + geom_point(size = 3, aes(color = X2)) + facet_grid( Phylum ~ ., scales = "free", space = "free_y", switch = "y", as.table = TRUE ) + theme_bw() + # guides(color=FALSE) + facet_grid( Phylum ~ ., scales = "free", space = "free_y", switch = "y", as.table = TRUE ) + scale_y_discrete(position = "right") + scale_color_discrete(name = "Oxygen requirement") + # guides(color=FALSE) + theme( axis.text = element_text( hjust = 0, vjust = 0.5, size = 11 ), strip.text = element_text(size = 9), axis.title = element_text(size = 9), axis.title.y = element_blank(), strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180) ) )
#Take log2fc from crude and adjusted analysis. The pvalue and FDR are from the crude analysis stopifnot(all.equal(rownames(res.crude), rownames(res.conf))) compardat <- data.frame( crude = res.crude$log2FoldChange, pvalue = res.crude$pvalue, padj = res.crude$padj, adjusted = res.conf$log2FoldChange ) #Set pvalues with NAs at 1 compardat$padj[is.na(compardat$padj)] <- 1
ggplot(compardat) + geom_point(aes(crude, adjusted, color = padj < 0.05), size = 2) + geom_abline() + scale_colour_manual(values = setNames(c('black', 'grey'), c(TRUE, FALSE)), guide = FALSE) + # scale_x_continuous(limits = c(-2,2))+ # scale_y_continuous(limits = c(-2,2))+ theme_bw() + theme( axis.text = element_text( hjust = 0, vjust = 0.5, size = 17 ), strip.text = element_text(size = 13), axis.title = element_text(size = 15) )
#Extract all samples marked as SHS NYC_HANEScot <- subset_samples(NYC_HANES, smokingstatus %in% c("Secondhand")) #NYC_HANEScot <- subset_samples(NYC_HANEScot, smokingstatus %in% c("Secondhand") & COTININE < 3) #Build DESeq2 object using continuous cotinine variable ddscot <- DESeq2::DESeq(phyloseq_to_deseq2(NYC_HANEScot, design = ~ COTININE), parallel = TRUE) rescot.crude <- DESeq2::results(ddscot, alpha = 0.05) #Select OTUs significant in the crude smokers vs non-smokers model. #Correlation is calculated by taking the log2FC from the crude smokers vs non-smokers model and the crude cotinine model ( corr <- cor.test( res.crude$log2FoldChange[res.crude$padj < 0.05], rescot.crude$log2FoldChange[res.crude$padj < 0.05], use = "pairwise.complete.obs" ) ) if (identical(all.equal(rownames(rescot.crude), rownames(res.crude)), TRUE)) { cor.data <- data.frame(smoks = res.crude$log2FoldChange[res.crude$padj < 0.05], cot = rescot.crude$log2FoldChange[res.crude$padj < 0.05]) } else{ stop("mismatch in cotinine/smoking rownames") }
plot( ggplot(cor.data, aes(smoks, cot)) + theme_bw() + xlab("Crude Smokers vs. Non-smokers") + ylab("Crude Cotinine Levels") + geom_text(aes( 0.5, 0.25, label = stringr::str_c( 'p-value: ', signif(corr$p.value, 2), '\nCorr: ', signif(corr$estimate, 2) ) ), color = "black") + geom_point(size = 4, color = '#e41a1c') + geom_smooth(method = lm, se = FALSE, color = "red") + # scale_x_continuous(limits = c(-1.7,1.7)) + # scale_y_continuous(limits = c(-0.17,0.3)) + theme( axis.text = element_text( hjust = 0, vjust = 0.5, size = 13 ), strip.text = element_text(size = 13), axis.title = element_text(size = 15) ) )
#Set Never smokers as first level sample_data(NYC_HANES)$smokingstatus <- relevel(sample_data(NYC_HANES)$smokingstatus, "Never smoker") dasmoking.crude <- get_edgeR_results( ~ smokingstatus, pseq=NYC_HANES, coef = 2, alph = 0.05, method = "BH") dasmoking.crude$table <- dasmoking.crude$table[sort(dasmoking.crude$table %>% rownames), ]
dasmoking.crude$table$Genus <- as.character(dasmoking.crude$table$Genus) dasmoking.crude$table[c("New.CleanUp.ReferenceOTU8965", "New.ReferenceOTU179"), 'Genus'] <- 'uncultured' dasmoking.crude$table['New.ReferenceOTU72', 'Genus'] <- 'Family XIII' dasmoking.crude$table$Genus[is.na(dasmoking.crude$table$Genus)] <- 'unclassified' ord <- order(dasmoking.crude$table[['Phylum']], dasmoking.crude$table$logFC, decreasing = TRUE) dasmoking.crude$table$Genus <- factor(dasmoking.crude$table$Genus, levels = unique(dasmoking.crude$table$Genus[ord])) phyla_colors <- c( '#00B2FFFF', '#FF9900FF', '#FF0000FF', '#0DFF00FF', '#00FF9FFF', '#A600FFFF', '#B9FF00FF', '#FF00ACFF', '#0006FFFF', '#ababab' ) p <- ggplot2::ggplot(dasmoking.crude$table, ggplot2::aes(x = logFC, y = Genus, color = Phylum)) + ggplot2::geom_vline(xintercept = 0.0, color = "gray", size = 0.5) + ggplot2::geom_point(size = 3) + ggplot2::guides(color = ggplot2::guide_legend(title = 'Phylum')) + ggplot2::theme_light() + ggplot2::theme( axis.text.x = ggplot2::element_text( angle = -90, hjust = 0, vjust = 0.5 ), axis.text.y = ggplot2::element_text(face = "italic"), axis.text = ggplot2::element_text(size = 12), legend.text = ggplot2::element_text(size = 12), legend.box.margin = margin(-27, 0, -27, -27), legend.title = ggplot2::element_text(size = 14), title = ggplot2::element_text(size = 15), plot.title = ggplot2::element_text(margin = ggplot2::margin(b = 26)) ) + ggplot2::scale_y_discrete(position = "right") + ggplot2::labs(x = "Log2-fold change") + ggplot2::xlim(-3.1, 3.1) + ggplot2::ggtitle('Cigarette vs. Never smoker') + ggplot2::scale_color_manual(values = phyla_colors) p1 <- p + ggplot2::annotation_custom( grob = grid::textGrob('Lower in smokers'), xmin = -1.8, xmax = -1.8, ymin = 29.7, ymax = 29.7 ) + ggplot2::annotation_custom( grob = grid::textGrob('Higher in smokers'), xmin = 1.8, xmax = 1.8, ymin = 29.7, ymax = 29.7 ) gg_table <- ggplot_gtable(ggplot_build(p1)) gg_table$layout$clip[gg_table$layout$name == "panel"] <- "off" grid::grid.draw(gg_table)
sample_data(NYC_HANES.phylum)$smokingstatus <- relevel(sample_data(NYC_HANES.phylum)$smokingstatus, "Never smoker") DGELRT <- get_edgeR_results( ~ smokingstatus, pseq = NYC_HANES.phylum, coef = 2, alph = 0.05, method = "BH" ) p <- ggplot2::ggplot(DGELRT$table, ggplot2::aes(x = logFC, y = Phylum, color = Phylum)) + ggplot2::geom_vline(xintercept = 0.0, color = "gray", size = 0.5) + ggplot2::geom_point(size = 3) + ggplot2::guides(color = ggplot2::guide_legend(title = 'Phylum')) + ggplot2::theme_light() + ggplot2::theme( axis.text.x = ggplot2::element_text( angle = -90, hjust = 0, vjust = 0.5 ), axis.text.y = ggplot2::element_text(face = "italic"), axis.text = ggplot2::element_text(size = 12), legend.text = ggplot2::element_text(size = 12), legend.box.margin = margin(-27, 0, -27, -27), legend.title = ggplot2::element_text(size = 14), title = ggplot2::element_text(size = 15), plot.title = ggplot2::element_text(margin = ggplot2::margin(b = 26)) ) + ggplot2::scale_y_discrete(position = "right") + ggplot2::labs(x = "Log2-fold change") + ggplot2::xlim(-3.1, 3.1) + ggplot2::ggtitle('Cigarette vs. Never smoker') + ggplot2::scale_color_manual(values = phyla_colors) p1 <- p + ggplot2::annotation_custom( grob = grid::textGrob('Lower in smokers', gp = grid::gpar(col = "black", fontsize = 11)), xmin = -1.8, xmax = -1.8, ymin = 5.8, ymax = 5.8 ) + ggplot2::annotation_custom( grob = grid::textGrob('Higher in smokers', gp = grid::gpar(col = "black", fontsize = 11)), xmin = 1.8, xmax = 1.8, ymin = 5.8, ymax = 5.8 ) gg_table <- ggplot_gtable(ggplot_build(p1)) gg_table$layout$clip[gg_table$layout$name == "panel"] <- "off" grid::grid.draw(gg_table)
#Adjusted model with race, sex, age, physical activity, education level, diabetes status (based on serum HbA1c) and self-reported gum disease da.adj <- get_edgeR_results( ~ smokingstatus + AGEGRP4C + GENDER + RACE + SR_ACTIVE + EDU3CAT + DBTS_NEW, coef = 2, alph = 0.05 ) da.adj <- da.adj[sort(da.adj %>% rownames), ]
da.adj$table$Genus <- as.character(da.adj$table$Genus) da.adj$table['New.ReferenceOTU72', 'Genus'] <- 'Family XIII' da.adj$table$Genus[is.na(da.adj$table$Genus)] <- 'unclassified' ord <- order(da.adj$table[['Phylum']], da.adj$table$logFC, decreasing = TRUE) da.adj$table$Genus <- factor(da.adj$table$Genus, levels = unique(da.adj$table$Genus[ord])) phyla_colors <- c('#FF9900FF', '#0DFF00FF', '#00FF9FFF', '#A600FFFF', '#FF00ACFF') p <- ggplot2::ggplot(da.adj$table, ggplot2::aes(x = logFC, y = Genus, color = Phylum)) + ggplot2::geom_vline(xintercept = 0.0, color = "gray", size = 0.5) + ggplot2::geom_point(size = 3) + ggplot2::guides(color = ggplot2::guide_legend(title = 'Phylum')) + ggplot2::theme_light() + ggplot2::theme( axis.text.x = ggplot2::element_text( angle = -90, hjust = 0, vjust = 0.5 ), # axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t=25)), axis.text.y = ggplot2::element_text(face = "italic"), axis.text = ggplot2::element_text(size = 12), legend.text = ggplot2::element_text(size = 12), legend.title = ggplot2::element_text(size = 14), title = ggplot2::element_text(size = 15), plot.title = ggplot2::element_text(margin = ggplot2::margin(b = 26)) ) + ggplot2::scale_y_discrete(position = "right") + ggplot2::xlim(-2.5, 2.5) + ggplot2::labs(x = "Log2-fold change") + ggplot2::ggtitle('Cigarette vs. Never smoker') + ggplot2::scale_color_manual(values = phyla_colors) p p1 <- p + ggplot2::annotation_custom( grob = grid::textGrob('Lower in smokers'), xmin = -1.4, xmax = -1.4, ymin = 17.2, ymax = 17.2 ) + ggplot2::annotation_custom( grob = grid::textGrob('Higher in smokers'), xmin = 1.4, xmax = 1.4, ymin = 17.2, ymax = 17.2 ) gg_table <- ggplot_gtable(ggplot_build(p1)) gg_table$layout$clip[gg_table$layout$name == "panel"] <- "off" grid::grid.draw(gg_table)
#Get crude edgeR results without filtering for FDR dasmoking.crude <- get_edgeR_results( ~ smokingstatus, coef = 2, alph = 2, method = "BH") dasmoking.crude$table <- dasmoking.crude$table[sort(dasmoking.crude$table %>% rownames), ] #Get adjusted edgeR results without filtering for FDR da.adj <- get_edgeR_results( ~ smokingstatus + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW + OHQ_3, coef = 2, alph = 2 ) da.adj <- da.adj[sort(da.adj %>% rownames), ] # Build result table by taking log2FC from crude and adjusted models and FDR from the crude one. if (identical(all.equal(rownames(da.adj), rownames(dasmoking.crude)), TRUE)) { compardat <- data.frame( crude = dasmoking.crude$table$logFC, padj = dasmoking.crude$table$FDR, adjusted = da.adj$table$logFC ) } else{ stop("rownames mismatch between da.adj and dasmoking.crude") }
plot( ggplot(compardat) + geom_abline( color = "black", alpha = 0.5, size = 1 ) + geom_point(aes(crude, adjusted, alpha = padj < 0.05), size = 2) + scale_colour_manual(values = setNames(c('grey', 'black'), c(FALSE, TRUE)), guide = FALSE) + scale_x_continuous(limits = c(-2, 2)) + scale_y_continuous(limits = c(-2, 2)) + xlab("Crude log2 fold change") + ylab("Adjusted log2 fold change") + theme_bw() + theme( axis.text = element_text( hjust = 0, vjust = 0.5, size = 15 ), strip.text = element_text(size = 13), axis.title = element_text(size = 14), legend.position = 'bottom' ) )
A function for merging smoking and cotinine edgeR results:
mergeResults <- function(obj.smoking, obj.cotinine) { intersect.filters <- intersect(rownames(obj.smoking), rownames(obj.cotinine)) intersect.filters <- sort(intersect.filters) obj.smoking <- obj.smoking[intersect.filters, ] obj.cotinine <- obj.cotinine[intersect.filters, ] stopifnot(all.equal(rownames(obj.smoking), rownames(obj.cotinine))) ## res <- data.frame( logFC_smoking = obj.smoking$table$logFC, logFC_cotinine = obj.cotinine$table$logFC, FDR_smoking = obj.smoking$table$FDR, FDR_cotinine = obj.cotinine$table$FDR, OTU = rownames(obj.smoking$table) ) res <- cbind(res, obj.smoking[, c("Phylum", "Class", "Order", "Family", "Genus", "Species")]) return(res) }
And a plotting function:
plotSmokingcotinineComparison <- function(obj.smoking, obj.cotinine, labtext = "Crude", FDRcutoff = 1) { dataset <- mergeResults(obj.smoking, obj.cotinine) dataset <- dataset[dataset$FDR_smoking < FDRcutoff,] corr.result <- with(dataset, cor.test(logFC_smoking, logFC_cotinine)) p <- ggplot(dataset) + theme_bw() + xlab(paste(labtext, "Smokers vs. Non-smokers")) + ylab(paste(labtext, "Cotinine Levels (ng/ml)")) + geom_text(aes(1,-0.3, label = sprintf( "p-value: %s\ncor: %s \nFDR<0.05: %s", signif(corr.result$p.value, 2), signif(corr.result$estimate, 2), sum(dataset$FDR_smoking < 0.05) )), size = 5) + geom_point(aes(logFC_smoking, logFC_cotinine, color = FDR_smoking <= 0.05), size = 3) + scale_colour_manual(values = setNames(c('black', 'grey'), c(TRUE, FALSE)), guide = FALSE) + geom_smooth( aes(logFC_smoking, logFC_cotinine), method = lm, se = FALSE, color = "red" ) + theme( axis.text = element_text( hjust = 0, vjust = 0.5, size = 13 ), strip.text = element_text(size = 13), axis.title = element_text(size = 15) ) return(p) }
Two thresholds for serum cotinine:
NYC_HANES.secondhand.original <- subset_samples(NYC_HANES, smokingstatus %in% c("Secondhand")) NYC_HANES.secondhand.strict <- subset_samples(NYC_HANES.secondhand.original, COTININE < 10) sort(sample_data(NYC_HANES.secondhand.original)$COTININE) summary(sample_data(NYC_HANES.secondhand.original)$COTININE > 10)
#Get crude edgeR results for model built with continuous cotinin without filtering for FDR dacot.crude <- get_edgeR_results(~ COTININE, pseq = NYC_HANES.secondhand.original, filtering=TRUE, # pre-filtering alph = 2, #don't apply alpha cutoff method = "BH") #Get crude edgeR results using strict definition of second-hand smokers dacot.crude.strict <- get_edgeR_results(~ COTININE, pseq = NYC_HANES.secondhand.strict, filtering=TRUE, # pre-filtering alph = 2, #don't apply alpha cutoff method = "BH") #Get crude edgeR results for model smokers vs non-smokers without filtering dasmoking.crude <- get_edgeR_results(~ smokingstatus, pseq=NYC_HANES, filtering=TRUE, # pre-filtering alph = 2, #don't apply alpha cutoff coef = 2, #use non-smokers as reference method = "BH") edger.smoker_secondhand_crude <- mergeResults(dasmoking.crude, dacot.crude) edger.smoker_secondhand_crude.strict <- mergeResults(dasmoking.crude, dacot.crude.strict)
Number of OTUs with FDR < 0.05 and total that passed non-specific screens in both analysis:
summary(edger.smoker_secondhand_crude$FDR_smoking < 0.05) # note that the different total number for strict cotinine cutoff is # due to non-specific filtering in edgeR summary(edger.smoker_secondhand_crude.strict$FDR_smoking < 0.05)
corarray <- array(dim=c(2, 2, 2), dimnames=list(secondhand=c("cot14", "cot10"), counfounding=c("crude", "adjusted"), FDRcutoff=c("no", "yes"))) parray <- array(dim=c(2, 2, 2), dimnames=list(secondhand=c("cot14", "cot10"), counfounding=c("crude", "adjusted"), FDRcutoff=c("no", "yes")))
Correlation between crude smoking and cotinine coefficients, all OTUs:
corr <- with(edger.smoker_secondhand_crude, cor.test(logFC_smoking, logFC_cotinine)) parray["cot14", "crude", "no"] <- corr$p.value corarray["cot14", "crude", "no"] <- corr$estimate
Strict definition of second-hand smoke:
corr <- with(edger.smoker_secondhand_crude.strict, cor.test(logFC_smoking, logFC_cotinine)) parray["cot10", "crude", "no"] <- corr$p.value corarray["cot10", "crude", "no"] <- corr$estimate
Correlation between crude smoking and cotinine coefficients, FDR < 0.05 in smoking:
corr <- with(filter(edger.smoker_secondhand_crude, FDR_smoking < 0.05), cor.test(logFC_smoking, logFC_cotinine)) corarray["cot10", "crude", "yes"] <- corr$p.value parray["cot10", "crude", "yes"] <- corr$estimate
All OTUs, black for FDR < 0.05 on smoking coefficients, grey for FDR >= 0.05
p <- plotSmokingcotinineComparison(dasmoking.crude, dacot.crude, labtext = "Crude", FDR = 1) p
Repeat, using the strict definition of second-hand smokers (cotinine < 10)
p <- plotSmokingcotinineComparison(dasmoking.crude, dacot.crude.strict, labtext = "Crude", FDR = 1) p
dacot.adjusted <- get_edgeR_results(~ COTININE + AGEGRP4C + EDU3CAT, pseq = NYC_HANES.secondhand.original, alph = 2, filtering = TRUE, method = "BH" ) dacot.adjusted.strict <- get_edgeR_results(~ COTININE + AGEGRP4C + EDU3CAT, pseq = NYC_HANES.secondhand.strict, alph = 2, filtering = TRUE, method = "BH" ) dasmoking.adjusted <- get_edgeR_results(~ smokingstatus + AGEGRP4C + EDU3CAT, coef = 2, alph = 2, filtering = TRUE ) edger.smoker_secondhand_adjusted <- mergeResults(dasmoking.adjusted, dacot.adjusted) edger.smoker_secondhand_adjusted.strict <- mergeResults(dasmoking.adjusted, dacot.adjusted.strict)
Number of differentially abundant OTUs, and total
nrow(edger.smoker_secondhand_adjusted) summary(edger.smoker_secondhand_adjusted$logFC_smoking < 0.05) summary(edger.smoker_secondhand_adjusted$logFC_cotinine < 0.05)
Correlation between adjusted smoking and cotinine coefficients, all OTUs:
corr <- with(edger.smoker_secondhand_adjusted, cor.test(logFC_smoking, logFC_cotinine)) parray["cot14", "adjusted", "no"] <- corr$p.value corarray["cot14", "adjusted", "no"] <- corr$estimate
As above, using strict serum cotinine cutoff (<10)
corr <- with(edger.smoker_secondhand_adjusted.strict, cor.test(logFC_smoking, logFC_cotinine)) parray["cot10", "adjusted", "no"] <- corr$p.value corarray["cot10", "adjusted", "no"] <- corr$estimate
Correlation between adjusted smoking and cotinine coefficients, FDR < 0.05 in smoking:
with(filter(edger.smoker_secondhand_adjusted, FDR_smoking < 0.05), cor.test(logFC_smoking, logFC_cotinine)) parray["cot14", "adjusted", "yes"] <- corr$p.value corarray["cot14", "adjusted", "yes"] <- corr$estimate
Sensitivity analysis:
round(corarray, 2) signif(parray, 1)
p <- plotSmokingcotinineComparison( obj.smoking = dasmoking.adjusted, obj.cotinine = dacot.adjusted, labtext = "Adjusted", FDRcutoff = 1 ) p
Repeat, using the strict definition of second-hand smokers (cotinine < 10)
p <- plotSmokingcotinineComparison( obj.smoking = dasmoking.adjusted, obj.cotinine = dacot.adjusted.strict, labtext = "Adjusted", FDRcutoff = 1 ) p
stopifnot(all.equal(rownames(edger.smoker_secondhand_adjusted), rownames(edger.smoker_secondhand_crude))) crude <- edger.smoker_secondhand_crude[, 1:4] adjusted <- edger.smoker_secondhand_adjusted[, 1:4] colnames(crude) <- paste0(colnames(crude), "_crude") colnames(adjusted) <- paste0(colnames(adjusted), "_adjusted") sfile <- cbind(edger.smoker_secondhand_crude[, -1:-4], crude, adjusted) readr::write_excel_csv(sfile, path="SupplementalFile1.csv")
sample_data(NYC_HANES)$smokingstatus <- relevel(sample_data(NYC_HANES)$smokingstatus,"Never smoker") plot_edgeR(~ smokingstatus, pseq = NYC_HANES, coef=4, alph = 0.05, filtering = TRUE, method = "BH", color = "Phylum", sortby = "Phylum",invisbl = FALSE)
sample_data(NYC_HANES)$E_CIGARETTES <- factor(sample_data(NYC_HANES)$E_CIGARETTES,labels = c('Yes','No',NaN)) plot_edgeR(~ E_CIGARETTES, pseq = NYC_HANES, alph = 0.05, filtering = TRUE, method = "BH", sortby = "Phylum") -> edger.univ
sample_data(NYC_HANES)$HOOKAH_PIPE <- factor(sample_data(NYC_HANES)$HOOKAH_PIPE, labels = c('Yes','No',NaN)) plot_edgeR(~ HOOKAH_PIPE, pseq = NYC_HANES, alph = 0.05, filtering = TRUE, method = "BH", sortby = "Phylum", color = 'Phylum',invisbl = FALSE)
sample_data(NYC_HANES)$CIGARS_CIGARILLOS <- factor(sample_data(NYC_HANES)$CIGARS_CIGARILLOS,labels = c('Yes','No',NaN)) plot_edgeR(~ CIGARS_CIGARILLOS, pseq = NYC_HANES, alph = 0.05, filtering = TRUE, method = "BH", sortby = "Phylum", invisbl = FALSE)
R.utils::gcDLLs() dasmoking.crude <- get_edgeR_results(~ smokingstatus, coef = 2, alph = 2, filtering = FALSE, method = "BH") dasmoking.crude$table %<>% left_join(biosis, by = c("Genus"="X1")) dasmoking.crude <- dasmoking.crude$table dasmoking.crude$direction <- factor(ifelse(dasmoking.crude$logFC >0, 'Enriched in smokers', 'Depleted in smokers')) dasmoking.crude$direction <- factor(dasmoking.crude$direction, levels(dasmoking.crude$direction)[c(2,1)]) direction <- c('enriched in','depleted in;') x<- table(dasmoking.crude$X2 == 'Aerobic', dasmoking.crude$direction)[c(2,1),] rownames(x) <- c('OTU is aerobic', 'OTU is not aerobic') epitools::epitab(x) x<- table(dasmoking.crude$X2 == 'Anaerobic', dasmoking.crude$direction)[c(2,1),] rownames(x) <- c('OTU is anaerobic', 'OTU is not anaerobic') epitools::epitab(x) x<- table(dasmoking.crude$X2 == 'F Anaerobic', dasmoking.crude$direction)[c(2,1),] rownames(x) <- c('OTU is F Anaerobic', 'OTU is not F Anaerobic') epitools::epitab(x)
full_eset <- ExpressionSet( assayData = microbiome::abundances(NYC_HANES), phenoData = new("AnnotatedDataFrame", sample_data(NYC_HANES)) ) full_eset$CATCOTININE <- ifelse(full_eset$COTININE < 3,'low','high') full_annotated_ds <- tax_table(NYC_HANES) %>% as.data.frame %>% bind_cols(data.frame(OTU=rownames(tax_table(NYC_HANES)))) %>% filter(Domain != "Unassigned") %>% left_join(biosis, by = c("Genus"="X1")) alt_full_eset <- full_eset[,(full_eset$smokingstatus %in% c("Never smoker","Alternative smoker"))] alt_full_eset$smokingstatus <- droplevels(alt_full_eset$smokingstatus) levels(alt_full_eset$smokingstatus) <- c('Never','Alternative') alt_full_eset$GROUP <- ifelse(alt_full_eset$smokingstatus == "Alternative",1,0) EnrichmentBrowser::configEBrowser("OUTDIR.DEFAULT","./results")
sns.eset <- full_eset[,full_eset$smokingstatus %in% c("Cigarette","Never smoker")] NYC_HANES.sns <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Cigarette","Never smoker")) sns.eset$GROUP <- ifelse(sns.eset$smokingstatus == "Cigarette",1,0) de.sns_eset <- EnrichmentBrowser::deAna(sns.eset, de.method = 'edgeR', filter.by.expr = FALSE) design = model.matrix(~smokingstatus, data=data.frame(sample_data(NYC_HANES.sns))) otu_table.voom <- limma::voom(SummarizedExperiment::assay(de.sns_eset), design = design, plot = FALSE) SummarizedExperiment::assay(de.sns_eset) <- otu_table.voom$E class(SummarizedExperiment::assay(de.sns_eset)) <- "matrix" sns_ds <- full_annotated_ds %>% filter(OTU %in% taxa_names(NYC_HANES.sns)) sns_sAero <- (sns_ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sns_sAnae <- (sns_ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sns_sFana <- (sns_ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character sns_my.gs <- list(aero=sns_sAero, anae=sns_sAnae, fana=sns_sFana) sns.gsea.res <- EnrichmentBrowser::sbea("gsea", de.sns_eset, sns_my.gs) EnrichmentBrowser::gsRanking(sns.gsea.res, signif.only=FALSE)
expr = SummarizedExperiment::assay(de.sns_eset) expr = expr[SummarizedExperiment::rowData(de.sns_eset)$ADJ.PVAL <= 0.05,] grp = factor(de.sns_eset$GROUP, labels = c('Never smoker','Cigarette')) expr <- t(scale(t(expr))) # expr <- expr[rownames(expr) %in% unlist(sns.gsea.res$gs$aero), ] expr <- expr[rownames(expr) %in% unlist(sns.gsea.res$gs), ] coll <- c("#B62A84", "#2AB68C") names(coll) <- levels(grp) coll <- list(Group = coll) df <- data.frame(Group = grp) ha <- ComplexHeatmap::HeatmapAnnotation(df = df, col=coll) rownames(expr) <- paste(data.frame(tax_table(NYC_HANES)[rownames(expr),])$Genus, '') ComplexHeatmap::Heatmap(expr, name = "Expression", top_annotation = ha, show_row_names = (nrow(expr) < 41), show_column_names = (ncol(expr) < 41), column_title = "Samples", row_title = "OTUs", cluster_rows = TRUE # row_order = unlist(gsea.res$gs) )
sns_ora.res <- EnrichmentBrowser::sbea("ora", de.sns_eset, sns_my.gs, perm=1000) EnrichmentBrowser::gsRanking(sns_ora.res, signif.only=FALSE)
NYC_HANES.sh <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Secondhand")) sh_eset <- full_eset[,full_eset$smokingstatus %in% c("Secondhand")] cotinine_quantiles <- quantile(sh_eset$COTININE ,c(0.33,0.66)) sh_eset$GROUP <- ifelse(sh_eset$COTININE < cotinine_quantiles[[1]], 0,ifelse(sh_eset$COTININE > cotinine_quantiles[[2]],1,NA)) subeset <- sh_eset[,!is.na(sh_eset$GROUP)] sh_design = model.matrix(~CATCOTININE, data = data.frame(pData(subeset))) de.sh_eset <- EnrichmentBrowser::deAna(subeset, de.method = 'edgeR', filter.by.expr = FALSE) otu_table.voom <- limma::voom(SummarizedExperiment::assay(de.sh_eset), design = sh_design, plot = FALSE) SummarizedExperiment::assay(de.sh_eset) <- otu_table.voom$E class(SummarizedExperiment::assay(de.sh_eset)) <- "matrix" sh_ds <- full_annotated_ds %>% dplyr::filter(OTU %in% taxa_names(NYC_HANES.sh)) sh_sAero <- (sh_ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sh_sAnae <- (sh_ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sh_sFana <- (sh_ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character sh_my.gs <- list(aero=sh_sAero, anae=sh_sAnae, fana=sh_sFana) sh_gsea.res <- EnrichmentBrowser::sbea("gsea", de.sh_eset, sh_my.gs) EnrichmentBrowser::gsRanking(sh_gsea.res, signif.only=FALSE)
sh_ora.res <- EnrichmentBrowser::sbea("ora", de.sh_eset, sh_my.gs) EnrichmentBrowser::gsRanking(sh_ora.res, signif.only=FALSE)
sh.gsva <- GSVA::gsva(sh_eset, sh_my.gs, parallel.sz = 1) sh_fit <- limma::lmFit(sh.gsva, sh.gsva$COTININE) %>% eBayes() topTable(sh_fit)
NYC_HANES.sns.nc <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Cigarette","Never smoker") & !is.na(COTININE)) sns.nc.eset <- full_eset[,full_eset$smokingstatus %in% c("Cigarette","Never smoker") & !is.na(full_eset$COTININE)] sns.nc_ds <- full_annotated_ds %>% dplyr::filter(OTU %in% taxa_names(NYC_HANES.sns.nc)) sns.nc_sAero <- (sns.nc_ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sns.nc_sAnae <- (sns.nc_ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sns.nc_sFana <- (sns.nc_ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character sns.nc_my.gs <- list(aero=sns.nc_sAero, anae=sns.nc_sAnae, fana=sns.nc_sFana) sns.nc.my.gsva <- GSVA::gsva(sns.nc.eset, sns.nc_my.gs, parallel.sz = 1) sns.nc.fit <- limma::lmFit(sns.nc.my.gsva, sns.nc.my.gsva$COTININE) %>% eBayes() topTable(sns.nc.fit)
NYC_HANES <- loadQiimeData() |> annotateFactors() alt_smokers_cigarettes <- NYC_HANES |> sample_data() |> data.frame() |> dplyr::filter(smokingstatus == 'Alternative smoker') |> dplyr::filter(CIGARETTES == 'Yes') |> dplyr::select(Burklab_ID) |> t() NYC_HANES <- prune_samples(!(sample_names(NYC_HANES) %in% alt_smokers_cigarettes), NYC_HANES) sample_data(NYC_HANES)$smokingstatus <- relevel(sample_data(NYC_HANES)$smokingstatus,"Never smoker") NYC_HANES.alt <- subset_samples(NYC_HANES, smokingstatus %in% c("Never smoker", "Alternative smoker")) levels(sample_data(NYC_HANES.alt)$smokingstatus) <- c('Never','Alternative') full_eset <- ExpressionSet(assayData = microbiome::abundances(NYC_HANES), phenoData = new("AnnotatedDataFrame", sample_data(NYC_HANES))) full_eset$CATCOTININE <- ifelse(full_eset$COTININE < 3,'low','high') full_annotated_ds <- tax_table(NYC_HANES) %>% as.data.frame %>% bind_cols(data.frame(OTU=rownames(tax_table(NYC_HANES)))) %>% filter(Domain != "Unassigned") %>% left_join(biosis, by = c("Genus"="X1")) alt_full_eset <- full_eset[,(full_eset$smokingstatus %in% c("Never smoker","Alternative smoker"))] alt_full_eset$smokingstatus <- droplevels(alt_full_eset$smokingstatus) levels(alt_full_eset$smokingstatus) <- c('Never','Alternative') alt_full_eset$GROUP <- ifelse(alt_full_eset$smokingstatus == "Alternative",1,0)
NYC_HANES.hookah <- subset_samples(NYC_HANES.alt, ((smokingstatus %in% c("Never")) | (HOOKAH_PIPE == 1))) hookah_eset <- alt_full_eset[,(alt_full_eset$smokingstatus %in% c("Never") | tidyr::replace_na(alt_full_eset$HOOKAH_PIPE == 1, FALSE))] write.csv(hookah_eset$smokingstatus, '/tmp/a') de.hookah_eset <- EnrichmentBrowser::deAna(hookah_eset, de.method = 'edgeR', filter.by.expr = FALSE) hookah_design <- model.matrix(~smokingstatus, data = data.frame(pData(hookah_eset))) otu_table.voom <- limma::voom(SummarizedExperiment::assay(de.hookah_eset), design = hookah_design , plot = FALSE) SummarizedExperiment::assay(de.hookah_eset) <- otu_table.voom$E class(SummarizedExperiment::assay(de.hookah_eset)) <- "matrix" hookah_ds <- full_annotated_ds %>% dplyr::filter(OTU %in% taxa_names(NYC_HANES.hookah)) hookah_sAero <- (hookah_ds %>% filter(X2=="Aerobic"))$OTU %>% as.character hookah_sAnae <- (hookah_ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character hookah_sFana <- (hookah_ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character hookah_my.gs <- list(aero=hookah_sAero, anae=hookah_sAnae, fana=hookah_sFana) hookah_gsea.res <- EnrichmentBrowser::sbea("gsea", de.hookah_eset, hookah_my.gs) EnrichmentBrowser::gsRanking(hookah_gsea.res, signif.only=FALSE)
hookah_ora.res <- EnrichmentBrowser::sbea("ora", de.hookah_eset, hookah_my.gs, perm=0) EnrichmentBrowser::gsRanking(hookah_ora.res, signif.only=FALSE)
NYC_HANES.ecig <- NYC_HANES.alt %>% subset_samples(., smokingstatus %in% c("Never") | E_CIGARETTES == 1) ecig_eset <- alt_full_eset[,(alt_full_eset$smokingstatus %in% c("Never") | tidyr::replace_na(alt_full_eset$E_CIGARETTES == 1, FALSE))] de.ecig_eset <- EnrichmentBrowser::deAna(ecig_eset, de.method = 'edgeR', filter.by.expr = FALSE) ecig_design <- model.matrix(~smokingstatus, data = data.frame(pData(ecig_eset))) otu_table.voom <- limma::voom(SummarizedExperiment::assay(de.ecig_eset), design = ecig_design, plot = FALSE) SummarizedExperiment::assay(de.ecig_eset) <- otu_table.voom$E class(SummarizedExperiment::assay(de.ecig_eset)) <- "matrix" ecig_ds <- full_annotated_ds %>% dplyr::filter(OTU %in% taxa_names(NYC_HANES.ecig)) ecig_sAero <- (ecig_ds %>% filter(X2=="Aerobic"))$OTU %>% as.character ecig_sAnae <- (ecig_ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character ecig_sFana <- (ecig_ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character ecig_my.gs <- list(aero=ecig_sAero, anae=ecig_sAnae, fana=ecig_sFana) ecig_gsea.res <- EnrichmentBrowser::sbea("gsea", de.ecig_eset, ecig_my.gs) EnrichmentBrowser::gsRanking(ecig_gsea.res, signif.only=FALSE)
ecig_ora.res <- EnrichmentBrowser::sbea("ora", de.ecig_eset, ecig_my.gs, perm=0) EnrichmentBrowser::gsRanking(ecig_ora.res, signif.only=FALSE)
NYC_HANES.cigars <- NYC_HANES.alt %>% subset_samples(., smokingstatus %in% c("Never") | CIGARS_CIGARILLOS == 1) cigars_eset <- alt_full_eset[,(alt_full_eset$smokingstatus %in% c("Never") | tidyr::replace_na(alt_full_eset$CIGARS_CIGARILLOS == 1, FALSE))] de.cigars_eset <- EnrichmentBrowser::deAna(cigars_eset, de.method = 'edgeR', filter.by.expr = FALSE) cigars_design <- model.matrix(~smokingstatus, data = data.frame(pData(cigars_eset))) otu_table.voom <- limma::voom(SummarizedExperiment::assay(de.cigars_eset), design = cigars_design, plot = FALSE) SummarizedExperiment::assay(de.cigars_eset) <- otu_table.voom$E class(SummarizedExperiment::assay(de.cigars_eset)) <- "matrix" cigars_ds <- full_annotated_ds %>% dplyr::filter(OTU %in% taxa_names(NYC_HANES.cigars)) cigars_sAero <- (cigars_ds %>% filter(X2=="Aerobic"))$OTU %>% as.character cigars_sAnae <- (cigars_ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character cigars_sFana <- (cigars_ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character cigars_my.gs <- list(aero=cigars_sAero, anae=cigars_sAnae, fana=cigars_sFana) cigars_gsea.res <- EnrichmentBrowser::sbea("gsea", de.cigars_eset, cigars_my.gs) EnrichmentBrowser::gsRanking(cigars_gsea.res, signif.only=FALSE)
cigars_ora.res <- EnrichmentBrowser::sbea("ora", de.cigars_eset, cigars_my.gs, perm=0) EnrichmentBrowser::gsRanking(cigars_ora.res, signif.only=FALSE)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.