knitr::opts_chunk$set(echo = TRUE) suppressPackageStartupMessages({ library(vegan) library(RColorBrewer) library(phyloseq) library(dplyr) library(ggplot2) library(tidyr) library(stringr) library(DESeq2) library(edgeR) library(reshape2) library(tableone) library(knitr) library(phyloseq) library(GSEABase) library(magrittr) library(GSVA) library(SummarizedExperiment)})
```{bash, render=FALSE}
```r NYC_HANES <- loadQiimeData() %>% annotateFactors(.)
metadata <- data.frame(sample_data(NYC_HANES)) source("prettytable_functions.R") variablesToLook <- c("GENDER","RACE","EDU4CAT","SPAGE", "AGEGRP5C","BMI","DBTS_NEW","SR_ACTIVE","SMQ_7","INC25KMOD","POVGROUP6_0812CT","COTININE","MERCURYU","OHQ_5") factorVars <- c("AGEGRP5C", "RACE","EDU4CAT","DBTS_NEW","SR_ACTIVE","INC25KMOD","POVGROUP6_0812CT", "GENDER") table1 <- CreateTableOne(vars = variablesToLook,data = metadata, strata = "smokingstatus", factorVars = factorVars, includeNA = T,test = T) table1_data <- formatMetadata(NYC_HANES) table1 <- prettytable_autoindent(vars = names(table1_data),data = table1_data, includeNA = TRUE,test = FALSE)
NYC_HANES.genus <- tax_glom(NYC_HANES,taxrank = "Genus") NYC_HANES.genus.relab <- transform_sample_counts(NYC_HANES.genus, function(x) x/sum(x)) NYC_HANES.relab <- transform_sample_counts(NYC_HANES, function(x) x/sum(x)) NYC_HANES.genus.relab <- merge_taxa(NYC_HANES.genus, taxa_names(filter_taxa(NYC_HANES.genus.relab, function(x) mean(x) < 2e-4, T))) tax_table(NYC_HANES.genus.relab)[is.na(tax_table(NYC_HANES.genus.relab)[,"Genus"]),"Genus"] <- "Other" NYC_HANES.genus.relab <- tax_glom(transform_sample_counts(NYC_HANES.genus.relab, function(x) x/sum(x)),taxrank = "Genus") NYC_HANES.phylum <- tax_glom(NYC_HANES,taxrank = "Phylum") NYC_HANES.phylum.relab <- transform_sample_counts(NYC_HANES.phylum, function(x) x/sum(x))
plot_abundance(NYC_HANES, top_n = 8, prop="total",taxrank = "Genus") + labs(x="296 oral rinse samples") -> taxa taxa
dir.create("out_img") png("out_img/taxa_genus.png",800,600, res = 150, bg = "transparent") taxa + theme(axis.title = element_text(colour = "white"), axis.text = element_text(colour = "white"), title = element_text(colour = "white"), legend.text = element_text(colour = "white"), axis.ticks = element_line(colour = "white"), # panel.border = element_rect(colour = "white"), # panel.background = element_rect(fill = "transparent",colour = NA), # plot.background = element_rect(fill = "transparent",colour = NA), legend.background = element_rect(fill = "transparent",colour = NA), legend.key = element_rect(fill = "transparent",colour = NA), panel.grid = element_blank() ) dev.off()
alphadiv <- estimate_richness(NYC_HANES, measures = c("Observed","Chao1","Shannon", "Simpson"))[,-3] alphadiv <- cbind(alphadiv, sample_data(NYC_HANES)$smokingstatus) colnames(alphadiv)[5] <- c("smokingstatus") alphadiv.melted <- melt(alphadiv, id.vars = c("smokingstatus")) alpha.ss <- ggplot(alphadiv.melted,aes(smokingstatus,value)) + geom_boxplot(aes(fill=smokingstatus), color="white")+ facet_grid(variable ~., scales = "free", switch = 'y') + scale_x_discrete(labels = c("Cigarette\nsmoker\nn=111","Never\nsmoker\nn=43","Former\nsmoker\nn=45","Alternative\nsmoker\nn=58","Secondhand\nsmoke\nn=39"))+ scale_y_continuous(position = 'right') + scale_fill_manual(values = scale_palette) + guides(colour="none") + theme_bw() + theme(strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180))
aovSMOKINGSTATUS<-aov(Shannon ~ smokingstatus,alphadiv) summary(aovSMOKINGSTATUS) aovSMOKINGSTATUS<-aov(Observed ~ smokingstatus,alphadiv) summary(aovSMOKINGSTATUS) aovSMOKINGSTATUS<-aov(Chao1 ~ smokingstatus,alphadiv) summary(aovSMOKINGSTATUS) alpha.ss
dir.create("out_img/") png("out_img/alpha_smokingstatus.png",res = 120, height = 800, width = 1000, bg = 'transparent') alpha.ss + theme_transparent + ggtitle("Smoking categories")+ theme(axis.text = element_text(size = 13), strip.text = element_text(size = 13, colour = "white"), axis.title.x = element_blank(), axis.title = element_text(size = 15) ) dev.off()
distwu <- phyloseq::distance(NYC_HANES, "wunifrac") ordwu <- ordinate(NYC_HANES, method = "MDS", distance = distwu, weighted = TRUE) smoker.wu.data <- plot_ordination(NYC_HANES, ordwu,justDF = T) %>% arrange() smoker.wu.data$weeksquit <- as.numeric(smoker.wu.data[,"SMQ_4"]) * as.numeric(smoker.wu.data[,"SMQ_4UNIT"]) smoker.wu.data[!smoker.wu.data$SMOKER3CAT %in% "Former smoker","weeksquit"] <- 5 smoker.wu.data$CIGARETTES[is.na(smoker.wu.data$CIGARETTES)] <- "No" smoker.wu.data %<>% bind_cols(otu_table(NYC_HANES.phylum.relab) %>% t %>% data.frame) proteobacteria <- tax_table(NYC_HANES.phylum.relab)[tax_table(NYC_HANES.phylum.relab)[,"Phylum"]=="Proteobacteria"] %>% rownames
betadiv.proteobacteria <- ggplot(subset(smoker.wu.data,smokingstatus %in% c("Cigarette","Never smoker"))) + geom_point(aes(Axis.1,Axis.2, color=New.ReferenceOTU194), size=3) + theme_bw() + scale_color_gradient(name='Proteobacteria\nrelative\nabundance', low = "blue", high = "red") + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) smoker.wu.noformer <- ggplot(subset(smoker.wu.data,smokingstatus %in% c("Cigarette","Never smoker"))) + geom_point(aes(Axis.1,Axis.2, color=smokingstatus), size=3, alpha=0.7) + theme_bw() + scale_color_manual(name='Smoking status', values = scale_palette[1:2]) + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) smoker.wu.former.never <- ggplot(subset(smoker.wu.data,smokingstatus %in% c("Cigarette","Never smoker","Former smoker"))) + geom_point(aes(Axis.1,Axis.2, color=smokingstatus, size=weeksquit), alpha=0.7) + theme_bw() + scale_color_manual(name='Smoking status', values = scale_palette[c(1:3)]) + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) + scale_size(range = c(2,8)) smoker.wu.second <- ggplot(subset(smoker.wu.data,smokingstatus %in% c("Cigarette","Never smoker","Former smoker","Secondhand"))) + geom_point(aes(Axis.1,Axis.2, color=smokingstatus, size=weeksquit), alpha=0.7) + theme_bw() + scale_color_manual(name='Smoking status', values = scale_palette[c(1:4)]) + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) + scale_size(range = c(2,8)) smoker.wu <- ggplot(smoker.wu.data) + geom_point(aes(Axis.1,Axis.2, color=smokingstatus, size=weeksquit), alpha=0.7) + theme_bw() + scale_color_manual(name='Smoking status', values = scale_palette) + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) + scale_size(range = c(2,8)) smoker.wu.alternative.cigarettes <- ggplot(subset(smoker.wu.data,smokingstatus %in% c("Cigarette","Alternative smoker"))) + geom_point(aes(Axis.1,Axis.2, color=smokingstatus, shape=CIGARETTES), alpha=0.7, size=3) + theme_bw() + scale_color_manual(name='Smoking status', values = scale_palette[c(1,5)]) + scale_shape_discrete(name='Alongside alternative ways,\ndo you also smoke cigarettes?') + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) cotinine.gradient <- ggplot() + xlab("Axis.1")+ ylab("Axis.2")+ theme_bw() + geom_point(data=smoker.wu.data[smoker.wu.data$COTININE<1,], aes(Axis.1, Axis.2), size=3, alpha=1, color="black") + geom_point(data=smoker.wu.data[smoker.wu.data$COTININE>1,], aes(Axis.1, Axis.2, color=COTININE), size=3, alpha=0.7) + scale_y_continuous(limits = c(-0.12,0.1)) + scale_x_continuous(limits = c(-0.17,0.14)) + scale_color_gradient(low = "blue", high = "red", name = "Cotinine (ng/ml)") + scale_size(range = c(2,8))
smoker.wu.noformer
Betadisper results are not significant, meaning we cannot reject the null hypothesis that our groups have the same dispersions.
metadata <- as(sample_data(NYC_HANES),"data.frame") permanova.res <- data.frame() for(s in combn(levels(metadata$smokingstatus),2, simplify = FALSE)){ metadata_2cat <- metadata[metadata$smokingstatus %in% s,] distwu_2cat <- phyloseq::distance(subset_samples(NYC_HANES, smokingstatus %in% s), "wunifrac") permanova <- adonis(distwu_2cat ~ smokingstatus + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW, metadata_2cat, parallel = 5) beta_2cat <- betadisper(distwu_2cat, metadata_2cat$smokingstatus, type = "centroid") permetest <- permutest(beta_2cat,parallel = T) permanova.res %<>% bind_rows(data.frame(contrast = stringr::str_c(s, collapse = " vs "), r2=permanova$aov.tab[1,5],pvalue=permanova$aov.tab[1,6], betadisp=permetest$tab$`Pr(>F)`[1])) %>% arrange(pvalue) } knitr::kable(permanova.res)
metadata_3cat <- metadata[metadata$smokingstatus %in% c("Cigarette","Never smoker", "Former smoker"),] distwu_3cat <- phyloseq::distance(subset_samples(NYC_HANES, smokingstatus %in% c("Cigarette","Never smoker", "Former smoker")), "wunifrac") adonis(distwu_3cat ~ smokingstatus + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW, metadata_3cat)
adonis(distwu ~ smokingstatus, metadata)
smoker.wu.former.never
smoker.wu.second
smoker.wu
smoker.wu.alternative.cigarettes
cotinine.gradient
png("out_img/beta-proteobacteria.png",1000,800, res = 120, bg = "transparent") betadiv.proteobacteria + theme_transparent dev.off() png("out_img/beta-SMOKER3CAT-sns.png",1000,800, res = 120, bg = "transparent") smoker.wu.noformer +theme_transparent dev.off() png("out_img/beta-SMOKER3CAT.former.png",1000,800, res = 120, bg = "transparent") smoker.wu.former.never + theme_transparent dev.off() png("out_img/beta-SMOKER3CAT.secondhand.png",1000,800, res = 120, bg = "transparent") smoker.wu.second dev.off() png("out_img/beta-SMOKER3CAT.allcat.png",1000,800, res = 120, bg = "transparent") smoker.wu dev.off() png("out_img/beta-SMOKER3CAT.cigarette_alt.png",1000,800, res = 120, bg = "transparent") smoker.wu.alternative.cigarettes dev.off() png("out_img/beta-COTININE-wu.png",1080,800, res = 120, bg = "transparent") cotinine.gradient dev.off()
| Up in smokers | Down in smokers | | ------------- | --------------- | | Sterptococcus | Neisseria | | Eubacterium | Porphyromonas | | Megasphera | Gemella | | Veilonella | Capnocytophaga | | Eikenella | Fusobacterium | | Actinomyces | Corynebacterium | | Rothia mucilaginosa | | | Bifidobacterium longum | | | Atopobium spp | | | Haemophilus | |
threshold <- 0.05 dds <- DESeq(phyloseq_to_deseq2(NYC_HANES, design = ~ smokingstatus), parallel = TRUE) ## here "Current smoker" is the numerator, "Never smoker" is the denominator in fold-change calculation res <- DESeq2::results(dds, contrast = c("smokingstatus","Cigarette","Never smoker")) res.filtered <- res[!is.na(res$padj),] res.filtered <- res.filtered[res.filtered$padj < threshold,] ## Create a table with the taxonomy of the significant OTUs res.tax <- cbind(as.data.frame(res.filtered), as.matrix(tax_table(NYC_HANES)[rownames(res.filtered),])) dds.taxa <- rownames(res.tax) res.maxfold <- tapply(res.tax$log2FoldChange, res.tax$Genus, function(x) max(x)) res.maxfold <- sort(res.maxfold, T) # res.tax["New.ReferenceOTU179","Genus"] <- res.tax["New.ReferenceOTU179","Phylum"] %>% as.character res.tax$Genus <- factor(as.character(res.tax$Genus), levels = names(res.maxfold)) res.tax$type <- "Current/Never" res.tax <- res.tax[order(res.tax$log2FoldChange),] biosis <- read.csv("biosis.tsv",header = T, sep = '\t')
taxp <- ggplot(right_join(biosis, res.tax, by = c("X1" = "Genus")), aes(log2FoldChange, X1)) + geom_vline(xintercept = 0.0, color = "gray", size = 0.5) + geom_point(size=3, aes(color = X2)) + facet_grid(Phylum ~ ., scales = "free", space = "free_y",switch = "y",as.table = T)+ theme_bw() + scale_y_discrete(position = "right") + scale_color_discrete(name = "Oxygen requirement")+ # guides(color=FALSE) + theme(axis.text = element_text(hjust = 0, vjust=0.5, size = 11), strip.text = element_text(size = 9), axis.title = element_text(size = 9), axis.title.y = element_blank(), strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180) ) taxp
## Counfounders added in the design's formula dds.conf <- DESeq(phyloseq_to_deseq2(NYC_HANES, design = ~ RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW + smokingstatus),parallel = TRUE) ##Use two out of three categories in the contrast variable res.conf <- results(dds.conf, contrast = c("smokingstatus","Cigarette","Never smoker"), pAdjustMethod = "BH") res.filtered.conf <- res.conf[!is.na(res.conf$padj),] res.filtered.conf <- res.filtered.conf[res.filtered.conf$padj < threshold,] res.tax.conf <- cbind(as.data.frame(res.filtered.conf), as.matrix(tax_table(NYC_HANES)[rownames(res.filtered.conf),])) res.tax.conf <- res.tax.conf[!is.na(res.tax.conf$Family),] dds.taxa.conf <- rownames(res.tax.conf) res.maxfold.conf <- tapply(res.tax.conf$log2FoldChange, res.tax.conf$Genus, function(x) max(x)) res.maxfold.conf <- sort(res.maxfold.conf, T) res.tax.conf["New.ReferenceOTU148","Genus"] <- "Neisseriaceae" res.tax.conf["New.ReferenceOTU137","Genus"] <- "Neisseriaceae" res.tax.conf["New.ReferenceOTU192","Genus"] <- "Pasteurellaceae" res.tax.conf$Genus <- factor(as.character(res.tax.conf$Genus), levels = names(res.maxfold.conf)) res.tax.conf$type <- "Cigarette/Never + conf"
source("edgeR_functions.R") covs <- c("RACE","GENDER","AGEGRP4C","SR_ACTIVE","EDU3CAT","DBTS_NEW") res1 <- list() for (i in c(1:6)) { form <- formula(paste("~", "smokingstatus+", str_c(covs[1:i], collapse = "+"))) dds1.conf <- get_edgeR_results(form , coef = 2, alph = 1) res1[[as.character(form)[2]]] <- data.frame(dds1.conf[rownames(dds1.conf$table) %>% sort,]) } res1.delta <- data.frame(res1[[1]]%>%rownames) for(i in c(2:6)){ res1.delta %<>% cbind(100*(res1[[i]]$logFC - res1[[i-1]]$logFC)/res1[[i]]$logFC %>% as.numeric() ) } lapply(res1.delta[,2:6], median)
taxp.conf <- ggplot(right_join(biosis, res.tax.conf, by = c("X1" = "Genus")), aes(log2FoldChange, X1)) + geom_vline(xintercept = 0.0, color = "gray", size = 0.5) + geom_point(size=3, aes(color = X2)) + facet_grid(Phylum~., scales = "free", space = "free_y",switch = "y",as.table = T) + theme_bw() + # guides(color=FALSE) + facet_grid(Phylum~., scales = "free", space = "free_y",switch = "y",as.table = T)+ scale_y_discrete( position = "right") + scale_color_discrete(name = "Oxygen requirement")+ # guides(color=FALSE) + theme(axis.text = element_text(hjust = 0, vjust=0.5, size = 11), strip.text = element_text(size = 9), axis.title = element_text(size = 9), axis.title.y = element_blank(), strip.background = element_rect(colour = "transparent", fill = "transparent"), strip.text.y = element_text(angle = 180) ) taxp.conf
compardat <- data.frame(univariate = res$log2FoldChange, pvalue = res$pvalue, padj = res$padj, multivariate = res.conf$log2FoldChange) compardat$padj[is.na(compardat$padj)] <- 1 compare.deseq2 <- ggplot(compardat) + geom_point(aes(univariate, multivariate, color=padj < 0.05), size=2) + geom_abline()+#color="white") + scale_colour_manual(values = setNames(c('#DCDC29','grey'),c(TRUE, FALSE)), guide=FALSE) + scale_x_continuous(limits = c(-2,2))+ scale_y_continuous(limits = c(-2,2))+ theme_bw() + theme( axis.text = element_text(hjust = 0, vjust=0.5, size = 17), strip.text = element_text(size = 13), axis.title = element_text(size = 15))
compare.deseq2
NYC_HANEScot <- subset_samples(NYC_HANES, smokingstatus %in% c("Secondhand")) ddscot <- DESeq(phyloseq_to_deseq2(NYC_HANEScot, design = ~ COTININE), parallel = T) ##Use two out of three categories in the contrast variable rescot <- results(ddscot, alpha = 0.05) cor.test(res$log2FoldChange, rescot$log2FoldChange, use="pairwise.complete.obs") corr <- cor.test(res$log2FoldChange[res$padj < 0.05], rescot$log2FoldChange[res$padj < 0.05], use="pairwise.complete.obs") corr cor.data <- data.frame(smoks=res$log2FoldChange[res$padj < 0.05], cot=rescot$log2FoldChange[res$padj < 0.05]) smokers_secondhand <- ggplot(cor.data,aes(smoks, cot)) + theme_bw() + xlab("Univariate Smokers vs. Non-smokers") + ylab("Univariate Cotinine Levels") + geom_text(aes(0,0.11,label = sprintf("p-value: %s\ncor: %s",signif(corr$p.value,2),signif(corr$estimate,2))), color="white") + geom_point(size=4, color = '#DCDC29') + geom_smooth(method=lm, se = FALSE,color="red") + # scale_y_continuous(limits = c(-2,2))+ scale_x_continuous(limits = c(-1.7,1.7)) + # theme_transparent + theme(axis.text = element_text(hjust = 0, vjust=0.5, size = 13), strip.text = element_text(size = 13), axis.title = element_text(size = 15))
smokers_secondhand
dir.create("out_img") png("out_img/deseq_smokerno.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) taxp + theme_transparent dev.off() png("out_img/deseq_smokerno_wconf.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) taxp.conf + theme_transparent dev.off() png("out_img/deseq_compare_univ_multiv.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) compare.deseq2 dev.off() png("out_img/deseq_compare_smokers_secondhand.png",650,550,bg = "transparent", res = 120) smokers_secondhand + theme_transparent dev.off()
source("edgeR_functions.R") sample_data(NYC_HANES)$smokingstatus <- relevel(sample_data(NYC_HANES)$smokingstatus,"Never smoker") da.univ <- get_edgeR_results(~ smokingstatus, coef = 2, alph = 2, filtering = F, method = "BH") da.univ <- da.univ[sort(da.univ %>% rownames),] plot_edgeR(~ smokingstatus, pseq = NYC_HANES, coef=2, alph = 0.05, filtering = T, method = "BH", color = "Phylum", sortby = "Phylum") -> edger.univ
da.multiv <- get_edgeR_results(~ smokingstatus +RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW , coef = 2, alph = 2, filtering = F) da.multiv <- da.multiv[sort(da.multiv %>% rownames),] plot_edgeR(~ smokingstatus + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW , coef = 2, pseq = NYC_HANES, alph = 0.05, color = "Phylum", filtering=TRUE, sortby = "Phylum")-> edger.multiv
compardat <- data.frame(univariate = da.univ$table$logFC, padj = da.univ$table$PValue, multivariate = da.multiv$table[da.univ$table %>% rownames,]$logFC) compare.edger <- ggplot(compardat) + geom_abline(color="black", alpha=0.5, size=1) + geom_point(aes(univariate, multivariate, color=padj < 0.05), size=2) + scale_colour_manual(values = setNames(c('#e41a1c','grey'),c(TRUE, FALSE)), guide=FALSE) + scale_x_continuous(limits = c(-2,2))+ scale_y_continuous(limits = c(-2,2))+ xlab("Univariate Log fold change") + ylab("Multivariate Log fold change") + theme_bw() + theme( axis.text = element_text(hjust = 0, vjust=0.5, size = 15), strip.text = element_text(size = 13), axis.title = element_text(size = 14))
compare.edger
da.cotinine <- get_edgeR_results(~ COTININE, pseq = subset_samples(NYC_HANES, smokingstatus %in% c("Secondhand")), alph = 2, filtering = TRUE, method = "BH") da.cotinine <- da.cotinine[sort(da.cotinine %>% rownames),] edger.smoker_secondhand <- data.frame(smokers = da.univ$table[rownames(da.cotinine$table),"logFC"], secondhand = da.cotinine$table$logFC, pvalue = da.univ$table[rownames(da.cotinine$table),"FDR"]) edger.smoker_secondhand<-edger.smoker_secondhand[edger.smoker_secondhand$pvalue<0.05,] corr <- cor.test(edger.smoker_secondhand$secondhand, edger.smoker_secondhand$smokers,use="pairwise.complete.obs") corr
secondhand.compare <- ggplot(edger.smoker_secondhand) + theme_bw() + xlab("Univariate Smokers vs. Non-smokers") + ylab("Univariate Cotinine Levels") + geom_text(aes(1,0.4,label = sprintf("p-value: %s\ncor: %s",signif(corr$p.value,2),signif(corr$estimate,2)))) + geom_point(aes(smokers, secondhand, color=pvalue <= 0.05), size=2) + scale_colour_manual(values = setNames(c('#e41a1c','grey'),c(TRUE, FALSE)), guide=FALSE) + geom_smooth(aes(smokers, secondhand), method=lm, se = FALSE,color="red") + theme(axis.text = element_text(hjust = 0, vjust=0.5, size = 13), strip.text = element_text(size = 13), axis.title = element_text(size = 15))
secondhand.compare
sample_data(NYC_HANES)$smokingstatus <- relevel(sample_data(NYC_HANES)$smokingstatus,"Never smoker") da.univ <- get_edgeR_results(~ smokingstatus, coef = 4, alph = 2, filtering = F, method = "BH") da.univ <- da.univ[sort(da.univ %>% rownames),] plot_edgeR(~ smokingstatus, pseq = NYC_HANES, coef=4, alph = 0.05, filtering = T, method = "BH", color = "Phylum", sortby = "Phylum") -> edger.univ
dir.create("out_img") png("out_img/edgeR_smokerno.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) edger.univ + theme_transparent dev.off() png("out_img/edgeR_smokerno_wconf.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) edger.multiv + theme_transparent dev.off() png("out_img/edgeR_compare_univ_multiv.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) compare.edger dev.off() svg("out_img/edgeR_compare_univ_multiv.svg",bg = "transparent") compare.edger dev.off() png("out_img/edgeR_compare_smokers_secondhand.png",bg = "transparent", res = 300, units = "in", width = 6, height = 6) smokers_secondhand dev.off()
Odds ratio calculated on aerobic/anaerobic vs enrichment in smokers/never smokers Anaerobic because degradation of aeromatic compounds
*Odds ratio are now calculated using all logFCs and not only on the ones statically significant
x <- cbind(as.data.frame(rescot), as.matrix(tax_table(NYC_HANES)[rownames(rescot),])) %>% left_join(biosis, by = c("Genus"="X1")) %>% dplyr::select(X2,log2FoldChange) %>% dplyr::mutate(log2FoldChange > 0 ) %>% dplyr::select(-log2FoldChange) x$`log2FoldChange > 0` <- factor(x$`log2FoldChange > 0`,labels = c("Non Smokers","Smokers")) x.table <- table(x$X2, x$`log2FoldChange > 0`) OR_aer <- (x.table[1,1]/x.table[2,1])/(x.table[1,2]/x.table[2,2]) OR_fan <- (x.table[1,1]/x.table[3,1])/(x.table[1,2]/x.table[3,2]) "OR Aerobic vs anaerobic" OR_aer "OR F anaerobic vs anaerobic" OR_fan se_aer <- sqrt(1/x.table[1,1]+1/x.table[2,1]+1/x.table[1,2]+1/x.table[2,2]) se_fan <- sqrt(1/x.table[1,1]+1/x.table[3,1]+1/x.table[1,2]+1/x.table[3,2]) CI_aer <- c(exp(log(OR_aer)-1.96*se_aer),exp(log(OR_aer)+1.96*se_aer)) CI_fan <- c(exp(log(OR_fan)-1.96*se_fan),exp(log(OR_fan)+1.96*se_fan)) fisher.test(x.table[1:2,]) fisher.test(x.table[c(1,3),])
NYC_HANES.sns <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Cigarette","Never smoker")) NYC_HANES.sns %<>% prune_taxa((NYC_HANES.sns %>% otu_table %>% rowSums())>30,.) meta <- sample_data(NYC_HANES.sns) pdata <- new("AnnotatedDataFrame",meta) group = sample_data(NYC_HANES.sns)$smokingstatus design = model.matrix(~smokingstatus, data=data.frame(sample_data(NYC_HANES.sns))) d <- edgeR::DGEList(otu_table(NYC_HANES.sns), group = sample_data(NYC_HANES.sns)$smokingstatus) d <- edgeR::calcNormFactors(d) class(d$counts) <- "matrix" otu_table.voom <- limma::voom(d, design = design, plot = FALSE) eset <- ExpressionSet(assayData = otu_table.voom$E, phenoData = pdata) eset$GROUP <- ifelse(eset$smokingstatus == "Cigarette",0,1) de.eset <- EnrichmentBrowser::de.ana(eset) par(mfrow=c(1,2)) EnrichmentBrowser::volcano(rowData(de.eset)$FC, rowData(de.eset)$ADJ.PVAL) eset.none <- EnrichmentBrowser::de.ana(de.eset, padj.method="none") EnrichmentBrowser::pdistr(rowData(eset.none)$ADJ.PVAL) biosis <- read.csv("biosis.tsv",header = T, sep = '\t') ds <- tax_table(NYC_HANES.sns) %>% as.data.frame %>% bind_cols(data.frame(OTU=rownames(tax_table(NYC_HANES.sns)))) %>% filter(Domain != "Unassigned") %>% left_join(biosis, by = c("Genus"="X1")) sAero <- (ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sAnae <- (ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sFana <- (ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character my.gs <- list(aero=sAero, anae=sAnae, fana=sFana) EnrichmentBrowser::config.ebrowser("OUTDIR.DEFAULT","./results") gsea.res <- EnrichmentBrowser::sbea("gsea", de.eset, my.gs) EnrichmentBrowser::gs.ranking(gsea.res, signif.only=FALSE)
ind <- which.min(rowData(de.eset)$ADJ.PVAL) otuname <- rownames(de.eset)[ind] smoker.exprs <- assay(de.eset)[ind,de.eset$GROUP==1] nsmoker.exprs <- assay(de.eset)[ind,de.eset$GROUP==0] boxplot(nsmoker.exprs, smoker.exprs) title(paste(otuname, tax_table(NYC_HANES.sns)[otuname,'Genus']))
ora.res <- EnrichmentBrowser::sbea("ora", de.eset, my.gs, perm=0) EnrichmentBrowser::gs.ranking(ora.res, signif.only=FALSE)
NYC_HANES.sh <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Secondhand")) NYC_HANES.sh %<>% prune_taxa((NYC_HANES.sh %>% otu_table %>% rowSums())>30,.) meta <- sample_data(NYC_HANES.sh) meta$catcotinine <- ifelse(meta$COTININE<3,'low','high') pdata <- new("AnnotatedDataFrame",meta) group = meta$catcotinine design = model.matrix(~catcotinine, data=data.frame(meta)) d = edgeR::DGEList(otu_table(NYC_HANES.sh), group = meta$catcotinine) d = edgeR::calcNormFactors(d) otu_table.voom <- voom(d, design = design, plot = FALSE) eset <- ExpressionSet(assayData = otu_table.voom$E, phenoData = pdata) eset$GROUP <- ifelse(meta$COTININE < 1.9, 0,ifelse(meta$COTININE>4.35,1,NA)) subeset <- eset[,!is.na(eset$GROUP)] ##TERTILES DIVISION HERE # quantile(meta$COTININE,c(0.33,0.66)) de.eset <- EnrichmentBrowser::de.ana(subeset) biosis <- read.csv("biosis.tsv",header = T, sep = '\t') ds <- tax_table(NYC_HANES.sh) %>% as.data.frame %>% bind_cols(data.frame(OTU=rownames(tax_table(NYC_HANES.sh)))) %>% filter(Domain != "Unassigned") %>% left_join(biosis, by = c("Genus"="X1")) sAero <- (ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sAnae <- (ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sFana <- (ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character my.gs <- list(aero=sAero, anae=sAnae, fana=sFana) EnrichmentBrowser::config.ebrowser("OUTDIR.DEFAULT","./results") gsea.res <- EnrichmentBrowser::sbea("gsea", de.eset, my.gs) EnrichmentBrowser::gs.ranking(gsea.res, signif.only=FALSE)
ora.res <- EnrichmentBrowser::sbea("ora", de.eset, my.gs, perm=0) EnrichmentBrowser::gs.ranking(ora.res, signif.only=FALSE)
NYC_HANES.sh <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Secondhand")) NYC_HANES.sh %<>% prune_taxa((NYC_HANES.sh %>% otu_table %>% rowSums())>30,.) meta <- sample_data(NYC_HANES.sh) pdata <- new("AnnotatedDataFrame",meta) group = meta$COTININE design = model.matrix(~COTININE, data=data.frame(meta)) d = edgeR::DGEList(otu_table(NYC_HANES.sh), group = meta$COTININE) d = edgeR::calcNormFactors(d) otu_table.voom <- voom(d, design = design, plot = FALSE) eset <- ExpressionSet(assayData = otu_table.voom$E, phenoData = pdata) biosis <- read.csv("biosis.tsv",header = T, sep = '\t') ds <- tax_table(NYC_HANES.sh) %>% as.data.frame %>% bind_cols(data.frame(OTU=rownames(tax_table(NYC_HANES.sh)))) %>% filter(Domain != "Unassigned") %>% left_join(biosis, by = c("Genus"="X1")) sAero <- (ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sAnae <- (ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sFana <- (ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character my.gs <- list(aero=sAero, anae=sAnae, fana=sFana) my.gsva <- gsva(eset, my.gs, parallel.sz = 1) fit <- limma::lmFit(my.gsva, my.gsva$COTININE) %>% eBayes() topTable(fit)
NYC_HANES.sns <- NYC_HANES %>% subset_samples(., smokingstatus %in% c("Cigarette","Never smoker") & !is.na(COTININE)) NYC_HANES.sns %<>% prune_taxa((NYC_HANES.sns %>% otu_table %>% rowSums())>30,.) meta <- sample_data(NYC_HANES.sns) pdata <- new("AnnotatedDataFrame",meta) design = model.matrix(~COTININE, data=data.frame(meta)) d = edgeR::DGEList(otu_table(NYC_HANES.sns), group = meta$COTININE) d = edgeR::calcNormFactors(d) otu_table.voom <- voom(d, design = design) eset <- ExpressionSet(assayData = otu_table.voom$E, phenoData = pdata) biosis <- read.csv("biosis.tsv",header = T, sep = '\t') ds <- tax_table(NYC_HANES.sns) %>% as.data.frame %>% bind_cols(data.frame(OTU=rownames(tax_table(NYC_HANES.sns)))) %>% filter(Domain != "Unassigned") %>% left_join(biosis, by = c("Genus"="X1")) sAero <- (ds %>% filter(X2=="Aerobic"))$OTU %>% as.character sAnae <- (ds %>% filter(X2=="Anaerobic"))$OTU %>% as.character sFana <- (ds %>% filter(X2=="F Anaerobic" | X2=="Aero / Facultative Anaerobic"))$OTU %>% as.character my.gs <- list(aero=sAero, anae=sAnae, fana=sFana) my.gsva <- gsva(eset, my.gs, parallel.sz = 1) fit <- limma::lmFit(my.gsva, my.gsva$COTININE) %>% eBayes() topTable(fit)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.