Description Usage Arguments Value Examples
Count the known and MDM taxa whose sample prevalence exceeds each of a set of thresholds (1, 25, 50, 75, and 100 samples), and plot the counts as a line plot.
1 | get_back_counts_for_lineplotf(prev_df, met_name)
|
prev_df |
Data frame of sample prevalence and taxonomic classification for all OTUs in the OTU table; must contain the columns `Prevalence` and `Rank6` |
met_name |
Name of the environment; used as the prefix of the plot title and stored in the `Met_Name` column of the returned data frame |
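Returns a list with two elements: `df`, a data frame giving the number of known and MDM OTUs above each prevalence threshold, and `prev_plot`, a line plot of those counts. The plot is also printed.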
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | ##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function(prev_df, met_name) {
  # Count "MDM" vs. "Known" OTUs above a series of sample-prevalence
  # thresholds and draw the counts as a line plot.
  #
  # Args:
  #   prev_df:  data frame with one row per OTU; must contain the columns
  #             `Rank6` (taxonomic label) and `Prevalence` (compared
  #             numerically against each threshold).
  #   met_name: name of the environment; stored in the `Met_Name` column of
  #             the result and used as the prefix of the plot title.
  #
  # Returns:
  #   A list with `df` (columns MDM_type, Num_OTU, Prev_val, Met_Name; two
  #   rows per threshold) and `prev_plot` (the ggplot object). The plot is
  #   also printed as a side effect.
  #
  # The plotting calls below are ggplot2 functions, so ggplot2 must be
  # attached or imported by the enclosing package.
  missing_cols <- setdiff(c("Rank6", "Prevalence"), names(prev_df))
  if (length(missing_cols) > 0) {
    stop(
      "prev_df is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  thresholds <- c(1, 25, 50, 75, 100)

  # Classify every OTU once, up front: the classification does not depend on
  # the threshold. An OTU is MDM when its Rank6 label is unassigned (NA) or
  # contains "uncultured" or "Ambiguous"; everything else is Known.
  # A logical mask is used instead of `df[rows, ]$col <- value`, because that
  # form raises an error whenever `rows` selects nothing (e.g. a table with no
  # "Ambiguous" labels or no NA labels).
  rank6 <- as.character(prev_df$Rank6)
  is_mdm <- is.na(rank6) |
    grepl("uncultured", rank6) |
    grepl("Ambiguous", rank6) |
    rank6 %in% "MDM"

  prevalence <- prev_df$Prevalence

  counts_per_threshold <- lapply(thresholds, function(val) {
    # NOTE(review): the comparison is strict, so an OTU found in exactly `val`
    # samples is not counted at that threshold - confirm whether `>=`
    # ("present in at least val samples") was intended.
    # OTUs with a missing prevalence are never counted.
    above_val <- !is.na(prevalence) & prevalence > val

    num_MDM <- sum(above_val & is_mdm)
    print(num_MDM) # console trace: number of MDM OTUs above this threshold
    num_K <- sum(above_val) - num_MDM
    print(num_K) # console trace: number of Known OTUs above this threshold

    data.frame(
      MDM_type = c("MDM", "Known"),
      Num_OTU = c(num_MDM, num_K),
      Prev_val = val
    )
  })

  # Stack the per-threshold results and tag them with the environment name,
  # which is useful when results from several environments are combined.
  newl_df <- do.call(rbind, counts_per_threshold)
  newl_df$Met_Name <- met_name

  # Line plot of OTU counts (log10 y axis) against the prevalence threshold.
  g <- ggplot(newl_df, aes(Prev_val, Num_OTU, color = MDM_type)) +
    geom_line(aes(linetype = MDM_type)) +
    scale_y_log10() +
    theme_classic() +
    ylab("Number of OTUs (log scaled)") +
    xlab("Sample Prevalence") +
    ggtitle(paste(met_name, "OTU Prevalence", sep = " "))
  print(g)

  list(df = newl_df, prev_plot = g)
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.