# Keep only columns (variable) of interest from the biocrates dataframe selected_columns <- names(df) %in% c( "Compound", "Sample.Name", "Sample.Identification", "Sample.Type", "Analyte Intensity [cps]", "Internal Std. Intensity [cps]", LOWCON_CONDITIONS ) df.filtered <- df[selected_columns] # Renaming column for consistent naming accross datasets df.filtered <- df.filtered %>% rename( Analyte.Intensity = `Analyte Intensity [cps]`, Internal.Std.Intensity = `Internal Std. Intensity [cps]`, ) # Extract PBS (zero samples) first from the dataframe df.zero_samples <- df.filtered[ df.filtered$Sample.Type == "Zero Sample", ] # Extract QC 2 samples df.qc_samples <- df.filtered[ df.filtered$Sample.Type == "QC Level 2", ] # Filter the dataframe to keep only biological samples # As we don't have any annotation file, we get our sample list directly from the dataframe df.filtered <- df.filtered[ df.filtered$Sample.Type %in% c("Sample", "Pooled QC"), ] # Problem in annotation, need to remove some files # df.filtered <- df.filtered[ which(!df.filtered$Sample.Identification %in% c('44','45','47','49','50')),]
No filtering is done at this stage and this table is given as infromation only
# Extra step to check whether some metabolites should be removed because they have a large RSD in the QC samples df.qc_samples.summary <- df.qc_samples %>% mutate(Analyte.Peak.Intensity.Normalised = Analyte.Intensity/Internal.Std.Intensity) %>% group_by(Compound) %>% mutate(relative_standard_deviation = rsd(na.omit(Analyte.Peak.Intensity.Normalised))) df.qc_samples.summary %>% distinct(Compound, .keep_all = TRUE) %>% select(Compound,relative_standard_deviation) %>% easy_datatable()
df.zero_samples <- df.zero_samples %>% group_by(Compound) %>% mutate(mean_zero_peak_intensity = mean(Analyte.Intensity, na.rm = T), median_zero_peak_intensity = median(Analyte.Intensity, na.rm = T), highest_zero_peak_intensity = max(Analyte.Intensity, na.rm = TRUE)) # mutate(mean_peak_area = mean(Analyte.Peak.Area..area., na.rm = T), median_peak_area = median(Analyte.Peak.Area..area., na.rm = T), highest_peak_area = max(Analyte.Peak.Area..area., na.rm = TRUE), mean_concentration = mean(Concentration..??M., na.rm = T), median_concentration = median(Concentration..??M., na.rm = T), highest_concentration = max(Concentration..??M., na.rm = TRUE)) # Replace "NaN" values by NA df.zero_samples[sapply(df.zero_samples, is.nan)] <- NA # Replace "-Inf" values created by max() by NA df.zero_samples[df.zero_samples=="-Inf"] <- NA df.zero_sample_summary <- df.zero_samples %>% group_by(Compound) %>% select(Compound, mean_zero_peak_intensity,median_zero_peak_intensity) %>% distinct(Compound, .keep_all = T) df.filtered <- left_join(df.filtered,df.zero_sample_summary[,c("Compound","mean_zero_peak_intensity", "median_zero_peak_intensity")], by="Compound")
df.filtered <- df.filtered %>% # mutate(above_3concentration = ifelse(Concentration....M. >= mean_concentration*3, TRUE, FALSE), above_3peak_area = ifelse(Analyte.Peak.Area..area. >= mean_peak_area*3, TRUE, FALSE)) mutate(above_3mean_peak_intensity = ifelse(Analyte.Intensity >= mean_zero_peak_intensity*3, TRUE, FALSE), above_3median_peak_intensity = ifelse(Analyte.Intensity >= median_zero_peak_intensity*3, TRUE, FALSE))
df.filtered %>% group_by(Compound) %>% summarise(Number_sample_above = sum(above_3median_peak_intensity == TRUE, na.rm = T), Number_sample_below = sum(above_3median_peak_intensity == FALSE, na.rm = T), Number_NA = sum(is.na(above_3median_peak_intensity))) %>% easy_datatable()
# First we set to NA all values below the threshold to discard them df.filtered$Analyte.Intensity[ which(df.filtered$above_3median_peak_intensity == FALSE)] = NA df.filtered <- df.filtered %>% mutate(Above.LOD = ifelse(Analyte.Intensity > LOWCON_MINIMUM_INTENSITY, TRUE, FALSE)) df.filtered$Analyte.Intensity[ which(df.filtered$Above.LOD == FALSE)] = NA df.filtered <- df.filtered %>% mutate(Analyte.Intensity.Normalised = Analyte.Intensity/Internal.Std.Intensity) condition_summary <- df.filtered %>% group_by_at(vars(Compound, one_of(LOWCON_CONDITIONS))) %>% summarise( Number_values = sum(!is.na(Analyte.Intensity)), Number_NA = sum(is.na(Analyte.Intensity)) ) %>% mutate(Keep = ifelse(Number_values >= 1, TRUE, FALSE)) condition_summary %>% group_by_at(vars(one_of(LOWCON_CONDITIONS))) %>% summarise(Number_of_metabolite = sum(Keep == TRUE)) %>% easy_datatable()
df.tot_area_HOS <- df.filtered %>% group_by(Sample.Name) %>% # filter(Cell.line == "HOS") %>% mutate( total_intensity = sum(Analyte.Intensity, na.rm = T), total_intensity_normalised = sum(Analyte.Intensity.Normalised, na.rm = T) ) %>% distinct(Sample.Name, .keep_all = TRUE) %>% ungroup() df.tot_area_HOS$condition <- df.tot_area_HOS %>% select(one_of(LOWCON_CONDITIONS)) %>% purrr::imap(.f = ~ paste(.y, .x, sep=" ")) %>% as_tibble %>% tidyr::unite(condition, LOWCON_CONDITIONS, sep=" | ") %>% pull(condition) df.tot_area_HOS$condition <- as.factor(df.tot_area_HOS$condition) ggplot(data=df.tot_area_HOS, aes(x=reorder(Sample.Identification, as.numeric(condition)), y=total_intensity, fill=condition)) + geom_bar(stat="identity")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(x = "Sample name", y = " Total peak intensity") + theme(legend.position = "bottom")
ggplot(data=df.tot_area_HOS, aes(x=reorder(Sample.Identification, as.numeric(condition)), y=total_intensity_normalised, fill=condition)) + geom_bar(stat="identity")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(x = "Sample name", y = " Total peak intensity") + theme(legend.position = "bottom") # knitr::knit_exit()
Remove SD outliers (i.e. set values to NA): r LOWCON_SD_OUTLIER_REM (according
to parameter lowcon_sd_outlier_removal)
# Calculate 2 standard deviation per group df.tot_area_HOS <- df.tot_area_HOS %>% group_by(condition) %>% mutate(tot_intensity_2_sd_low = mean(total_intensity_normalised)-1.5*sd(total_intensity_normalised),tot_intensity_2_sd_high = mean(total_intensity_normalised)+1.5*sd(total_intensity_normalised)) df.tot_area_HOS <- df.tot_area_HOS %>% mutate(inside_range = ifelse(total_intensity_normalised>tot_intensity_2_sd_low & total_intensity_normalised<tot_intensity_2_sd_high, TRUE, FALSE)) ggplot(data=df.tot_area_HOS, aes(x=reorder(Sample.Identification, as.numeric(condition)), y=total_intensity_normalised, fill=condition)) + geom_bar(stat="identity", aes(colour=inside_range))+ # scale_fill_manual(inside_range = c("FALSE" = "red", "TRUE" = "white"), legend = FALSE) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(x = "Sample name", y = "Total peak intensity") + theme(legend.position = "bottom")
# Remove outlier samples from the data if (LOWCON_SD_OUTLIER_REM){ df.tot_area_HOS$total_intensity_normalised[ which(df.tot_area_HOS$inside_range == FALSE)] = NA }
df.summary_HOS <- df.tot_area_HOS %>% group_by(condition) %>% mutate(mean_peak_intensity = mean(total_intensity_normalised, na.rm = T), standard_deviation = sd(total_intensity_normalised, na.rm = T)) %>% summarise(mean_peak_intensity = unique(mean_peak_intensity), standard_deviation = unique(standard_deviation)) ggplot(df.summary_HOS, aes(x=condition, y=mean_peak_intensity, fill=condition)) + geom_bar(position=position_dodge(), stat="identity") + geom_errorbar(aes(ymin=mean_peak_intensity-standard_deviation, ymax=mean_peak_intensity+standard_deviation), width=.2, # Width of the error bars position=position_dodge(.9)) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(x = "Condition", y = "Mean peak intensity") + theme(legend.position = "bottom")
summary_rsd_HOS <- df.tot_area_HOS %>% group_by(condition) %>% mutate(mean_peak_intensity = mean(total_intensity_normalised, na.rm = T), rsd = rsd(na.omit(total_intensity_normalised))) %>% summarise(mean_peak_intensity = unique(mean_peak_intensity), relative_standard_deviation = unique(rsd)) colnames(summary_rsd_HOS) <- c("Condition", "Mean peak intensity", "Relative Standard deviation (%)") summary_rsd_HOS %>% easy_datatable()
A minimum number of 2 values is required for the RSD to be reported in the table.
# Bring condition names and inside range value to the main dataframe df.filtered.condition.HOS <- left_join(df.filtered,df.tot_area_HOS[,c("Sample.Name","condition","inside_range")], by="Sample.Name") # Remove outlier samples found to be outside 1.5 standard deviation if (LOWCON_SD_OUTLIER_REM){ df.filtered.condition.HOS$Analyte.Intensity.Normalised[ which(df.filtered.condition.HOS$inside_range == FALSE)] = NA } # Keeping data for showing table later df.filtered.condition.HOS.table <- df.filtered.condition.HOS df.filtered.condition.HOS <- df.filtered.condition.HOS %>% # filter(Cell.line == "HOS") %>% group_by(condition, Compound) %>% mutate( relative_standard_deviation = rsd(na.omit(Analyte.Intensity.Normalised)), metabolite_mean_peak_area = mean(Analyte.Intensity.Normalised, na.rm = T) ) %>% distinct(Compound, condition, .keep_all = TRUE) df.metabolite_rsd_HOS <- df.filtered.condition.HOS %>% select(Compound, condition, relative_standard_deviation) %>% spread(Compound, relative_standard_deviation) df.metabolite_rsd_HOS %>% easy_datatable()
# Table that shows samples with normalized intensity (long) df.filtered %>% select(Compound, Sample.Identification, Sample.Type, all_of(LOWCON_CONDITIONS), Analyte.Intensity.Normalised) %>% easy_datatable( caption = "Long table of normalized intensities", export_csv = params$data_export_long, export_path = paste0(params$data_export_prefix, "_lowcon_fia_long.csv") ) # Table that shows samples with normalized intensity (wide) df.filtered %>% wide_conc_table_compounds_x_samples(value = "Analyte.Intensity.Normalised") %>% easy_datatable( caption = paste( "Wide table of normalized intensities", "(compounds x samples, with some metadata)" ), export_csv = params$data_export_wide, export_path = paste0(params$data_export_prefix, "_lowcon_fia_wide.csv") )
cat("\n## SD-filtered normalized intensity dataset\n\n") # Table that shows samples with normalized intensity (long) df.filtered.condition.HOS.table %>% ungroup() %>% select(Compound, Sample.Identification, Sample.Type, all_of(LOWCON_CONDITIONS), Analyte.Intensity.Normalised) %>% easy_datatable( caption = "Long table of normalized intensities" ) # Table that shows samples with normalized intensity (wide) df.filtered.condition.HOS.table %>% ungroup() %>% wide_conc_table_compounds_x_samples(value = "Analyte.Intensity.Normalised") %>% easy_datatable(caption = paste( "Wide table of normalized intensities", "(compounds x samples, with some metadata)" ))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.