PSD.R

library(tidyverse)
library(FCSplankton)

setwd("PATH/TO/PROJECT/")

unstained <- FALSE

### Load Data

## Load mie lookup table
mie <- as.data.frame(read.csv(system.file("scatter", paste0("calibrated-mieINFLUX.csv"),package="FCSplankton")))
mie[1:3,] ## NOTE: Leo and Penny are included in the same Mie lookup table.

## Load metadata
meta <- read_csv("metadata.txt")
meta[1:3,]

## Qc metadata
unique(meta$sample) # check to make sure there are no metadata typos
length(unique(meta$sample)) # check to make sure number of unique samples matches expectation
# check to make sure each sample number appears only once (unstained) or twice (for stained)
if(unstained==FALSE){
  names(which(table(meta$sample) != 2))
}else{
  names(which(table(meta$sample) != 1))
}


## Convert metadata
file <- paste0(meta$file,".fcs") # format  sample name to filename (.fcs)
time <- meta$time
sample <- meta$sample
station <- meta$station
cast <- meta$cast
lat <- meta$lat
lon <- meta$lon
depth <- meta$depth
replicate <- meta$replicate
volume <- meta$volume
stain <- meta$stain
flag <- meta$flag
comments <- meta$comments

# create new metadata tibble
metadata <- tibble(file, time, sample, station, cast, lat, lon, depth, replicate, volume, stain, flag, comments)
metadata[1:3,]


### Create and bin particle size distribution (PSD) data

## Set PSD bin widths
m <- 137
v_min <- 0.002
delta_v_inv <- 11 # define the width of each bin
breaks <- round(v_min*2^(((1:m)+1)*delta_v_inv^-1),4) # log 2 spaced binning
print(breaks)

## Load gated FCS data to generate PSD
if(unstained){folder <- "unstained"
}else{folder <- c("unstained","stained")}

distribution <- tibble()

for(folder_name in folder){
  # Read FCS
  file_list <- list.files(paste0(folder_name,"/raw"), pattern = ".fcs$", full.names=T)

  # load one file
  this_file <- file_list[1]
  fcs <- read_influx(this_file, transformation = TRUE) # create a dataframe WITH log-amplified data)

  # replace number by column indice of FSC, 692, 580 and 530 respectively
  id <- c(13,23,19,17)
  names(fcs)[id]
  names_pmt <- names(fcs)[id] # original names of FSC, 692, 580 and 530 respectively

  ## PSD
  for (this_file in file_list){

    # Read data
    print(paste(this_file))
    fcs <- read_influx(this_file, transformation = TRUE) # create a flowFrame WITH transformation (for log-amplified data)

    # change header
    id <- match(names_pmt, names(fcs))
    names(fcs)[id] <- c("scatter", "red", "orange", "green")

    # Load gating parameters
    previous <- sub("raw","gating",paste0(this_file, ".RData"))
    if(file.exists(previous)) load(previous)

    # Assign "beads" label to particles
    fcs <- classify_fcs(fcs, gates.log[][1])

    # Normalize to beads
    beads <- fcs[which(fcs$pop == "beads"),]
    fcs$norm.scatter <- fcs$scatter / mean(beads$scatter)
    fcs$norm.orange <- fcs$orange / mean(beads$orange)
    fcs$norm.red <- fcs$red / mean(beads$red)
    fcs$norm.green <- fcs$green / mean(beads$green)

    # Apply gates and label particles according to 'gates.log'
    fcs <- try(classify_fcs(fcs, gates.log))
    if(class(fcs) == "try-error") next

    # Convert scatter to Qc
    id <- findInterval(fcs$norm.scatter, mie$scatter)
    id[which(id == 0)] <- 1
    fcs$quotas <- mie[id, "Qc_Penny_lwr"]

    # Select wanted and remove unwanted populations
    if(folder == "unstained") fcs.pop <- dplyr::filter(fcs, pop != "unknown" & pop != "beads")
    if(folder == "stained") fcs.pop <- dplyr::filter(fcs, pop== "bacteria")

    # create PSD
    psd <- fcs.pop %>%
      group_by(pop, breaks = cut(quotas, breaks = c(breaks,Inf), right=FALSE), .drop=F) %>%
      count(breaks) %>%
      pivot_wider(names_from = breaks, values_from = n)
    psd <- psd %>% add_column(file = basename(this_file), .before = 1)
    distribution <- dplyr::bind_rows(distribution, psd)
  }

  # Merge sample metadata with PSD
  DIST <- as_tibble(merge(distribution, metadata, by="file", all.x=TRUE))

  # convert breaks to geometric mean values
  clmn <- grep("^\\[.*?)$", names(DIST))

  # calculate abundance in each size class bin
  DIST[,clmn] <-  DIST[,clmn] / DIST[["volume"]]


}


### Data Correction

# Get PSD bin column names
clmn <- grep("^\\[.*?)$", names(DIST))

## Remove pro population from bacteria + prochlorococcus gate PSD
PSD_all <- cyano_psd <- DIST %>%
  dplyr::filter(stain!=1) # Only include unstained populations. Bacteria samples will be added back during the correction step.

pro <- DIST%>%
  dplyr::filter(pop == "prochloro" & stain!=1)

# Bacteria population correction
if(unstained == FALSE){
  bact <- hetero <- DIST %>%
    dplyr::filter(pop == "bacteria")

  # find matching sample numbers
  id.pro <- match(bact$sample, pro$sample, nomatch=0)
  id.bact <- which(id.pro != 0)

  # sanity check
  bact$sample[id.bact] == pro$sample[id.pro]

  # Remove Pro population from bacteria PSD - NOTE: this Assumes staining does not impact scatter values
  clmn <- grep("^\\[.*?)$", names(bact))
  hetero[id.bact,clmn] <- bact[id.bact,clmn] - pro[id.pro, clmn]
  hetero <- hetero[id.bact, ]
  hetero[,clmn][hetero[,clmn] < 0] <- 0
  hetero <- dplyr::select(hetero,-sample)
  hetero[1:3,]

  PSD_all <- rbind(cyano_psd, hetero) # Combine unstained PSD with corrected bacteria PSD
}

PSD_all[1:3,]

# calculate the geometric mean Qc for each size class
clmn <- grep(")", names(PSD_all))
b <- strsplit(sub("\\[","",sub("\\)","",colnames(PSD_all)[clmn])),",")
Qc_geom_mean <- unlist(list(lapply(b, function(x) sqrt(mean(as.numeric(x))*max(as.numeric(x))))))

colnames(PSD_all)[clmn] <- Qc_geom_mean

# convert dataframe to long format
PSD_long <- PSD_all %>%
  pivot_longer(
    cols = (clmn),
    names_to = "Qc",
    values_to = "abundance_per_bin",
    values_drop_na = TRUE
  )

# Remove infinite values from PSD bin breaks
PSD_long$Qc <- as.numeric(PSD_long$Qc)
PSD_long$Qc[sapply(PSD_long$Qc, is.infinite)] <- NA

# Calculate biomass (µgC/L) per bin
PSD$biomass_per_bin <- PSD$Qc * PSD$abundance_per_bin # (pgC/cell * cell/µL)

## Plot distributions to check PSD bacteria correction
if(unstained == FALSE){
PSD_correction_check <- PSD_long

PSD_correction_check <- PSD_correction_check %>%
  dplyr::select(-time,-file,-stain,-flag,-comments,-volume) %>%
  dplyr::group_by(sample,Qc,pop) %>%
  summarize_all(function(x) mean(x, na.rm=T)) %>%
  arrange(station)

PSD_correction_check$sample <- as_factor(PSD_correction_check$sample)

group_colors <- c(corrected_bacteria ="cyan",
                  bacteria = "lightsalmon1",
                  prochloro=viridis::viridis(4)[1])

 PSD_correction_check %>%
   group_by(station,sample)  %>%
   dplyr::filter(pop=="bacteria" | pop=="corrected_bacteria" | pop=="prochloro") %>%
   ggplot() +
   ggridges::geom_density_ridges(aes(x = Qc, y = treatment, height = abundance_per_bin, fill =pop, group = interaction(incubation,treatment,pop)), stat="identity", color="darkgrey", alpha=0.35,size=.35,panel_scaling=FALSE) +
   scale_x_continuous(trans = "log10", limits = c(.001, 1)) +
   scale_fill_manual(name = 'Population', values = group_colors, breaks = c("corrected_bacteria","bacteria","prochloro"), labels = c("Corrected Bacteria","Bacteria+Pro","Prochloro")) +
   theme(legend.key.size = unit(.35, 'cm')) +
   annotation_logticks(sides = "b")  +
   theme_bw() +
   facet_grid(incubation_time ~ position) +
   labs(x="Carbon Content Distribution (pgC)",
        y= "Depth (m)")

## Finalize PSD dataframe
PSD <- dplyr::filter(PSD_long,pop!="bacteria")    # remove uncorrected bacteria from dataframe
PSD$pop[PSD$pop=="corrected_bacteria"]<-"bacteria" # rename corrected_bacteria to bacteria
}

# Find closest equivilent spherical diameter matches to the carbon quota from the Mie lookup table for each particle
mie <- as.data.frame(read.csv(system.file("scatter", paste0("calibrated-mieINFLUX.csv"),package="FCSplankton")))
id <- findInterval(PSD$Qc, mie$"Qc_Penny_lwr", all.inside = TRUE)
PSD[,"esd"] <- mie[id,"diam_Penny_lwr"]
PSD[1:3,]

###  Save PSD data in a .csv file
PSD[1:3,]
project <- basename(getwd())
write_csv(PSD,file = paste0("Influx_", project,"_PSD.csv"))
fribalet/FCSplankton documentation built on Oct. 11, 2024, 7:06 a.m.