In cytolab/DebarcodeR: Automated Demultiplexing for Fluorescent Cell Barcoding

knitr::opts_chunk$set(echo = TRUE)

## STEP 3

# NOTE: This only needs to be run prior to the first time running debarcodeR
install.packages("devtools")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("flowCore", "CytoML", "flowWorkspace", "ggcyto"))
remotes::install_github("cytolab/DebarcodeR")
install.packages(c("dplyr", "stringr", "devtools", "ggpointdensity", "CytobankAPI", "tidyverse", "RColorBrewer", "readxl", "ggpubr", "rstatix", "mixsmsn"))
devtools::install_github("bjreisman/cytotidyr")
devtools::install_github("ejanalysis/analyze.stuff")

## STEP 4

library(flowCore)
library(CytoML)
library(flowWorkspace)
library(dplyr)
library(ggcyto)
library(cytotidyr)
library(DebarcodeR)
library(ggpointdensity)
library(CytobankAPI)
library(tidyverse)
library(stringr)
library(RColorBrewer)
library(facetscales)
library(readxl)
library("analyze.stuff")
library(ggpubr)
library(rstatix)
library(mixsmsn)
set.seed(1)

##STEP 6

# a) set API token from Cytobank
token <- "YOUR API TOKEN"

# b) set experiment ID
experiment.id <- EXPERIMENT_ID_NUMBER

# c) set PARENT DIRECTORY where all fcs files are as working directory
base_directory <- "PARENT DIRECTORY"
setwd(base_directory)

# authenticate session and get experiment info
cyto_session <- authenticate("vanderbilt", auth_token = token)
exp_info <- fetchCytobankExperiment(cyto_session, experiment.id)

##STEP 7

# a) - c)
plate_name <- "YOUR PLATE NAME" 
columns <- "1-6 OR 7-12"
first_col <- as.numeric(str_split(columns, "-")[[1]][1])
cell_type <- "YOUR CELL TYPE"

# d) input your initials - will be used later in the name of the experiment clone
initials <- "YOUR INITIALS"

# e) input in names of "Live cells for analysis gate" (instructions in Figure 2A)
live_cells_gate <- "Live cells for analysis"

# f) input name of compensation
comp_name <- "YOUR COMPENSATION MATRIX NAME"

# make PLATE DIRECTORY
dir.create(paste0(base_directory,"/", plate_name,"_",columns))

# g) input platemap file directory
my.plate.map <- data.frame(read_excel("YOUR PLATEMAP FILEPATH", sheet = plate_name))

# h) input names of FCB, UTC, and stain files
FCB_file <- "FCB_CELLS_FCS"
stain_file <- "STAIN_CELLS FCS"
UTC_file <- "UTC_CELLS FCS"
filepaths <- c(FCB_file, stain_file, UTC_file)
filepaths

## STEP 8

# read flowset and gatingset for all three files
# flowSet is a list of flowFrames
# flowFrames is a data.frame with preprocessed events from fcs file downloaded from Cytobank
myflowset<- read.flowSet(file.path(filepaths),truncate_max_range = FALSE) 
GatingSet(myflowset)
myflowSet.list <- as.list(myflowset@frames)
mygatingset <- cytobank_to_gatingset(exp_info$gates.path, filepaths) 

# transform and compensate data
mygatingset_scaled <- transform(mygatingset, exp_info$transforms)

# pull live cells gate for all three fcs files 
fs <- gs_pop_get_data(mygatingset, live_cells_gate,inverse.transform = T)
fs<-cytoset_to_flowSet(fs)
myestd <- fs[[UTC_file]]

## STEP 9

setwd(paste0(base_directory,"/", plate_name,"_",columns))

# call fcb flowframe to debarcode - this is the control data (AKA the red points)
myfcb <- fs[[FCB_file]] #subset out the gated FCB flow frame from the flow set for FCB

# for plot output, calls specific dataframe, gets filename
prefix <- str_split(basename(myfcb@description$FILENAME), ".fcs")[[1]][1]

# plot pb v po for fcb  
ggplot(myfcb, aes(x=`Pacific Orange-A`, y = `Pacific Blue-A`)) + 
  geom_bin2d(bins = 400) + 
  scale_fill_viridis_c(option = "A") + 
  theme_bw()

# choose predictors that are used to model pacific orange and pacific blue dye uptake
my.predictors1 <- c("fsc_a", "ssc_a","fsc_h", "ssc_h","ssc_b_h","ssc_b_a") #orange
my.predictors2 <- c("fsc_a", "ssc_a","fsc_h", "ssc_h","ssc_b_h","ssc_b_a") #blue 

## deskewing allows for the amount of dye taken up by each barcoded well to be adjusted and predicted once this has been done, GMM can be used to predict which barcode belongs to which well 
## 1 from figure 1d in paper -  in deskewing, linear regression is first used to model dye uptake across a continuous range of cell subsets using the UTC. In the second step of de-skewing (1B), the uptake model from step 1a is used to predict dye uptake for each cell in the barcoded dataset, and corrected values are generated by subtracting the observed and predicted dye uptake.
debarcoded.ff <- deskew_fcbFlowFrame(myfcb,
                                     uptake = myestd,
                                     predictors = my.predictors2,
                                     channel = c("pacific_blue_a"))


debarcoded.ff <- deskew_fcbFlowFrame(debarcoded.ff, # output of first run
                                     uptake = myestd,
                                     predictors = my.predictors1,
                                     channel = c("pacific_orange_a"))


## sanity check plot, making sure you have 48 (relatively) distinct populations and a decent grid (it's okay if populations are missing, that usually means that whatever was in the well killed the cells)

bind_cols(lapply(lapply(debarcoded.ff@barcodes, `[[`, 'deskewing'),`[[`, 'values')) %>%
  ggplot(aes(x=pacific_orange_a, y = pacific_blue_a)) + 
  geom_pointdensity(adjust = 0.01, shape = ".") + 
  scale_color_viridis_c(option = "A") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        text = element_text(size = 6)) + 
  coord_fixed()+
  xlim(2, 8)

#saves deskewed plot to working directory
ggsave(paste0(prefix, "_deskewed.png"), width = 4, height = 4, units = "in", dpi = 300)

#cluster the 'deskewed' data to estimate the barcoding level probabilities for each cell
debarcoded.ff <- cluster_fcbFlowFrame(debarcoded.ff,
                                      channel = c("pacific_orange_a"),
                                      levels = 6,
                                      opt = "fisher")

debarcoded.ff <- cluster_fcbFlowFrame(debarcoded.ff,
                                      channel = c("pacific_blue_a"),
                                      levels = 8,
                                      opt = "fisher")

# 3 from figure 1d in paper - assigning cells to the most probable level and discarding cells with unacceptable probabilities of originating from more than one level or those in the tails of the fitted distribution

## assigning pb and po, assign based on probability, don't mess with likelihood cutoff (likelihood of cell coming from a different population), ambiguity cutoff (within each cutoff lets include or exclude cells, higher cutoff, more cells included)
debarcoded.ff <- assign_fcbFlowFrame(debarcoded.ff,
                                  channel = c("pacific_orange_a"),
                                  likelihoodcut = 10, #a likelihood cutoff for discarding unlikely cells, less than 1/k as likely as the most likely cell from that population
                                  ambiguitycut = 0.4) #numeric from 0 to 1, threshhold below which to discard ambigious cells, discards cells with more than 40% chance of originating from another population
#usually higher for orange because populations are less distinct

debarcoded.ff <- assign_fcbFlowFrame(debarcoded.ff,
                                  channel = c("pacific_blue_a"),
                                  likelihoodcut = 10,
                                  ambiguitycut = 0.4)


# get assignments and split by debarcoded well
myassignments <- getAssignments(debarcoded.ff) #says which level assigned to for PB and PO
debarcoded.fs <- split(myfcb, getAssignments(debarcoded.ff)) #splits flow frame by assignments

#now time to output pretty plots showing well assignment!

# set plot parameters and labels
mypal <- c("grey50", scales::hue_pal()(8))
label_df <- 
      tibble(pb_deskewed = debarcoded.ff@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ff@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.numeric(debarcoded.ff@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.numeric(debarcoded.ff@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
      dplyr::filter(pb_level !=0, po_level!=0) %>%
        mutate(well = paste0(LETTERS[pb_level], po_level+(first_col-1))) %>%
        group_by(well) %>%
        summarise_if(is.numeric, median)
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors,
                             rownames(qual_col_pals)))
values = sample(col_vector)

# plot well level
plot.well<-tibble(pb_deskewed = debarcoded.ff@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ff@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.factor(debarcoded.ff@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.factor(debarcoded.ff@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
        dplyr::filter(pb_level !=0, po_level!=0) %>%
        mutate(well = paste0(LETTERS[pb_level], po_level)) %>%
  ggplot(aes(y= pb_deskewed, x = po_deskewed, col = well)) +
  geom_point(shape = ".",alpha = 0.3)  +
  scale_color_manual(values = values, guide = "none") + 
  coord_fixed() + 
  geom_text(data = label_df, size = 2.5,aes(label=  well), color = "black")  + theme_bw()+
  xlim(2, 8)

ggsave(paste0(prefix, "_well_all.png"), plot.well, width = 4, height = 4, units = "in", dpi = 300)

# plot pb level
plot.pb<-tibble(pb_deskewed = debarcoded.ff@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ff@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.factor(debarcoded.ff@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.factor(debarcoded.ff@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
  ggplot(aes(y= pb_deskewed, x = po_deskewed, col = pb_level)) +
  geom_point(shape = ".",alpha = 0.3) +
  scale_color_manual(values = mypal, guide = "none") + 
  coord_fixed() + 
  geom_text(data = label_df, size = 2.5,aes(label=  well), color = "black")  + theme_bw()+
  xlim(2, 8)

ggsave(paste0(prefix, "_well_pblevel.png"), plot.pb, width = 4, height = 4, units = "in", dpi = 300)

# plot po level
plot.po<-tibble(pb_deskewed = debarcoded.ff@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ff@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.factor(debarcoded.ff@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.factor(debarcoded.ff@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
  ggplot(aes(y= pb_deskewed, x = po_deskewed, col = po_level)) +
  geom_point(shape = ".",alpha = 0.3)   +
  scale_color_manual(values = mypal, guide = "none") + 
  coord_fixed() + 
  geom_text(data = label_df, size = 2.5,aes(label=  well), color = "black")  + theme_bw()+
  xlim(2, 8)

ggsave(paste0(prefix, "_well_polevel.png"), plot.po,width = 4, height = 4, units = "in", dpi = 300)

## make a plate map and apply it to update the Data slot
## platemap that aligns well name and number to barcode level
myplatemap <- tibble(
  `Pacific Blue-A` = rep(1:8, times = 6),
  `Pacific Orange-A` = rep(1:6, each = 8)
) %>% 
  mutate(well = paste0(LETTERS[`Pacific Blue-A`], str_pad(`Pacific Orange-A`+(first_col-1), side = "left", width = 2, pad = "0"))) %>%
  janitor::clean_names() %>%
  mutate_all(as.character)

debarcoded.fs <- apply_platemap(debarcoded.fs,
                                myplatemap,
                                prefix = prefix)

## STEP 10

## This step makes a new fcs file for each well and saves those to your working directory in a folder called "debarcoded

# undoing transformations and compensations - want raw fcs data in files
gs.i<- GatingSet(debarcoded.fs)
debarcoded.fs.inverted<- transform(gs.i, invertTransformerList(exp_info$transforms))
#Inverts a TransformerList object to return the 'back transformation'
debarcoded.fs.decompensated <- compensate(debarcoded.fs.inverted,
                          solve(exp_info[["compensations"]][[comp_name]])) 

# get pop data and output debarcoded files, 48 wells and everything that's been unassigned
flowset = gs_pop_get_data(debarcoded.fs.decompensated)
outdir <- "debarcoded"
write.flowSet(flowset, outdir = outdir)

#should now have 50 files in a new folder called "debarcoded
#48 (one for each well) + 1 (unassigned) + 1 annotation .txt file

## STEP 11

## now we are repeated all of the above steps (deskewing, clustering, etc...) for the stained cells (black dots) - there are going to be less comments because its all the same steps except with the stained file instead of the FCB file (we are still using the FCB sample to model pacific orange and pacific blue dye uptake though)

# call stained flowframe to debarcode, black dots
mystained <- fs[[stain_file]]

# for plot output
prefix <- str_split(basename(mystained@description$FILENAME), ".fcs")[[1]][1]

# plot pb v po for stained
ggplot(mystained, aes(x=`Pacific Orange-A`, y = `Pacific Blue-A`)) + 
  geom_bin2d(bins = 400) + 
  scale_fill_viridis_c(option = "A") + 
  theme_bw()

# plot pb v po for utc
ggplot(myestd, aes(x=`Pacific Orange-A`, y = `Pacific Blue-A`)) + 
  geom_bin2d(bins = 400) + 
  scale_fill_viridis_c(option = "A") + 
  theme_bw()

# choose predictors for po and pb
my.predictors1 <- c("fsc_a", "ssc_a","fsc_h", "ssc_h","ssc_b_h","ssc_b_a")
my.predictors2 <- c("fsc_a", "ssc_a","fsc_h", "ssc_h","ssc_b_h","ssc_b_a")

# deskewing pb and po, ffs = flow frame stained
debarcoded.ffs <- deskew_fcbFlowFrame(mystained,
                                     uptake = myestd,
                                     predictors = my.predictors2,
                                     channel = c("pacific_blue_a"))

debarcoded.ffs <- deskew_fcbFlowFrame(debarcoded.ffs, #output of first run
                                     uptake = myestd,
                                     predictors = my.predictors1,
                                     channel = c("pacific_orange_a"))


## sanity check plot
bind_cols(lapply(lapply(debarcoded.ffs@barcodes, `[[`, 'deskewing'),`[[`, 'values')) %>%
  sample_n(25000) %>%
  ggplot(aes(x=pacific_orange_a, y = pacific_blue_a)) + 
  geom_pointdensity(adjust = 0.01, shape = ".") + 
  scale_color_viridis_c(option = "A") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        text = element_text(size = 6)) + 
  coord_fixed()+
  xlim(2, 8)
ggsave(paste0(prefix, "_deskewed.png"), width = 4, height = 4, units = "in", dpi = 300)

# clustering po and pb
debarcoded.ffs <- cluster_fcbFlowFrame(debarcoded.ffs,
                                      channel = c("pacific_orange_a"),
                                      levels = 6,
                                      opt = "fisher")

debarcoded.ffs <- cluster_fcbFlowFrame(debarcoded.ffs,
                                      channel = c("pacific_blue_a"),
                                      levels = 8,
                                      opt = "fisher")

## assigning po and pb
debarcoded.ffs <- assign_fcbFlowFrame(debarcoded.ffs,
                                  channel = c("pacific_orange_a"),
                                  likelihoodcut = 10,
                                  ambiguitycut = 0.4)


debarcoded.ffs <- assign_fcbFlowFrame(debarcoded.ffs,
                                  channel = c("pacific_blue_a"),
                                  likelihoodcut = 10,
                                  ambiguitycut = 0.4)

# get assignments and split by debarcoded well
myassignments <- getAssignments(debarcoded.ffs)
debarcoded.fss <- split(mystained, getAssignments(debarcoded.ffs))

# set plot parameters and labels
mypal <- c("grey50", scales::hue_pal()(8))
label_df <- 
      tibble(pb_deskewed = debarcoded.ffs@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ffs@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.numeric(debarcoded.ffs@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.numeric(debarcoded.ffs@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
      dplyr::filter(pb_level !=0, po_level!=0) %>%
        mutate(well = paste0(LETTERS[pb_level],  po_level+(first_col-1))) %>%
        group_by(well) %>%
        summarise_if(is.numeric, median)
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors,
                             rownames(qual_col_pals)))
values = sample(col_vector)

# plot well level
plot.well<-tibble(pb_deskewed = debarcoded.ffs@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ffs@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.factor(debarcoded.ffs@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.factor(debarcoded.ffs@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
        dplyr::filter(pb_level !=0, po_level!=0) %>%
        mutate(well = paste0(LETTERS[pb_level], po_level)) %>%
  ggplot(aes(y= pb_deskewed, x = po_deskewed, col = well)) +
  geom_point(shape = ".",alpha = 0.3)   +
  scale_color_manual(values = values, guide = "none") + 
  coord_fixed() + 
  geom_text(data = label_df, size = 2.5,aes(label=  well), color = "black") + theme_bw()+
  xlim(2, 8)

ggsave(paste0(prefix, "_well_all.png"), plot.well, width = 4, height = 4, units = "in", dpi = 300)

# plot pb level
plot.pb<-tibble(pb_deskewed = debarcoded.ffs@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ffs@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.factor(debarcoded.ffs@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.factor(debarcoded.ffs@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
  ggplot(aes(y= pb_deskewed, x = po_deskewed, col = pb_level)) +
  geom_point(shape = ".",alpha = 0.3)  +
  scale_color_manual(values = mypal, guide = "none") + 
  coord_fixed() + 
  geom_text(data = label_df, size = 2.5,aes(label=  well), color = "black")  + theme_bw()+
  xlim(2, 8)

ggsave(paste0(prefix, "_well_pblevel.png"), plot.pb, width = 4, height = 4, units = "in", dpi = 300)

# plot po level
plot.po<-tibble(pb_deskewed = debarcoded.ffs@barcodes[["pacific_blue_a"]][["deskewing"]][["values"]],
       po_deskewed = debarcoded.ffs@barcodes[["pacific_orange_a"]][["deskewing"]][["values"]],
       pb_level = as.factor(debarcoded.ffs@barcodes[["pacific_blue_a"]][["assignment"]][["values"]]),
       po_level = as.factor(debarcoded.ffs@barcodes[["pacific_orange_a"]][["assignment"]][["values"]])) %>%
  ggplot(aes(y= pb_deskewed, x = po_deskewed, col = po_level)) +
  geom_point(shape = ".",alpha = 0.3)  +
  scale_color_manual(values = mypal, guide = "none") + 
  coord_fixed() + 
  geom_text(data = label_df, size = 2.5,aes(label=  well), color = "black")  + theme_bw()+
  xlim(2, 8)

ggsave(paste0(prefix, "_well_polevel.png"), plot.po,width = 4, height = 4, units = "in", dpi = 300)

## make a plate map and apply it to update the pData slot
myplatemap <- tibble(
  `Pacific Blue-A` = rep(1:8, times = 6),
  `Pacific Orange-A` = rep(1:6, each = 8)
) %>% 
  mutate(well = paste0(LETTERS[`Pacific Blue-A`], str_pad(`Pacific Orange-A`+(first_col-1), side = "left", width = 2, pad = "0"))) %>%
  janitor::clean_names() %>%
  mutate_all(as.character)

debarcoded.fss <- apply_platemap(debarcoded.fss,
                                myplatemap,
                                prefix = prefix)

## STEP 12

## This step makes a new fcs file for each well and saves those to your working directory in a folder called "debarcoded

# undoing transformations and compensations
gs.i<- GatingSet(debarcoded.fss)
debarcoded.fss.inverted<- transform(gs.i, invertTransformerList(exp_info$transforms))
debarcoded.fss.decompensated <- compensate(debarcoded.fss.inverted,
                          solve(exp_info[["compensations"]][[comp_name]])) 

# get pop data and output debarcoded files
flowset = gs_pop_get_data(debarcoded.fss.decompensated)
outdir <- "debarcoded"
write.flowSet(flowset, outdir = outdir)
#now a total of 98 fcs files in the debarcoded folder - 49 files/fcb, 49 files/stained
#48 = 1 file/well
#1 file for unassigned cells

## STEP 13

# clone experiment, upload debarcoded files
# data info for clone
todaysdate <- Sys.Date()
newname <- paste0(initials, " - ",experiment.id, "_", todaysdate, "_", plate_name, "_",cell_type,"_",stain_gate, " " ,"(",columns," Debarcoded)")

# get UTC/FCB/stain files so that they are part of the cloned experiment
all_file_rows <- data.frame(exp_info$fcs_files[,c(1,3)])
FCB_file_row <- all_file_rows[all_file_rows$filename ==FCB_file,]
stain_file_row <- all_file_rows[all_file_rows$filename ==stain_file,]

# clone the original experiment
fileID = c(FCB_file_row$originalId, stain_file_row$originalId)
new.experiment.id <-
  CytobankAPI::experiments.clone_selective(
    cyto_session,
    experiment.id,
    newname,
    fcs_files = as.numeric(fileID),
    clone_gates = T,
    clone_compensations = T,
    clone_user_access = T,
    clone_annotations = T,
    clone_project = T
  )

# upload newly debarcoded fcs files to clone
upload.list <- list.files( "debarcoded", recursive = T, pattern = ".fcs")
zipped.path <- utils::zip(zipfile = paste0('Upload_', todaysdate),
                          files = file.path( "debarcoded", upload.list))
paste0('Upload_', todaysdate)
upload.output <- fcs_files.upload_zip(cyto_session,
                                      new.experiment.id$id[[1]],
                                      paste0('Upload_', todaysdate, ".zip"), timeout = 500
)
#now all 98 files are on Cytobank

##NOTE: it can take up to 5 mins (usually a lot quicker) for all of the files to upload to the new Cytobank experiment, there should be 101 total, DON'T MOVE ON UNTIL ALL FILES ARE ON CYTOBANK, otherwise the sample tags will not be appropriately applied

## STEP 15

#pad plate.col column with zeros
platemap <- my.plate.map %>%
  arrange(plate.col) %>%
  mutate(plate.col = str_pad(as.character(plate.col), 2, pad = "0"))

# view platemap as sanity check
platemap
table(platemap$plate.col, platemap$plate.row) #makes sure that there is something assigned for every row and column

# download existing sample tags from cytobank
sampletags.path <- sample_tags.download(cyto_session, new.experiment.id[[1]])
sampletags.tb <- read_tsv(sampletags.path)

# update existing sample tags with new platemap info
sampletags.tb.updated <-
  sampletags.tb %>%
  mutate(well = substr(stringr::str_extract(sampletags.tb$`FCS Filename`, "_[[:upper:]]\\d{2}.fcs"),2,4)) %>%
  mutate(`Plate Column` = substr(well, 2,3),
         `Plate Row` = substr(well,1,1)) %>%
  select(-well)%>%
  left_join(platemap[c((1+(first_col-1)*8):(48+(first_col-1)*8)),], by = c("Plate Column" = "plate.col", "Plate Row" = "plate.row")) %>%
  mutate(Conditions = if_else(is.na(Conditions), "-", Conditions)) %>%
  mutate(Doses = if_else(is.na(Doses), "-",Doses))

# upload sample tags to Cytobank
sampletags.path.updated <- write_tsv(sampletags.tb.updated, sampletags.path)
print("Uploading Sample Tags")
sampletags.uploaded <- sample_tags.upload(cyto_session,  new.experiment.id[[1]], sampletags.path)

#this process should be fairly instantaneous

## STEP 16

todaysdate <- Sys.Date()
setwd(paste0(base_directory,"/", plate_name,"_",columns))

# fetch new cytobank exp info
exp_info <- fetchCytobankExperiment(cyto_session, new.experiment.id[[1]])

#now 101 files - 98 from debarcoded FCB and stained cells, 3 original, not debarcoded fcs files (UTC, FCB, stain)

# find debarcoded files in working directory
filelist1<- list.files(pattern = ".fcs", recursive = T)

# read debarcoded files and apply scales and compensation
myflowset <- read.flowSet(filelist1)
mygatingset <- GatingSet(myflowset)
mygatingset_compensated <-compensate(mygatingset, exp_info[["compensations"]][[comp_name]])
mygatingset_scaled <- transform(mygatingset, exp_info$transforms)
fs <- gs_pop_get_data(mygatingset)
hello<-cytoset_to_flowSet(fs)
colnames(hello) <- exp_info[["panels"]][[1]]

# create data frame with all fcb and stained debarcoded data
tidydata <- as.data.frame(hello, use_longnames = T) %>%
  as_tibble() %>% 
  dplyr::mutate(`FCS Filename` = basename(`FCS Filename`)) %>%
  left_join(exp_info$sampletags, by = c("FCS Filename" = "FCS.Filename"))

#upload "sample type tags" so we know which cells should be red and which should be black
tidydata$`Sample.Type` <- "placeholder"
tidydata$`Sample.Type`[grepl("Stain", tidydata$`FCS Filename`)] <- "Stain"
tidydata$`Sample.Type`[grepl("FCB", tidydata$`FCS Filename`)] <- "FCB"

#double check that there are only Stain and FCB assigned "Sample.Type" cells (sanity check)
unique(tidydata$`Sample.Type`)

# rename channels for plotting
colnames(tidydata)[8]<-c("PB")
colnames(tidydata)[9]<-c("PO")

markers <- colnames(tidydata)[grep("PB", colnames(tidydata)):(grep("Ax700 Viability", colnames(tidydata))-1)]

# plot pb v po for sanity check - make sure that all cells are present
tidydata %>%
  sample_n(25000) %>%
  ggplot(aes(x=PO, y = PB)) + 
  geom_pointdensity(adjust = 0.01, shape = ".") + 
  scale_color_viridis_c(option = "A") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        text = element_text(size = 6)) + 
  coord_fixed()


# organize data and create columns with x-axis labels for the output plot
tidydata$pb<-as.numeric(fct_rev(as.factor(tidydata$Plate.Row)))
tidydata$po<-as.numeric(fct_rev(as.factor(tidydata$Plate.Column)))
tidydata$Well <- paste0("POL",tidydata$po,"_","PBL",tidydata$pb)
tidydata$WELL<-as.numeric(as.factor(tidydata$Well))
tidydata$WELL <- factor(tidydata$WELL, levels=unique(tidydata$WELL))
tidydata$WELL <- str_pad(tidydata$WELL, side = "left", width = 2, pad = "0")
tidydata$letternum <- paste(tidydata$Plate.Row,tidydata$Plate.Column,sep = "")
tidydata$Dose <- str_pad(tidydata$Dose, side = "left", width = 2, pad = "0")
#$longname <- paste(tidydata$WELL,"_",tidydata$Dose,"_",tidydata$Well, sep = "")
tidydata$longname <- paste(tidydata$Dose,"_",tidydata$letternum,"_",tidydata$Well,sep = "")
tidydata.sorted <- tidydata[rev(order(tidydata$Sample.Type)), ]

#double check marker names
colnames(tidydata.sorted)

# scale the y-axis for each marker
scales_y <- list()
for (s in c(markers)){
  scales_y[[s]] <- scale_y_continuous(limits = c(-1, 8))
}

# create debarcodeR output plot!
plot.i <-
  tidydata.sorted %>%
  gather(Marker, Value, paste(markers)) %>%
  ggplot(aes(y=Value, x = `longname`, color = `Sample.Type`)) +
  geom_point(shape = ".", position = position_jitter(height = 0.1),alpha = 0.5) + #facet_grid(Marker~Dose, scales = "free",space =     "free_x") + 
  facet_grid_sc(Marker~Condition,scales = list(x = "free",y = scales_y),space = "free_x")+scale_y_continuous()+
  labs(title = paste0(experiment.id, "_", Sys.Date(), "_", plate_name, "_", columns, "_", cell_type, "_", stain_gate)) +
  scale_color_manual(values=c("red", "black")) +
  guides(color = guide_legend(override.aes = list(size=5))) +
  theme_bw() +  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.35), 
        panel.spacing.y = unit(0.2, "in")) + theme(legend.position="none") 

#save debarcodeR output plot!
ggsave(
  paste0(experiment.id, "_", todaysdate, "_", plate_name, "_", columns, "_Rothko.png"),
  plot.i,
  width = 18,
  height = 12,
  units = "in",
  dpi = 300
)