# Load all libraries
# If you get an error message, you will need to try re-installing packages
library(FlowSOM)
library(flowCore)
library(Biobase)
library(gplots)
library(ggplot2)
library(hexbin)
library(MEM)
library(tidyverse)
library(Rtsne)
library(uwot)
library(viridis)
library(ggExtra)
# read FCS files into R
setwd(paste(getwd(), "/datafiles/PBMC", sep = ""))
files <-  dir(pattern = "*.fcs")

# Run MEM on the manually gated populations (each FCS files is a pop) from paper
MEM.values.orig = MEM(
  files,
  transform = TRUE,
  cofactor = 15,
  choose.markers = FALSE,
  markers = "12:20,22:23,25:33,35:36,38:40",
  choose.ref = FALSE,
  zero.ref = FALSE,
  rename.markers = FALSE,
  new.marker.names = "CD19,CD117,CD11b,CD4,CD8,CD20,CD34,CD61,CD123,CD45RA,CD45,CD10,CD33,CD11c,CD14,CD69,CD15,CD16,CD44,CD38,CD25,CD3,IgM,HLA-DR,CD56",
  file.is.clust = TRUE,
  add.fileID = FALSE,
  IQR.thresh = NULL
)

# build MEM heatmap and output enrichment scores
build.heatmaps(
  MEM.values.orig,
  cluster.MEM = "both",
  display.thresh = 1,
  newWindow.heatmaps = FALSE,
  output.files = FALSE,
  labels = TRUE,
  only.MEMheatmap = FALSE
)

# prepare data for use in UMAP
data <- lapply(lapply(files, read.FCS), exprs)
ID <- c(1:length(data))
combined.data = as.data.frame(do.call(rbind, mapply(
  cbind, data, "File ID" = ID, SIMPLIFY = F
)))
combined.data$`File ID` <- as.numeric(combined.data$`File ID`)
chosen.markers = combined.data[, c(12:20, 22:23, 25:33, 35:36, 38:40)]
transformed.chosen.markers <- chosen.markers %>%
  mutate_all(function(x)
    asinh(x / 15))
overall_seed = 43
# Run UMAP on all surface markers
set.seed(overall_seed)
myumap <-
  umap(transformed.chosen.markers,
       ret_model = TRUE,
       n_threads = 1, 
       verbose = TRUE)
umap.data = as.data.frame(myumap$embedding)

range <- apply(apply(umap.data, 2, range), 2, diff)
graphical.ratio <- (range[1] / range[2])

# UMAP flat dot plot and density dot plot
UMAP.plot <- data.frame(x = umap.data[, 1], y = umap.data[, 2])

ggplot(UMAP.plot) + coord_fixed(ratio = graphical.ratio) + 
  geom_point(aes(x = x, y = y), cex = 1) + labs(x = "UMAP 1", y = "UMAP 2", 
                                                title = "UMAP on PBMC Data") + 
  theme_bw() + 
  labs(caption = "Data from Digggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63")

ggplot(UMAP.plot, aes(x = x, y = y)) + coord_fixed(ratio = graphical.ratio)  + 
  geom_bin2d(bins = 128) +
  scale_fill_viridis_c(option = "A", trans = "sqrt") + 
  scale_x_continuous(expand = c(0.1, 0)) +
  scale_y_continuous(expand = c(0.1, 0)) + labs(x = "UMAP 1", y = "UMAP 2", 
                                                title = "UMAP on PBMC Data") + 
  theme_bw() + 
  labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63")
# Run FlowSOM on the UMAP axes
umap.matrix <- as.matrix(umap.data)

# create flowFrame
UMAP.metadata <-
  data.frame(name = dimnames(umap.matrix)[[2]],
             desc = paste('UMAP', dimnames(umap.matrix)[[2]]))
UMAP.metadata$range <- apply(apply(umap.matrix, 2, range), 2, diff)
UMAP.metadata$minRange <- apply(umap.matrix, 2, min)
UMAP.metadata$maxRange <- apply(umap.matrix, 2, max)
umap.flowframe <- new("flowFrame",
                      exprs = umap.matrix,
                      parameters = AnnotatedDataFrame(UMAP.metadata))

# implement the FlowSOM on the data
fsom <-
  FlowSOM(
    umap.flowframe,
    compensate = FALSE,
    transform = FALSE,
    toTransform = c(1:2),
    scale = TRUE,
    colsToUse = c(1:2),
    nClus = 10,
    seed = overall_seed
  )
FlowSOM.clusters <-
  as.matrix(fsom[[2]][fsom[[1]]$map$mapping[, 1]])

# plot FlowSOM clusters on UMAP axes
ggplot(UMAP.plot) + coord_fixed(ratio=graphical.ratio) + 
  geom_point(aes(x=x, y=y, color=FlowSOM.clusters),cex = 1.5) + 
  labs(x = "UMAP 1", y = "UMAP 2",title = "FlowSOM Clustering on UMAP Axes", 
       color = "FlowSOM Cluster") + theme_bw() + 
  guides(colour = guide_legend(override.aes = list(size=5)))+
  labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63")
# Run MEM on the FlowSOM clusters from UMAP
cluster = as.numeric(as.vector((FlowSOM.clusters)))
MEM.data = cbind(transformed.chosen.markers, cluster)

MEM.values.uf = MEM(
  MEM.data,
  transform = FALSE,
  cofactor = 15,
  choose.markers = FALSE,
  markers = "all",
  choose.ref = FALSE,
  zero.ref = FALSE,
  rename.markers = FALSE,
  new.marker.names = "CD19,CD117,CD11b,CD4,CD8,CD20,CD34,CD61,CD123,CD45RA,CD45,CD10,CD33,CD11c,CD14,CD69,CD15,CD16,CD44,CD38,CD25,CD3,IgM,HLA-DR,CD56",
  file.is.clust = FALSE,
  add.fileID = FALSE,
  IQR.thresh = NULL
)

# build MEM heatmap and output enrichment scores
build.heatmaps(
  MEM.values.uf,
  cluster.MEM = "both",
  cluster.medians = "none",
  display.thresh = 1,
  newWindow.heatmaps = FALSE,
  output.files = FALSE,
  labels = TRUE,
  only.MEMheatmap = FALSE
)
# RMSD to compare labels from all populations
orig.MEM.scores = as.data.frame(MEM.values.orig[[5]])
rownames(orig.MEM.scores) = paste0(rownames(orig.MEM.scores), " (Fig.1)")
uf.MEM.scores = as.data.frame(MEM.values.uf[[5]])
rownames(uf.MEM.scores) = paste0(rownames(uf.MEM.scores), ' (UMAP)')
all.MEM.values = as.matrix(rbind(uf.MEM.scores, orig.MEM.scores))

RMSD_vals <-
  MEM_RMSD(
    all.MEM.values,
    format = NULL,
    newWindow.heatmaps = FALSE,
    output.matrix = FALSE
  )


JonathanIrish/MEMv3 documentation built on July 14, 2019, 10:43 p.m.