Analysis of leukemia patient blood data, from which generated Figure 4

library(CytoSpill)
library(flowCore)
library(ggplot2)
library(Rtsne)
library(dplyr)
library(RColorBrewer)
library(reshape2)
library(Rphenograph)

read data

#read expression
data_Bo <- flowCore::exprs(flowCore::read.FCS("/Users/qmiao/CytoSpill copy/data/Bo_862043_labeled.fcs", transformation = FALSE, truncate_max_range = FALSE))
#remove negative values if any
data_Bo[which(data_Bo<0)] <- 0

Bo_marker <- as.character(read.csv(file="/Users/qmiao/CytoSpill copy/data/Bo_marker.csv", header=F)$V1)
names(Bo_marker) <- colnames(data_Bo)

population_names <- c("B_Cells", "CD4_T_cells", "CD8_T_cells", "NK cells")

select channels used for compensation and analysis

data_Bo_temp <- data_Bo[,c(1:4,6:15,17:32,34:37,39:50)]
#remove duplicates
duplicates_id <- duplicated(data_Bo_temp)
data_Bo_temp <- data_Bo_temp[!duplicates_id,]

use CytoSpill for compensation

Bo_results <- SpillComp(data = data_Bo_temp, cols = 1:46, n = 20000, threshold = 0.1, flexrep = 5, neighbor = 1)
compensated_Bo <- Bo_results[[1]]

add back population label

##compensated exprs
compensated_Bo_exprs <- as.data.frame(flowCore::exprs(compensated_Bo))
compensated_Bo_exprs[,"label"] <- as.factor(data_Bo[,"label"][!duplicates_id])
##uncompensated exprs
data_Bo_temp <- as.data.frame(data_Bo_temp)
data_Bo_temp[,"label"] <- as.factor(data_Bo[,"label"][!duplicates_id])

downsample

# downsample for faster calculation, plotting
nsample = 20000
# subsample
set.seed(123)
rowsample <- sample(nrow(data_Bo_temp), nsample)
compensated_Bo_exprs_downsample <- compensated_Bo_exprs[rowsample,]
data_Bo_temp_downsample <- data_Bo_temp[rowsample,]

#function to censor data, for clear heatmap
censor_dat <- function (x, a = 0.99){
  q = quantile(x, a)
  x[x > q] = q
  return(x)
}

#function for arcsinh transform
transf <- function (x){asinh(x/5)}

Run Rphenograph

calculate_pheno <- function (data, cols, asinhtransfer = T){
  if (asinhtransfer <- T) {
    data[,cols] <- transf(data[,cols])
  }
  pheno_out <- Rphenograph::Rphenograph(data[,cols])
  cluster <- igraph::membership(pheno_out[[2]])
  return(cluster)
}
uncompensated_pheno <- calculate_pheno(data_Bo_temp_downsample, cols = 1:46)
compensated_pheno <- calculate_pheno(compensated_Bo_exprs_downsample, cols = 1:46)

calculate tsne maps on uncompensated data

calculate_tsne <- function (data, cols, asinhtransfer = T, verbose = T, dims = 2, seed = 123){
  set.seed(seed)
  tsne_dat <- data[,cols]
  #asinh/5 transfer
  if (asinhtransfer) {tsne_dat <- transf(tsne_dat)}
  tsne_out <- Rtsne::Rtsne(tsne_dat, verbose = verbose, dims = dims)
  return(tsne_out)
}

#calculate based on uncompensated data
uncompensated_tsne =  calculate_tsne(data_Bo_temp_downsample, cols = 1:46)

# Setup some colors for plotting
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))

plot population label on uncompensated tsne

levels(data_Bo_temp_downsample$label) <- c(population_names)
tclust = data_Bo_temp_downsample[,"label"]

tsne_coor <- uncompensated_tsne$Y
colnames(tsne_coor) <- c("tsne_1", "tsne_2")

col_list  <- c("#DC050C", "#1965B0", "#882E72",
               "#FF7F00", "#E7298A", "#E78AC3",
               "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D",
               "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999",
               "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000",
               "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00")

p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+
  geom_point(size=0.3, alpha=1, aes(color=as.factor(tclust)))+
  scale_color_manual(values = col_list, name = "cell type")+
  ggtitle('Bo uncompensated data tsne')+
  guides(color=guide_legend(override.aes=list(size=5)))+
  theme(strip.background = element_blank(),
        panel.background=element_rect(fill='white', colour = 'black'),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank(),
        legend.key = element_blank())
p

plot Rphenograhp cluster on uncompensated tsne

tclust = uncompensated_pheno

tsne_coor <- uncompensated_tsne$Y
colnames(tsne_coor) <- c("tsne_1", "tsne_2")

col_list  <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", 
               "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", 
               "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", 
               "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", 
               "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", 
               "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00")

p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+
  geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+
  scale_color_manual(values = col_list, name = "Phenograph cluster")+
  ggtitle('Bo uncompensated data tsne with Phenograph clusters')+
  guides(color=guide_legend(override.aes=list(size=5),ncol=2))+
  theme(strip.background = element_blank(),
        panel.background=element_rect(fill='white', colour = 'black'),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank(),
        legend.key = element_blank())
p

calculate tsne on compensated data

compensated_tsne =  calculate_tsne(compensated_Bo_exprs_downsample, cols = 1:46)

plot population on compensated tsne

levels(compensated_Bo_exprs_downsample$label) <- c(population_names)
tclust = compensated_Bo_exprs_downsample[,"label"]

tsne_coor <- compensated_tsne$Y
colnames(tsne_coor) <- c("tsne_1", "tsne_2")

col_list  <- c("#DC050C", "#1965B0", "#882E72",
               "#FF7F00", "#E7298A", "#E78AC3",
               "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D",
               "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999",
               "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000",
               "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00")

p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+
  geom_point(size=0.3, alpha=1, aes(color=as.factor(tclust)))+
  scale_color_manual(values = col_list, name = "cell type")+
  ggtitle('Bo compensated data tsne')+
  guides(color=guide_legend(override.aes=list(size=5)))+
  theme(strip.background = element_blank(),
        panel.background=element_rect(fill='white', colour = 'black'),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank(),
        legend.key = element_blank())
p

plot Rphenograhp cluster on compensated tsne

tclust = compensated_pheno

tsne_coor <- compensated_tsne$Y
colnames(tsne_coor) <- c("tsne_1", "tsne_2")

col_list  <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", 
               "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", 
               "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", 
               "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", 
               "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", 
               "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00")

p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+
  geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+
  scale_color_manual(values = col_list, name = "Phenograph cluster")+
  ggtitle('Bo compensated data tsne with Phenograph clusters')+
  guides(color=guide_legend(override.aes=list(size=5),ncol=2))+
  theme(strip.background = element_blank(),
        panel.background=element_rect(fill='white', colour = 'black'),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank(),
        legend.key = element_blank())
p

uncompensated marker density plots

the following plots the asinh(x/5) transformed intensities were normalized between 0-1 by using the 0.99 percentile of the data.

pdat <- transf(data_Bo_temp_downsample[,-47])
censor_pdat <- apply(pdat, 2, censor_dat)

censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))

### add colnames
for (i in seq_along(Bo_marker)){
  names(Bo_marker)[i] <- paste(names(Bo_marker)[i], Bo_marker[i], sep ='-')
}
dimnames(censor_pdat)[[2]] <- names(Bo_marker[c(1:4,6:15,17:32,34:37,39:50)])

censor_pdat <- as.data.frame(censor_pdat)
censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1]
censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2]

pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel")

p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+
    facet_wrap(~channel, scales = "free", ncol = 8)+
    geom_point(alpha=0.5, size=0.3)+
    scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+
    ggtitle("Uncomepensated Bo markers")+
    theme(strip.background = element_blank(),
          strip.text.x = element_text(size = 11),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          panel.background=element_blank(),
          panel.border=element_blank(),
          panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),
          plot.background=element_blank()) 
p
  #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Bo_uncompensated_marker.png", plot = p,width=15, height=8, dpi = 300)

compensatd marker density plot

pdat <- transf(compensated_Bo_exprs_downsample[,-47])
censor_pdat <- apply(pdat, 2, censor_dat)

censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))

dimnames(censor_pdat)[[2]] <- names(Bo_marker[c(1:4,6:15,17:32,34:37,39:50)])

censor_pdat <- as.data.frame(censor_pdat)
censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1]
censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2]

pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel")

p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+
    facet_wrap(~channel, scales = "free", ncol = 8)+
    geom_point(alpha=0.5, size=0.3)+
    scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+
    ggtitle("Compensated Bo markers")+
    theme(strip.background = element_blank(),
          strip.text.x = element_text(size = 11),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          panel.background=element_blank(),
          panel.border=element_blank(),
          panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),
          plot.background=element_blank()) 
p

  #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Bo_compensated_marker.png", plot = p,width=15, height=8, dpi = 300)

compensatd marker density plot on compensated tsne plot

pdat <- transf(compensated_Bo_exprs_downsample[,-47])
censor_pdat <- apply(pdat, 2, censor_dat)

censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X)))

dimnames(censor_pdat)[[2]] <- names(Bo_marker[c(1:4,6:15,17:32,34:37,39:50)])

censor_pdat <- as.data.frame(censor_pdat)
censor_pdat$tsne_1 <- compensated_tsne$Y[,1]
censor_pdat$tsne_2 <- compensated_tsne$Y[,2]

pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel")

p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+
    facet_wrap(~channel, scales = "free", ncol = 8)+
    geom_point(alpha=0.5, size=0.3)+
    scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+
    ggtitle("Compensated Bo markers on compensated tsne")+
    theme(strip.background = element_blank(),
          strip.text.x = element_text(size = 11),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          panel.background=element_blank(),
          panel.border=element_blank(),
          panel.grid.major=element_blank(),
          panel.grid.minor=element_blank(),
          plot.background=element_blank()) 
p
  #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Bo_compensated_marker_on_compensated_tsne.png", plot = p,width=15, height=8, dpi = 300)

draw histograms

library(reshape2)

plot_multi_histogram <- function(df, feature, label_column, cutoff) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2) +
    geom_vline(aes(xintercept=cutoff), color="black", linetype="dashed", size=1) +
    labs(x=feature, y = "Density") +
    theme_classic()
    plt + guides(fill=guide_legend(title=label_column)) +
    scale_fill_discrete(labels = c("Uncompensated", "Compensated"))
}

prep_hist_pdata <- function(df1, df2, feature, cutoffs) {
  exprs1 <- df1[feature]
  exprs1 <- exprs1[,]
  # exprs1 <- exprs1[which(exprs1>0),]
  exprs2 <- df2[feature]
  exprs2 <- exprs2[,]
  # exprs2 <- exprs2[which(exprs2>0),]
  label <- c(rep("dat1", length(exprs1)), rep("dat2", length(exprs2)))
  df <- cbind(c(exprs1,exprs2), label)
  colnames(df) <- c("value", "label")
  df <- as.data.frame(df)
  df[,1] <- as.numeric(as.character(df[,1]))
  df[,1] <- asinh(df[,1]/5)
  cutoff <- asinh(cutoffs[match(feature, colnames(df1))]/5)
  return(list(df,cutoff))
}

bo_pt194di <- prep_hist_pdata(data_Bo_temp_downsample, compensated_Bo_exprs_downsample, feature = "Pt194Di", cutoffs = Bo_results[[3]])
plot_multi_histogram(bo_pt194di[[1]], feature="value", label_column = "label", cutoff = bo_pt194di[[2]])
write.FCS(flowFrame(as.matrix(data_Bo_temp[,-47])), filename = "~/CytoSpill copy/data/flowSOM/data_Bo_temp.fcs")
write.FCS(flowFrame(as.matrix(compensated_Bo_exprs[,-47])), filename = "~/CytoSpill copy/data/flowSOM/compensated_Bo_exprs.fcs")
save.image("~/CytoSpill copy/data/Bo_analysis.Rdata")
sessionInfo()


KChen-lab/CytoSpill documentation built on June 15, 2020, 6:12 p.m.