sconeTutorial.R
In scone: Single Cell Overview of Normalized Expression data

## ----options, include=FALSE, cache=FALSE, results='hide', message=FALSE-------

knitr::opts_chunk$set(fig.align="center", cache=FALSE,error=FALSE,
                      fig.width=6,fig.height=6,autodep=TRUE,
                      out.width="600px", out.height="600px",
                      results="markup", echo=TRUE, eval=TRUE)

options(getClass.msg=FALSE)

set.seed(6473) ## for reproducibility

library(scone)
library(RColorBrewer)


## ----datain, message=FALSE----------------------------------------------------

library(scRNAseq)

## ----- Load Example Data -----
fluidigm <- ReprocessedFluidigmData(assays = "rsem_counts")

## ----showqc-------------------------------------------------------------------

## ----- List all QC fields -----

# List all qc fields (accessible via colData())
metadata(fluidigm)$which_qc


## ----biocoverage--------------------------------------------------------------

# Joint distribution of "biological condition"" and "coverage type""
table(colData(fluidigm)$Coverage_Type,
      colData(fluidigm)$Biological_Condition)


## ----prefilter----------------------------------------------------------------

# Preliminary Sample Filtering: High-Coverage Only
is_select = colData(fluidigm)$Coverage_Type == "High"
fluidigm = fluidigm[,is_select]

# Retain only detected transcripts
fluidigm = fluidigm[which(apply(assay(fluidigm) > 0,1,any)),]


## ----ralign-------------------------------------------------------------------

# Define a color scheme
cc <- c(brewer.pal(9, "Set1"))

# One batch per Biological Condition
batch = factor(colData(fluidigm)$Biological_Condition)

# Alignment Quality Metrics
qc = colData(fluidigm)[,metadata(fluidigm)$which_qc]

# Barplot of read proportion mapping to human transcriptome
ralign = qc$RALIGN
o = order(ralign)[order(batch[order(ralign)])] # Order by batch, then value

barplot(ralign[o], col=cc[batch][o], 
        border=cc[batch][o], main="Percentage of reads mapped")
legend("bottomleft", legend=levels(batch), fill=cc,cex=0.4)


## ----nreads-------------------------------------------------------------------

# Barplot of total read number
nreads = qc$NREADS
o = order(nreads)[order(batch[order(nreads)])] # Order by batch, then value

barplot(nreads[o], col=cc[batch][o], 
        border=cc[batch][o], main="Total number of reads")
legend("topright", legend=levels(batch), fill=cc, cex=0.4)


## ----qpc----------------------------------------------------------------------

## ----- PCA of QC matrix -----
qpc = prcomp(qc,center = TRUE,scale. = TRUE)
barplot((qpc$sdev^2)/sum(qpc$sdev^2), border="gray", 
        xlab="PC", ylab="Proportion of Variance", main="Quality PCA")


## ----qpc_view-----------------------------------------------------------------

# Barplot of PC1 of the QC matrix
qc1 = as.vector(qpc$x[,1])
o = order(qc1)[order(batch[order(qc1)])]

barplot(qc1[o], col=cc[batch][o], 
        border=cc[batch][o], main="Quality PC1")
legend("bottomright", legend=levels(batch), 
       fill=cc, cex=0.8)


## ----fnr_fit------------------------------------------------------------------

# Extract Housekeeping Genes
data(housekeeping)
hk = intersect(housekeeping$V1,rownames(assay(fluidigm)))

# Mean log10(x+1) expression
mu_obs = rowMeans(log10(assay(fluidigm)[hk,]+1))

# Assumed False Negatives
drop_outs = assay(fluidigm)[hk,] == 0

# Logistic Regression Model of Failure
ref.glms = list()
for (si in 1:dim(drop_outs)[2]){
  fit = glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,
            family=binomial(logit))
  ref.glms[[si]] = fit$coefficients
}


## ----fnr_vis,fig.width=8,fig.height=4,out.width="800px",out.height="400px"----

par(mfrow=c(1,2))

# Plot Failure Curves and Calculate AUC
plot(NULL, main = "False Negative Rate Curves",
     ylim = c(0,1),xlim = c(0,6), 
     ylab = "Failure Probability", xlab = "Mean log10 Expression")
x = (0:60)/10
AUC = NULL
for(si in 1:ncol(assay(fluidigm))){
  y = 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1)
  AUC[si] = sum(y)/10
  lines(x, 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1),
        type = 'l', lwd = 2, col = cc[batch][si])
}

# Barplot of FNR AUC
o = order(AUC)[order(batch[order(AUC)])]

barplot(AUC[o], col=cc[batch][o], border=cc[batch][o], main="FNR AUC")
legend("topright", legend=levels(batch), fill=cc, cex=0.4)


## ----metric_sample_filter, fig.height= 10,out.height="1000px"-----------------

# Initial Gene Filtering: 
# Select "common" transcripts based on proportional criteria.
num_reads = quantile(assay(fluidigm)[assay(fluidigm) > 0])[4]
num_cells = 0.25*ncol(fluidigm)
is_common = rowSums(assay(fluidigm) >= num_reads ) >= num_cells

# Metric-based Filtering
mfilt = metric_sample_filter(assay(fluidigm),
                             nreads = colData(fluidigm)$NREADS,
                             ralign = colData(fluidigm)$RALIGN,
                             gene_filter = is_common,
                             pos_controls = rownames(fluidigm) %in% hk,

                             zcut = 3, mixture = FALSE,
                             plot = TRUE)

# Simplify to a single logical
mfilt = !apply(simplify2array(mfilt[!is.na(mfilt)]),1,any)


## ----thresh,fig.width= 6, fig.height= 4, out.width="600px",out.height="400px"----

hist(qc$RALIGN, breaks = 0:100)
# Hard threshold
abline(v = 15, col = "yellow", lwd = 2)
# 3 (zcut) standard deviations below the mean ralign value
abline(v = mean(qc$RALIGN) - 3*sd(qc$RALIGN), col = "green", lwd = 2)
# 3 (zcut) MADs below the median ralign value
abline(v = median(qc$RALIGN) - 3*mad(qc$RALIGN), col = "red", lwd = 2)
# Sufficient threshold
abline(v = NULL, col = "grey", lwd = 2)

# Final threshold is the minimum of 
# 1) the sufficient threshold and 
# 2) the max of all others
thresh = min(NULL,
             max(c(15,mean(qc$RALIGN) - 3*sd(qc$RALIGN),
                   median(qc$RALIGN) - 3*mad(qc$RALIGN))))
abline(v = thresh, col = "blue", lwd = 2, lty = 2)

legend("topleft",legend = c("Hard","SD","MAD","Sufficient","Final"),
       lwd = 2, col = c("yellow","green","red","grey","blue"),
       lty = c(1,1,1,1,2), cex = .5)


## ----filterCount--------------------------------------------------------------

goodDat = fluidigm[,mfilt]

# Final Gene Filtering: Highly expressed in at least 5 cells
num_reads = quantile(assay(fluidigm)[assay(fluidigm) > 0])[4]
num_cells = 5
is_quality = rowSums(assay(fluidigm) >= num_reads ) >= num_cells



## ----scone_init---------------------------------------------------------------


# Expression Data (Required)
expr = assay(goodDat)[is_quality,]

# Biological Origin - Variation to be preserved (Optional)
bio = factor(colData(goodDat)$Biological_Condition)

# Processed Alignment Metrics - Variation to be removed (Optional)
qc = colData(goodDat)[,metadata(goodDat)$which_qc]
ppq = scale(qc[,apply(qc,2,sd) > 0],center = TRUE,scale = TRUE)

# Positive Control Genes - Prior knowledge of DE (Optional)
poscon = intersect(rownames(expr),strsplit(paste0("ALS2, CDK5R1, CYFIP1,",
                                                  " DPYSL5, FEZ1, FEZ2, ",
                                                  "MAPT, MDGA1, NRCAM, ",
                                                  "NRP1, NRXN1, OPHN1, ",
                                                  "OTX2, PARD6B, PPT1, ",
                                                  "ROBO1, ROBO2, RTN1, ",
                                                  "RTN4, SEMA4F, SIAH1, ",
                                                  "SLIT2, SMARCA1, THY1, ",
                                                  "TRAPPC4, UBB, YWHAG, ",
                                                  "YWHAH"),split = ", ")[[1]])

# Negative Control Genes - Uniformly expressed transcripts (Optional)
negcon = intersect(rownames(expr),hk)

# Creating a SconeExperiment Object
my_scone <- SconeExperiment(expr,
                qc=ppq, bio = bio,
                negcon_ruv = rownames(expr) %in% negcon,
                poscon = rownames(expr) %in% poscon
)


## ----scone_in2----------------------------------------------------------------

## ----- User-defined function: Dividing by number of detected genes -----

EFF_FN = function (ei)
{
  sums = colSums(ei > 0)
  eo = t(t(ei)*sums/mean(sums))
  return(eo)
}

## ----- Scaling Argument -----

scaling=list(none=identity, # Identity - do nothing

             eff = EFF_FN, # User-defined function

             sum = SUM_FN, # SCONE library wrappers...
             tmm = TMM_FN, 
             uq = UQ_FN,
             fq = FQT_FN,
             deseq = DESEQ_FN)


## ----scone_in3, eval=FALSE----------------------------------------------------
#  
#  # Simple FNR model estimation with SCONE::estimate_ziber
#  fnr_out = estimate_ziber(x = expr, bulk_model = TRUE,
#                           pos_controls = rownames(expr) %in% hk,
#                           maxiter = 10000)
#  
#  ## ----- Imputation List Argument -----
#  imputation=list(none=impute_null, # No imputation
#                  expect=impute_expectation) # Replace zeroes
#  
#  ## ----- Imputation Function Arguments -----
#  # accessible by functions in imputation list argument
#  impute_args = list(p_nodrop = fnr_out$p_nodrop, mu = exp(fnr_out$Alpha[1,]))
#  
#  my_scone <- scone(my_scone,
#                  imputation = imputation, impute_args = impute_args,
#                  scaling=scaling,
#                  k_qc=3, k_ruv = 3,
#                  adjust_bio="no",
#                  run=FALSE)

## ----scone_params-------------------------------------------------------------

my_scone <- scone(my_scone,
                scaling=scaling,
                k_qc=3, k_ruv = 3,
                adjust_bio="no",
                run=FALSE)

head(get_params(my_scone))


## ----scone_params_view--------------------------------------------------------

apply(get_params(my_scone),2,unique)


## ----scone_params_filt, eval=FALSE--------------------------------------------
#  
#  is_screened = ((get_params(my_scone)$imputation_method == "expect") &
#                   (get_params(my_scone)$scaling_method %in% c("none",
#                                                               "eff")))
#  
#  my_scone = select_methods(my_scone,
#                            rownames(get_params(my_scone))[!is_screened ])
#  

## ----scone_run----------------------------------------------------------------

BiocParallel::register(
  BiocParallel::SerialParam()
) # Register BiocParallel Serial Execution

my_scone <- scone(my_scone,
                  scaling=scaling,
                  run=TRUE,
                  eval_kclust = 2:6,
                  stratified_pam = TRUE,
                  return_norm = "in_memory",
                  zero = "postadjust")


## ----scone_view1--------------------------------------------------------------

# View Metric Scores
head(get_scores(my_scone))

# View Mean Score Rank
head(get_score_ranks(my_scone))

# Extract normalized data from top method
out_norm = get_normalized(my_scone,
                          method = rownames(get_params(my_scone))[1])


## ----biplot_color-------------------------------------------------------------

pc_obj = prcomp(apply(t(get_scores(my_scone)),1,rank),
                center = TRUE,scale = FALSE)
bp_obj = biplot_color(pc_obj,y = -get_score_ranks(my_scone),expand = .6)


## ----biplot_color4------------------------------------------------------------

bp_obj = biplot_color(pc_obj,y = -get_score_ranks(my_scone),expand = .6)

points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1)
points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1.5)

points(t(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",]),
       pch = 1, col = "blue", cex = 1)
points(t(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",]),
       pch = 1, col = "blue", cex = 1.5)

arrows(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",][1],
       bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",][2],
       bp_obj[1,][1],
       bp_obj[1,][2],
       lty = 2, lwd = 2)


## ----sconeReport, eval=FALSE--------------------------------------------------
#  
#  # Methods to consider
#  scone_methods = c(rownames(get_params(my_scone))[1:12],
#                    "none,none,no_uv,no_bio,no_batch")
#  
#  # Shiny app
#  sconeReport(my_scone,methods = scone_methods,
#              qc = ppq,
#              bio = bio,
#              negcon = negcon, poscon = poscon)
#  

## ----session------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
scone documentation built on Nov. 8, 2020, 5:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
scone
Single Cell Overview of Normalized Expression data

inst/doc/sconeTutorial.R
In scone: Single Cell Overview of Normalized Expression data

Try the scone package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

scone Single Cell Overview of Normalized Expression data

inst/doc/sconeTutorial.R In scone: Single Cell Overview of Normalized Expression data

Try the scone package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

scone
Single Cell Overview of Normalized Expression data

inst/doc/sconeTutorial.R
In scone: Single Cell Overview of Normalized Expression data