R/idFeatures.R
In flowStats: Statistical methods for the analysis of flow cytometry data

Documented in idFeaturesByBackgating

## Internal use only.
## Identify and labeling significant features using backgating return values and
## divisive clustering method (diana from the cluster package).
##
## input argument:
##     bg    A data.frame, must be a return value of the backgating function
##     nDim  The number of dimension of the data subject to normalization
##     thres.merge Threshold (defult to 0.2). A proportion to the high
##                 (distance) between potential features. Used to divide
##                 clusters of features.
##     lambda      Threshold (defult to 0.05). A proportion to the numbe of
##                 samples. Used to decide outliers of features.
##     weight.mfeature Weight bogus features as zero
##     plot.workflow   Plot results of workflow of 'idFeatures'
##
## return value:
##     register        A list containg features for each samples.
##

idFeaturesByBackgating <- function(bg, nDim, thres.sigma=2.5, lambda=0.1,
                                   reference.method="median",
                                   plot.workflow=FALSE,
                                   ask=names(dev.cur())!="pdf")
{   
    ## 'bg' is the return value of function flowStats:::backGating
    if (lambda > 1) {
        message("idFeatures: lambda must be between 0 to 1. Defaulting to
                 0.1. \n")
        lambda <- 0.1
    }
    
    if (! reference.method %in% c("median", "mean"))
        stop(paste(reference.method, "is not a valid method."))
    
    ## 1. take all the centoids from all the backgating and apply 'diana'
    ##    to get the clusters of features
    cent <- bg[bg$type=="centroid", ]
    # cent.translate <- .centerFigure(cent, nDim)
    
    ## cluster centered centroids using 'diana' 
    cent.clust <- .useDivCluster(cent, nDim, thres.sigma, cut.by.height=TRUE)

    ## 2. label the outliers
    filt.clust <- .filterOutlier(cent.clust, lambda=lambda, keep.outlier=TRUE,
                                 label.outlier=".outlier")

    ## 3. get reference features, do not include outliers
    ## reference <- .calcReference(filt.clust, nDim) # if keep.outlier=FALSE
    reference <- .calcReference(
                    filt.clust[!filt.clust$cluster %in% ".outlier", ], nDim,
                    method=reference.method)

    ## 4. register features for each samples (with bogus features)
    perSample <- split(filt.clust, factor(filt.clust$sample))
    register  <- lapply(perSample, .classifyFeatures,
                        refF=reference, nDim=nDim)
    ## 5. prepare return value register (containing the reference features)
   
    register$reference <- reference
    
    if (plot.workflow)
        .plotWorkFlow(cent=cent, cent.clust=cent.clust,
                      filt.clust=filt.clust, register=register, ask=ask)
    
     return(register)
}

###############################
## get the centered figure
###############################
.centerFigure <- function(cent, nDim)
{
  ## cent: centroids of the return values of backgating
  ## return translated centroids (called centered figures)
  TransMatrix <- lapply(split(cent, cent$sample),
                        function(x) mean(x[, 1:nDim], na.rm=TRUE))
  cF <- lapply(split(cent, cent$sample),
               function(x, TransMatrix) {
                   I <- matrix(1, nrow=nrow(x), ncol=1)
                   x[, 1:nDim] <- x[, 1:nDim] - I %*% TransMatrix[[x$sample[1]]]
                   x
               }, TransMatrix)
  
  return(list(centered=do.call(rbind, cF), TransMatrix=TransMatrix))
}

#################################################
## filter out outlier cluster and cluster members
## threshold = lambda * number of total features
#################################################
.filterOutlier <-function(center, lambda=0.1,
                          keep.outlier=FALSE, label.outlier=NULL)
{
    if (keep.outlier & is.null(label.outlier))
        label.outlier <- ".outlier"
    
    nclust <- table(center$cluster)
    outlier_thres <- lambda * sum(nclust)
    #outlier_thres <- median(table(center$cluster)) * lambda
    outlier <- names(nclust)[nclust < outlier_thres]
    if (length(outlier) == length(nclust))
        stop("idFeature: chose smaller lambda.")

    ifelse(keep.outlier,
       center$cluster[center$cluster %in% outlier] <- label.outlier,
       center <- center[!center$cluster %in% outlier, ])

    attr(center, "outlier_thres") <- outlier_thres
    return(center)
}

##############################
## calculate the refenrence ##
##############################
.calcReference <- function(center, nDim=NULL, method="mean")  {
  
    ## return value: ref is a data frame with columns of channels of interest,
    ## cluster, sample, and bogus.
  
    if (is.null(nDim)) stop("Dimension of the variables must be provided")
    
    ref <- do.call(rbind, lapply(split(center, center$cluster),
                   function(a) {
                      ref <- as.data.frame(t(apply(a[, 1:nDim], 2, method)))
                      ref$cluster <- a$cluster[1]
                      return(ref)
                  }))
    ref$sample <- "reference"
    ref$bogus <- rep(FALSE, nrow(ref))
    return(ref)
}


###########################################################
## use divisive hierarchical clustering, diana-algorithm ##
###########################################################
.useDivCluster <- function(center, nDim, thres.sigma=2.5,
                           cut.by.height=TRUE, k=2) {
    ## sign (or re-sign the cluster label)
    clustD  <- cluster::diana(center[, 1:nDim])
    
    ## cut into n groups by 'heightThres' or k
    if (cut.by.height) {
        ## heightThres indicates where the tree should be cut
        heightThres <- sd(clustD$height) * thres.sigma
        center$cluster <- stats::cutree(as.hclust(clustD), h=heightThres)
        attr(center, "height") <- heightThres
        
    }
    else {
        center$cluster <- stats::cutree(as.hclust(clustD), k=k)
        attr(center, "k") <- k
    }

    attr(center, "cluster") <- clustD
    return(center)
}

######################
## classifyFeatures ##
######################
.classifyFeatures <- function(eachSample, refF, nDim)
{
  ## initialize parameter: dim and dim names of reg are equeal xzto that of refF
  reg <- refF
  reg[, 1:nDim] <- NA
  reg$sample <- rep(eachSample$sample[1], nrow(refF))
  
  #reg <- data.frame(apply(refF, 2, function(x) rep(NA, length(x))),
  #                  sample=rep(eachSample$sample[1], nrow(refF)),
  #                  bogus=rep(TRUE, nrow(refF)))
  #reg$cluster <- refF$cluster
  
  perClust <- split(eachSample, factor(eachSample$cluster))

  ## ignore outliers (the label not in refF$clust
  label <- unique(eachSample$cluster)
  anyOutlier <- label[!label %in% refF$cluster]
  if (length(anyOutlier))
      perClust <- perClust[!names(perClust) %in% anyOutlier]


  dim <- seq_len(nDim)
  
  for (j in 1:length(perClust)) {
       lab <- perClust[[j]]$cluster[1]
       if (nrow(perClust[[j]]) == 1) 
            reg[reg$cluster==lab, dim] <- as.matrix(perClust[[j]][, dim])
       else {
           tmp <- rbind(refF[lab, dim], perClust[[j]][,dim])
           
           #we don't need to call compositions::dist here since tmp is data.frame
           # and it always falls back to the stats::dist without any
          # compositions::cdt transformation
           mindist <-
                   which.min(as.numeric(dist(tmp))[1:nrow(perClust[[j]])])
              
           reg[reg$cluster==lab, dim] <- as.matrix(perClust[[j]][mindist, dim])
       }
       reg$bogus[reg$cluster==lab] <- FALSE
   }

  ## 2. deal with NA peaks, give the refFeatures and cluster status as bogus
    naIdx <- which(is.na(reg[, 1]))
  
    if (!identical(naIdx, integer(0))) {
       reg[naIdx, dim] <- refF[naIdx, dim]
       reg[naIdx, "bogus"] <- TRUE
    }
  
  return(reg)
}


###################################################
## check if the number of features for
## each sample is equal to that of reference
## if the feature is missing, replace with a bogus
###################################################
.checkFeatureNum <- function(combf, reference, nDim) {
    lapply(combf, function(x) {
        if (nrow(x) != nrow(reference)) {
           i <- nrow(x)
           x[i+1,] <- x[i,]
           x[i+1, 1:(nDim+1)] <-
               reference[!(reference$cluster %in% x$cluster), 1:(nDim+1)]
           x$bogus[i+1] = TRUE
           x <- x[sort(x$cluster), ]
        }
        x
    })
}


################################
## plot the results of workflow
################################

.plotWorkFlow <- function(cent, cent.clust, filt.clust, register, ask)
{
  par(ask=ask)
  on.exit(par(ask=FALSE))
  ## 1. cent
  fo <- as.formula(paste(names(cent)[1], "~", names(cent)[2]))
  mainTitle <- do.call(paste, as.list(unique(cent$channel)))
  mainTitle <- paste("Centroids of high-density areas yielded by
                     backgating on channel", mainTitle)
  print(lattice::xyplot(fo, group=channel,
                        data=cent,
                        auto.key=list(cex=0.8, title="channel", space="right"),
                        main=mainTitle,
                        sub="potential features"))
  
  col <- rainbow(length(unique(cent$sample)))
  key <- list(text=list(levels(factor(cent$sample))),
              points=list(col=col, pch=20, alpha=0.7),
              cex=0.7, title="sample")
  
  print(lattice::xyplot(fo, group=sample,
                        data=cent,
                        col=col, pch=20, alpha=0.7,
                        key=c(key, space="right"),
                        main=mainTitle,
                        sub="potential features"))
  
  ## 2. cent.translate$centered
  #print(lattice::xyplot(fo, group=sample,
  #                     data=cent.translate$centered,
  #                     auto.key=list(cex=0.8, title="sample", space="right"),
  #                     main="Centered figure: translate the center of
  #                           centroids to the origin"))
  
  ## 2. cent.clust
  subTitle <- paste("Clusters containing <",
                    format(attr(cent.clust, "height"), digit=3),
                    "members are considered outliers.")
  
  
  print(plot(attr(cent.clust, "cluster"), which.plot=2,
             xlab="Potential Features",
             main=paste("Cut the tree at height",
                         format(attr(cent.clust, "height"), digit=3))
             ))
  
  d <- density(attr(cent.clust, "cluster")$height)
  print(plot(d, main="Distribution of the cluster (diana) height"))
  abline(v=attr(cent.clust, "height"), col="grey")
  text(x= attr(cent.clust, "height"), y=mean(d$y)+2*sd(d$y),
       labels="cutting the\n tree here", pos=4)
  
  print(lattice::xyplot(fo, group=cluster,
                   data=cent.clust,
                   auto.key=list(cex=0.8,title="cluster", space="right"),
                   main="Clusters of potential features",
                   sub=subTitle))
  ## plot dendrogram of attr(cent.clust, "cluster")
  
  
  ## 3. filt.clust
  print(lattice::xyplot(fo, group=cluster,
                        data=filt.clust,
                        auto.key=list(cex=0.8, title="channel", space="right"),
                        sub=subTitle,
                        main="Clusters of features and outliers"))
                        
  ## 4. register -- final features and reference features
  fo <- as.formula(paste(names(cent)[1], "~", names(cent)[2],
                         " | sample"))
  print(lattice::xyplot(fo , group=cluster,
                        data=do.call(rbind, register),
                        auto.key=list(cex=0.8, title="label",space="right"),
                        main="Labeled features"))
  print(lattice::xyplot(fo, group=bogus,
                        data=do.call(rbind, register),
                        auto.key=list(cex=0.8, title="Bogus",space="right"),
                        main="Labeled features",
                        sub="Missing features are those with bogus=TURE."))
        
}

Any scripts or data that you put into this service are public.

flowStats documentation built on Nov. 8, 2020, 6:49 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

flowStats
Statistical methods for the analysis of flow cytometry data

R/idFeatures.R
In flowStats: Statistical methods for the analysis of flow cytometry data

Defines functions .plotWorkFlow .checkFeatureNum .classifyFeatures .useDivCluster .calcReference .filterOutlier .centerFigure idFeaturesByBackgating

Documented in idFeaturesByBackgating

Try the flowStats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

flowStats Statistical methods for the analysis of flow cytometry data

R/idFeatures.R In flowStats: Statistical methods for the analysis of flow cytometry data

Defines functions .plotWorkFlow .checkFeatureNum .classifyFeatures .useDivCluster .calcReference .filterOutlier .centerFigure idFeaturesByBackgating

Documented in idFeaturesByBackgating

Try the flowStats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

flowStats
Statistical methods for the analysis of flow cytometry data

R/idFeatures.R
In flowStats: Statistical methods for the analysis of flow cytometry data