R/utils.R

Defines functions MapQueryData.local PairwiseIntegrateReference.STACAS ParseRow ParseMergePair.local AddDatasetID.2 FindAnchors.wdist ScoreAnchors.Seurat FilterAnchors.Seurat TopDimFeatures.Seurat FindAnchorPairs.Seurat FindNN.Seurat Sweep `%||%` FindIntegrationAnchors.wdist AdjustSampleTree.Seurat CountAnchors.Seurat check.genes get.blocklist weighted.Anchors.STACAS threshold_from_percentile strength_function probabilistic_reject reweight_anchors inconsistent_anchors logistic

# Logistic transformation
logistic <- function(x,scale = NULL, center = NULL, invert = F){
  a <- ifelse(invert,1, -1)
  if(is.null(scale))  scale = sd(x)
  if(is.null(center)) center = mean(x)
  sigm <- 1/(1+ exp(a*(x-center)/scale))
  return(sigm)    
}

# Mark anchor as inconsistent based on cell-type labeling 
# Anchors will be consider as inconsistent if they are linking cells classified as different cell populations. 
inconsistent_anchors <- function(anchors, seed=123,
                                 cell.labels=NULL,
                                 label.confidence=1,
                                 quantile_ss=0){
  
  if (! cell.labels %in% colnames(anchors@object.list[[1]]@meta.data)) {
    stop(sprintf("Please specify a valid metadata column with label annotations (cell.labels). %s not found", cell.labels))
  }
  
  labels <- data.frame()
  for (i in seq_along(anchors@object.list)){
    x <- anchors@object.list[[i]]
    labels <- rbind(labels, data.frame(dataset = i, ncell = 1:ncol(x),
                                       label = as.character(x@meta.data[,cell.labels])))
  }
  
  labels[,"dataset_cell"] <- paste0(labels[,"dataset"], "_", labels[,"ncell"])
  
  #Convert NAs to 'unknown'
  nas <- is.na(labels[,"label"])
  labels[nas, "label"] <- "unknown"
  nas <- labels[, "label"] == "NaN"
  labels[nas, "label"] <- "unknown"
  
  #Exclude 'Multi' labels
  multis <- tolower(labels[, "label"]) == "multi"
  labels[multis, "label"] <- "unknown"
  
  df <- anchors@anchors
  df[,"dataset1_cell1"] <- paste0(df[,"dataset1"], "_", df[,"cell1"])
  df[,"dataset2_cell2"] <- paste0(df[,"dataset2"], "_", df[,"cell2"])
  
  order1 <- match(df$dataset1_cell1, labels$dataset_cell)
  df[,"Label_1"] <- labels[order1, "label"]
  
  order2 <- match(df$dataset2_cell2, labels$dataset_cell)
  df[,"Label_2"] <- labels[order2, "label"]
  
  #Flag consistency of labels
  df$Consistent <- df$Label_1 == df$Label_2 | df$Label_1 == "unknown" | df$Label_2 == "unknown"
  df$Consistent[is.na(df$Consistent)] <- TRUE
  
  #Rescue some anchors with a given probability

  df <- probabilistic_reject(anchors=df, accept_rate = 1-label.confidence,
                                    q = quantile_ss, seed = seed)
  anchors@anchors <- df
  return(anchors)
}

#Re-weight anchors based on combined seurat anchor score and rPCA distance
reweight_anchors <- function(ref.anchors, alpha=0.8,
                                 dist.pct = 0.5,
                                 dist.scale.factor = 2)
{
  df <- ref.anchors@anchors
  eps <- 10^(-6) #small number to avoid log(0)
  
  dist.mean.center = quantile(df$dist.mean,dist.pct)
  dist.mean.scale = sd(df$dist.mean,na.rm = T)/dist.scale.factor
  
  squash <- logistic(x = df$dist.mean, invert = T, 
                     center = dist.mean.center, 
                     scale = dist.mean.scale)
  
  #weighted geometric mean
  logsum = alpha*log(squash + eps) + (1-alpha)*log(df$knn.score + eps)
  df$score <- exp(logsum) - eps
  
  ref.anchors@anchors <- df
  return(ref.anchors)
}

# Probabilistic rejection of inconsistent anchors
# accept_rate: probability of rejecting anchors when labels are inconsistent
# q: anchor score quantile above which probabilistic rejection applies
probabilistic_reject <- function(anchors, accept_rate=0, q=0, seed=seed){
  
  set.seed(seed)
  
  e0 <- quantile(anchors$score, q)
  pos <- anchors[anchors$Consistent==FALSE, "score"] - e0
  bin <- as.numeric(pos > 0)
  
  #Randomly accept anchors with accept_rate probability above quantile=q
  accept <- bin*accept_rate > runif(length(bin), 0, 1)

  #Add this column
  flag <- anchors$Consistent
  flag[flag==FALSE] <- accept
  anchors['Retain_ss'] <- flag

  return(anchors)
}

# To be used in the distance matrix among datasets (when building SampleTree integration)
strength_function <- function(anchor.df,
                              method = "weight.sum", 
                              usecol = "score"
                              ){

  if(method== "counts") {
    strength <- nrow(anchor.df)
  }
  
  if(method== "mean.score") {
    if(usecol == "dist.mean")   strength <- mean(1-anchor.df[,usecol])
    if(usecol == "score")    strength <- mean(anchor.df[,usecol])
  }

  if(method== "weight.sum") {
    if(usecol == "dist.mean")  strength <- sum((1-anchor.df[,usecol]))
    if(usecol == "score")  strength <- sum((anchor.df[,usecol]))
  }
  return(strength)    
}

#aux function to determine anchor threshold from pairwise anchor score percentile
threshold_from_percentile <- function(anchors, dist.pct=0.8) {
  i=1
  min.pcntile <- vector()
  for (l1 in lev1) {
    j=1
    pcntile <- vector()
    for (l2 in lev2) {
      v <- subset(ref.anchors@anchors, subset=(dataset1==l1 & dataset2==l2))$dist.mean
      if(length(v)>0) {
        pcntile[j] <- sort(v)[dist.pct*length(v)]
        j=j+1
      }
    }
    min.pcntile[i] <- min(pcntile)
    i=i+1
  }
  dist.thr <- min(min.pcntile)
  return(dist.thr)
}

# Compute similarity matrix betwen datasets based on different criteria
weighted.Anchors.STACAS <- function(
  anchor.df,
  offsets,
  obj.lengths,
  usecol = "score",
  method = "weight.sum"
) {
  usecol = match.arg(arg = usecol ,choices = c("dist.mean","score"))
  method = match.arg(arg = method ,choices = c("weight.sum","counts"))
  
  similarity.matrix <- matrix(data = 0, ncol = length(x = offsets), nrow = length(x = offsets))
  similarity.matrix[upper.tri(x = similarity.matrix, diag = TRUE)] <- NA
  total.cells <- sum(obj.lengths)
  offsets <- c(offsets, total.cells)
  for (i in 1:nrow(x = similarity.matrix)){
    for (j in 1:ncol(x = similarity.matrix)){
      if (!is.na(x = similarity.matrix[i, j])){
        relevant.rows <- anchor.df[(anchor.df$dataset1 %in% c(i, j)) & (anchor.df$dataset2 %in% c(i, j)), ]
        score <- strength_function(relevant.rows, method = method, usecol = usecol)/2
        ncell <- min(obj.lengths[[i]], obj.lengths[[j]])
        similarity.matrix[i, j] <- score / ncell
      }
    }
  }
  return(similarity.matrix)
}

get.blocklist = function(obj) {
  data("genes.blocklist")
  
  mm <- intersect(unlist(genes.blocklist$Mm), rownames(obj))
  hs <- intersect(unlist(genes.blocklist$Hs), rownames(obj))
  
  if (length(mm) > length(hs)) {
    return(genes.blocklist$Mm)
  } else {
    return(genes.blocklist$Hs)
  }
}

check.genes <- function(obj.list) {
  allgenes <- rownames(obj.list[[1]])
  lgt <- length(obj.list) 
  for (i in 2:lgt) {
    allgenes <- c(allgenes, rownames(obj.list[[i]]))
  }
  frac <- table(allgenes)/lgt
  
  found.in.all <- names(frac)[frac == 1]
  frac.all <- length(found.in.all) / length(unique(allgenes))
  
  if (frac.all < 1) {
    message(sprintf("%.1f %% of genes found across all datasets", 100*frac.all))
  }  
  if (frac.all < 0.5) {
    warning("More than half of the genes were not consistently found across datasets. Please check that datasets use the same gene symbols")
  }
  
  return(found.in.all)
}

# Count anchors between data sets
# IMPORTED from Seurat 3.2.1
CountAnchors.Seurat <- function(
    anchor.df,
    offsets,
    obj.lengths
) {
  similarity.matrix <- matrix(data = 0, ncol = length(x = offsets), nrow = length(x = offsets))
  similarity.matrix[upper.tri(x = similarity.matrix, diag = TRUE)] <- NA
  total.cells <- sum(obj.lengths)
  offsets <- c(offsets, total.cells)
  for (i in 1:nrow(x = similarity.matrix)){
    for (j in 1:ncol(x = similarity.matrix)){
      if (!is.na(x = similarity.matrix[i, j])){
        relevant.rows <- anchor.df[(anchor.df$dataset1 %in% c(i, j)) & (anchor.df$dataset2 %in% c(i, j)), ]
        score <- nrow(x = relevant.rows)
        ncell <- min(obj.lengths[[i]], obj.lengths[[j]])
        similarity.matrix[i, j] <- score / ncell
      }
    }
  }
  return(similarity.matrix)
}

# Adjust sample tree to only include given reference objects
# IMPORTED from Seurat 3.2.1
AdjustSampleTree.Seurat <- function(x, reference.objects) {
  for (i in 1:nrow(x = x)) {
    obj.id <- -(x[i, ])
    if (obj.id[[1]] > 0) {
      x[i, 1] <- -(reference.objects[[obj.id[[1]]]])
    }
    if (obj.id[[2]] > 0) {
      x[i, 2] <- -(reference.objects[[obj.id[[2]]]])
    }
  }
  return(x)
}

###Modified function to return anchor distances together with anchor pairs - distances can be used to filter anchors at a later stage
FindIntegrationAnchors.wdist <- function(
    object.list = NULL,
    assay = NULL,
    reference = NULL,
    normalization.method = "LogNormalize",
    anchor.features = 2000,
    l2.norm = TRUE,
    dims = 1:10,
    k.anchor = 5,
    k.filter = 200,
    k.score = 30,
    max.features = 200,
    verbose = TRUE
) {
  
  reduction <- "pca"
  nn.method = "rann"
  eps = 0
  
  object.ncells <- sapply(X = object.list, FUN = function(x) dim(x = x)[2])
  if (any(object.ncells <= max(dims))) {
    bad.obs <- which(x = object.ncells <= max(dims))
    stop("Max dimension too large: objects ", paste(bad.obs, collapse = ", "),
         " contain fewer than ", max(dims), " cells. \n Please specify a",
         " maximum dimensions that is less than the number of cells in any ",
         "object (", min(object.ncells), ").")
  }
  if (!is.null(x = assay)) {
    if (length(x = assay) != length(x = object.list)) {
      stop("If specifying the assay, please specify one assay per object in the object.list")
    }
    object.list <- sapply(
      X = 1:length(x = object.list),
      FUN = function(x) {
        DefaultAssay(object = object.list[[x]]) <- assay[x]
        return(object.list[[x]])
      }
    )
  } else {
    assay <- sapply(X = object.list, FUN = DefaultAssay)
  }
  object.list <- Seurat:::CheckDuplicateCellNames(object.list)
  
  slot <- "data"
  
  if (is.numeric(anchor.features)) {
    if (verbose) {
      message("Computing ", anchor.features, " integration features")
    }
    anchor.features <- SelectIntegrationFeatures(
      object.list = object.list,
      nfeatures = anchor.features,
      assay = assay
    )
  }
  nn.reduction <- reduction
  # if using pca, only need to compute the internal neighborhood structure once
  # for each dataset
  internal.neighbors <- list()
  if (nn.reduction == "pca") {
    k.filter <- NA
    if (verbose) {
      message("Computing within dataset neighborhoods")
    }
    k.neighbor <- max(k.anchor, k.score)
    internal.neighbors <- pbapply::pblapply(
      X = 1:length(x = object.list),
      FUN = function(x) {
        Seurat:::NNHelper(
          data = Embeddings(object = object.list[[x]][[nn.reduction]])[, dims],
          k = k.neighbor + 1,
          method = nn.method,
          eps = eps
        )
      }
    )
  }
  
  # determine pairwise combinations
  combinations <- expand.grid(1:length(x = object.list), 1:length(x = object.list))
  combinations <- combinations[combinations$Var1 < combinations$Var2, , drop = FALSE]
  # determine the proper offsets for indexing anchors
  objects.ncell <- sapply(X = object.list, FUN = ncol)
  offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)]
  if (is.null(x = reference)) {
    # All pairwise combinations
    if (verbose) {
      message("Finding all pairwise anchors")
    }
  } else {
    reference <- unique(sort(reference))
    if (max(reference) > length(object.list)) {
      stop('Error: requested reference object ', max(reference), " but only ",
           length(object.list), " objects provided")
    }
    # modify the combinations matrix to retain only R-R and R-Q comparisons
    if (verbose) {
      message("Finding anchors between all query and reference datasets")
      ok.rows <- (combinations$Var1 %in% reference) | (combinations$Var2 %in% reference)
      combinations <- combinations[ok.rows, ]
    }
  }
  # determine all anchors
  all.anchors <- pbapply::pblapply(
    X = 1:nrow(x = combinations),
    FUN = function(row) {
      i <- combinations[row, 1]
      j <- combinations[row, 2]
      suppressWarnings(
      object.1 <- DietSeurat(
        object = object.list[[i]],
        assays = assay[i],
        features = anchor.features,
        counts = FALSE,
        scale.data = TRUE,
        dimreducs = reduction
      ))
      suppressWarnings(
      object.2 <- DietSeurat(
        object = object.list[[j]],
        assays = assay[j],
        features = anchor.features,
        counts = FALSE,
        scale.data = TRUE,
        dimreducs = reduction
      ))
      # suppress key duplication warning
      suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]])
      DefaultAssay(object = object.1) <- "ToIntegrate"
      if (reduction %in% Reductions(object = object.1)) {
        slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate"
      }
      object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
      suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]])
      DefaultAssay(object = object.2) <- "ToIntegrate"
      if (reduction %in% Reductions(object = object.2)) {
        slot(object = object.2[[reduction]], name = "assay.used") <- "ToIntegrate"
      }
      object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)

      common.features <- intersect(
        x = rownames(x = Loadings(object = object.1[["pca"]])),
        y = rownames(x = Loadings(object = object.2[["pca"]]))
      )
      object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE)
      
      projected.embeddings.1<- t(x = GetAssayData(object = object.1, slot = "scale.data")[common.features, ]) %*%
        Loadings(object = object.2[["pca"]])[common.features, ]
      
      object.pair[['projectedpca.1']] <- CreateDimReducObject(
        embeddings = rbind(projected.embeddings.1, Embeddings(object = object.2[["pca"]])),
        assay = DefaultAssay(object = object.1),
        key = "projectedpca1_"
      )
      projected.embeddings.2 <- t(x = GetAssayData(object = object.2, slot = "scale.data")[common.features, ]) %*%
        Loadings(object = object.1[["pca"]])[common.features, ]
      object.pair[['projectedpca.2']] <- CreateDimReducObject(
        embeddings = rbind(projected.embeddings.2, Embeddings(object = object.1[["pca"]])),
        assay = DefaultAssay(object = object.2),
        key = "projectedpca2_"
      )
      object.pair[["pca"]] <- CreateDimReducObject(
        embeddings = rbind(
          Embeddings(object = object.1[["pca"]]),
          Embeddings(object = object.2[["pca"]])),
        assay = DefaultAssay(object = object.1),
        key = "pca_"
      )
      
      slot(object = object.pair[["projectedpca.1"]], name = "cell.embeddings") <- Sweep(
        x = Embeddings(object = object.pair[["projectedpca.1"]]),
        MARGIN = 2,
        STATS = apply(X = Embeddings(object = object.pair[["projectedpca.1"]]), MARGIN = 2, FUN = sd),
        FUN = "/"
      )
      slot(object = object.pair[["projectedpca.2"]], name = "cell.embeddings") <- Sweep(
        x = Embeddings(object = object.pair[["projectedpca.2"]]),
        MARGIN = 2,
        STATS = apply(X = Embeddings(object = object.pair[["projectedpca.2"]]), MARGIN = 2, FUN = sd),
        FUN = "/"
      )
      object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.1")
      object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.2")
      
      reduction <- "projectedpca.1.l2"
      reduction.2 <- "projectedpca.2.l2"

      internal.neighbors <- internal.neighbors[c(i, j)]
      
      anchors <- FindAnchors.wdist(
        object.pair = object.pair,
        assay = c("ToIntegrate", "ToIntegrate"),
        slot = slot,
        cells1 = colnames(x = object.1),
        cells2 = colnames(x = object.2),
        internal.neighbors = internal.neighbors,
        reduction = reduction,
        reduction.2 = reduction.2,
        nn.reduction = nn.reduction,
        dims = dims,
        k.anchor = k.anchor,
        k.filter = k.filter,
        k.score = k.score,
        max.features = max.features,
        nn.method = nn.method,
        eps = eps,
        verbose = verbose
      )
      anchors[, 1] <- anchors[, 1] + offsets[i]
      anchors[, 2] <- anchors[, 2] + offsets[j]
      
      return(anchors)
    }
  )
  
  all.anchors <- do.call(what = 'rbind', args = all.anchors)
  
  all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3, 5, 4)])  ##keep distance information
  
  all.anchors <- AddDatasetID.2(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell)  ##add dataset IDs
  command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE)
  anchor.set <- new(Class = "IntegrationAnchorSet",
                    object.list = object.list,
                    reference.objects = reference %||% seq_along(object.list),
                    anchors = all.anchors,
                    offsets = offsets,
                    anchor.features = anchor.features,
                    command = command
  )
  return(anchor.set)
}

#First (otherwise second) function
`%||%` <- function(lhs, rhs) {
  if (!is.null(x = lhs)) {
    return(lhs)
  } else {
    return(rhs)
  }
}

##Modified Sweep function (from Seurat 3.1)
Sweep <- function(x, MARGIN, STATS, FUN = '-', check.margin = TRUE, ...) {
  if (any(grepl(pattern = 'X', x = names(x = formals(fun = sweep))))) {
    return(sweep(
      X = x,
      MARGIN = MARGIN,
      STATS = STATS,
      FUN = FUN,
      check.margin = check.margin,
      ...
    ))
  } else {
    return(sweep(
      x = x,
      MARGIN = MARGIN,
      STATS = STATS,
      FUN = FUN,
      check.margin = check.margin,
      ...
    ))
  }
}

# Find nearest neighbors
# IMPORTED from Seurat 3.2.1
FindNN.Seurat <- function(
    object,
    cells1 = NULL,
    cells2 = NULL,
    internal.neighbors,
    grouping.var = NULL,
    dims = 1:10,
    reduction = "cca.l2",
    reduction.2 = character(),
    nn.dims = dims,
    nn.reduction = reduction,
    k = 300,
    nn.method = "rann",
    eps = 0,
    integration.name = 'integrated',
    verbose = TRUE
) {
  if (xor(x = is.null(x = cells1), y = is.null(x = cells2))) {
    stop("cells1 and cells2 must both be specified")
  }
  if (!is.null(x = cells1) && !is.null(x = cells2) && !is.null(x = grouping.var)) {
    stop("Specify EITHER grouping.var or cells1/2.")
  }
  if (is.null(x = cells1) && is.null(x = cells2) && is.null(x = grouping.var)) {
    stop("Please set either cells1/2 or grouping.var")
  }
  if (!is.null(x = grouping.var)) {
    if (nrow(x = unique(x = object[[grouping.var]])) != 2) {
      stop("Number of groups in grouping.var not equal to 2.")
    }
    groups <- names(x = sort(x = table(object[[grouping.var]]), decreasing = TRUE))
    cells1 <- colnames(x = object)[object[[grouping.var]] == groups[[1]]]
    cells2 <- colnames(x = object)[object[[grouping.var]] == groups[[2]]]
  }
  if (verbose) {
    message("Finding neighborhoods")
  }
  if (!is.null(x = internal.neighbors[[1]])) {
    nnaa <- internal.neighbors[[1]]
    nnbb <- internal.neighbors[[2]]
  } else {
    dim.data.self <- Embeddings(object = object[[nn.reduction]])[ ,nn.dims]
    dims.cells1.self <- dim.data.self[cells1, ]
    dims.cells2.self <- dim.data.self[cells2, ]
    nnaa <- Seurat:::NNHelper(
      data = dims.cells1.self,
      k = k + 1,
      method = nn.method,
      eps = eps
    )
    nnbb <- Seurat:::NNHelper(
      data = dims.cells2.self,
      k = k + 1,
      method = nn.method,
      eps = eps
    )
  }
  if (length(x = reduction.2) > 0) {
    nnab <- Seurat:::NNHelper(
      data = Embeddings(object = object[[reduction.2]])[cells2, ],
      query = Embeddings(object = object[[reduction.2]])[cells1, ],
      k = k,
      method = nn.method,
      eps = eps
    )
    nnba <- Seurat:::NNHelper(
      data = Embeddings(object = object[[reduction]])[cells1, ],
      query = Embeddings(object = object[[reduction]])[cells2, ],
      k = k,
      method = nn.method,
      eps = eps
    )
  } else {
    dim.data.opposite <- Embeddings(object = object[[reduction]])[ ,dims]
    dims.cells1.opposite <- dim.data.opposite[cells1, ]
    dims.cells2.opposite <- dim.data.opposite[cells2, ]
    nnab <- Seurat:::NNHelper(
      data = dims.cells2.opposite,
      query = dims.cells1.opposite,
      k = k,
      method = nn.method,
      eps = eps
    )
    nnba <- Seurat:::NNHelper(
      data = dims.cells1.opposite,
      query = dims.cells2.opposite,
      k = k,
      method = nn.method,
      eps = eps
    )
  }
  
  object <- SetIntegrationData(
    object = object,
    integration.name = integration.name,
    slot = 'neighbors',
    new.data = list('nnaa' = nnaa, 'nnab' = nnab, 'nnba' = nnba, 'nnbb' = nnbb, 'cells1' = cells1, 'cells2' = cells2)
  )
  return(object)
}


# Find Anchor pairs
# IMPORTED from Seurat 3.2.1
FindAnchorPairs.Seurat <- function(
    object,
    integration.name = 'integrated',
    k.anchor = 5,
    verbose = TRUE
) {
  neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors')
  max.nn <- c(ncol(x = neighbors$nnab), ncol(x = neighbors$nnba))
  if (any(k.anchor > max.nn)) {
    message(paste0('warning: requested k.anchor = ', k.anchor, ', only ', min(max.nn), ' in dataset'))
    k.anchor <- min(max.nn)
  }
  if (verbose) {
    message("Finding anchors")
  }
  # convert cell name to neighbor index
  nn.cells1 <- neighbors$cells1
  nn.cells2 <- neighbors$cells2
  cell1.index <-  suppressWarnings(which(colnames(x = object) == nn.cells1, arr.ind = TRUE))
  ncell <- 1:nrow(x = neighbors$nnab)
  ncell <- ncell[ncell %in% cell1.index]
  anchors <- list()
  # pre allocate vector
  anchors$cell1 <- rep(x = 0, length(x = ncell) * 5)
  anchors$cell2 <- anchors$cell1
  anchors$score <- anchors$cell1 + 1
  idx <- 0
  indices.ab <- Indices(object = neighbors$nnab)
  indices.ba <- Indices(object = neighbors$nnba)
  for (cell in ncell) {
    neighbors.ab <- indices.ab[cell, 1:k.anchor]
    mutual.neighbors <- which(
      x = indices.ba[neighbors.ab, 1:k.anchor, drop = FALSE] == cell,
      arr.ind = TRUE
    )[, 1]
    for (i in neighbors.ab[mutual.neighbors]){
      idx <- idx + 1
      anchors$cell1[idx] <- cell
      anchors$cell2[idx] <- i
      anchors$score[idx] <- 1
    }
  }
  anchors$cell1 <- anchors$cell1[1:idx]
  anchors$cell2 <- anchors$cell2[1:idx]
  anchors$score <- anchors$score[1:idx]
  anchors <- t(x = do.call(what = rbind, args = anchors))
  anchors <- as.matrix(x = anchors)
  object <- SetIntegrationData(
    object = object,
    integration.name = integration.name,
    slot = 'anchors',
    new.data = anchors
  )
  if (verbose) {
    message(paste0("\tFound ", nrow(x = anchors), " anchors"))
  }
  return(object)
}

# Top dim features
# IMPORTED from Seurat 3.2.1
TopDimFeatures.Seurat <- function(
    object,
    reduction,
    dims = 1:10,
    features.per.dim = 100,
    max.features = 200,
    projected = FALSE
) {
  dim.reduction <- object[[reduction]]
  max.features <- max(length(x = dims) * 2, max.features)
  num.features <- sapply(X = 1:features.per.dim, FUN = function(y) {
    length(x = unique(x = as.vector(x = sapply(X = dims, FUN = function(x) {
      unlist(x = TopFeatures(object = dim.reduction, dim = x, nfeatures = y, balanced = TRUE, projected = projected))
    }))))
  })
  max.per.pc <- which.max(x = num.features[num.features < max.features])
  features <- unique(x = as.vector(x = sapply(X = dims, FUN = function(x) {
    unlist(x = TopFeatures(object = dim.reduction, dim = x, nfeatures = max.per.pc, balanced = TRUE, projected = projected))
  })))
  features <- unique(x = features)
  return(features)
}

# Filter anchors
# IMPORTED from Seurat 3.2.1
FilterAnchors.Seurat <- function(
    object,
    assay = NULL,
    slot = "data",
    integration.name = 'integrated',
    features = NULL,
    k.filter = 200,
    nn.method = "rann",
    eps = 0,
    verbose = TRUE
) {
  if (verbose) {
    message("Filtering anchors")
  }
  assay <- assay %||% DefaultAssay(object = object)
  features <- features %||% VariableFeatures(object = object)
  if (length(x = features) == 0) {
    stop("No features provided and no VariableFeatures computed.")
  }
  features <- unique(x = features)
  neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors')
  nn.cells1 <- neighbors$cells1
  nn.cells2 <- neighbors$cells2
  cn.data1 <- L2Norm(
    mat = as.matrix(x = t(x = GetAssayData(
      object = object[[assay[1]]],
      slot = slot)[features, nn.cells1])),
    MARGIN = 1)
  cn.data2 <- L2Norm(
    mat = as.matrix(x = t(x = GetAssayData(
      object = object[[assay[2]]],
      slot = slot)[features, nn.cells2])),
    MARGIN = 1)
  nn <- NNHelper(
    data = cn.data2[nn.cells2, ],
    query = cn.data1[nn.cells1, ],
    k = k.filter,
    method = nn.method,
    eps = eps
  )
  
  anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors")
  position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) {
    which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1]
  })
  anchors <- anchors[!is.na(x = position), ]
  if (verbose) {
    message("\tRetained ", nrow(x = anchors), " anchors")
  }
  object <- SetIntegrationData(
    object = object,
    integration.name = integration.name,
    slot = "anchors",
    new.data = anchors
  )
  return(object)
}

# Score Anchors
# IMPORTED from Seurat 3.2.1
ScoreAnchors.Seurat <- function(
    object,
    assay = NULL,
    integration.name = 'integrated',
    verbose = TRUE,
    k.score = 30,
    do.cpp = TRUE
) {
  assay <- assay %||% DefaultAssay(object = object)
  anchor.df <- as.data.frame(x = GetIntegrationData(object = object, integration.name = integration.name, slot = 'anchors'))
  neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "neighbors")
  offset <- length(x = neighbors$cells1)
  indices.aa <- Indices(object = neighbors$nnaa)
  indices.bb <- Indices(object = neighbors$nnbb)
  indices.ab <- Indices(object = neighbors$nnab)
  indices.ba <- Indices(object = neighbors$nnba)
  nbrsetA <- function(x) c(indices.aa[x, 1:k.score], indices.ab[x, 1:k.score] + offset)
  nbrsetB <- function(x) c(indices.ba[x, 1:k.score], indices.bb[x, 1:k.score] + offset)
  # score = number of shared neighbors
  anchor.new <- data.frame(
    'cell1' = anchor.df[, 1],
    'cell2' = anchor.df[, 2],
    'score' = mapply(
      FUN = function(x, y) {
        length(x = intersect(x = nbrsetA(x = x), nbrsetB(x = y)))},
      anchor.df[, 1],
      anchor.df[, 2]
    )
  )
  # normalize the score
  max.score <- quantile(anchor.new$score, 0.9)
  min.score <- quantile(anchor.new$score, 0.01)
  anchor.new$score <- anchor.new$score - min.score
  anchor.new$score <- anchor.new$score / (max.score - min.score)
  anchor.new$score[anchor.new$score > 1] <-  1
  anchor.new$score[anchor.new$score < 0] <- 0
  anchor.new <- as.matrix(x = anchor.new)
  object <- SetIntegrationData(
    object = object,
    integration.name = integration.name,
    slot = 'anchors',
    new.data = anchor.new
  )
  return(object)
}

###Find Anchors functions, modified from Seurat 3.1 to return the distance in PC space between pairs of datasets
FindAnchors.wdist <- function(
    object.pair,
    assay,
    slot,
    cells1,
    cells2,
    internal.neighbors,
    reduction,
    reduction.2 = character(),
    nn.reduction = reduction,
    dims = 1:10,
    k.anchor = 5,
    k.filter = 200,
    k.score = 30,
    max.features = 200,
    nn.method = "rann",
    eps = 0,
    projected = FALSE,
    verbose = TRUE
) {
  # compute local neighborhoods, use max of k.anchor and k.score if also scoring to avoid
  # recomputing neighborhoods
  k.neighbor <- k.anchor
  if (!is.na(x = k.score)) {
    k.neighbor <- max(k.anchor, k.score)
  }
  object.pair <- FindNN.Seurat(
    object = object.pair,
    cells1 = cells1,
    cells2 = cells2,
    internal.neighbors = internal.neighbors,
    dims = dims,
    reduction = reduction,
    reduction.2 = reduction.2,
    nn.reduction = nn.reduction,
    k = k.neighbor,
    nn.method = nn.method,
    eps = eps,
    verbose = verbose
  )
  
  object.pair <- FindAnchorPairs.Seurat(
    object = object.pair,
    integration.name = "integrated",
    k.anchor = k.anchor,
    verbose = verbose
  )
  
  if (!is.na(x = k.filter)) {
    top.features <- TopDimFeatures.Seurat(
      object = object.pair,
      reduction = reduction,
      dims = dims,
      features.per.dim = 100,
      max.features = max.features,
      projected = projected
    )
    object.pair <- FilterAnchors.Seurat(
      object = object.pair,
      assay = assay,
      slot = slot,
      integration.name = 'integrated',
      features = top.features,
      k.filter = k.filter,
      nn.method = nn.method,
      eps = eps,
      verbose = verbose
    )
  }
  if (!is.na(x = k.score)) {
    object.pair = ScoreAnchors.Seurat(
      object = object.pair,
      assay = DefaultAssay(object = object.pair),
      integration.name = "integrated",
      verbose = verbose,
      k.score = k.score
    )
  }
  
  ###Return distances
  anc.tab <- object.pair@tools$integrated@anchors
  d1.2 <- numeric(length = dim(anc.tab)[1])
  d2.1 <- numeric(length = dim(anc.tab)[1])
  for (r in 1:dim(anc.tab)[1]) {
    c1 <- anc.tab[r,"cell1"]
    c2 <- anc.tab[r,"cell2"]
    d1.2[r] <- object.pair@tools$integrated@neighbors$nnab@nn.dist[c1, which(object.pair@tools$integrated@neighbors$nnab@nn.idx[c1,] == c2 )]
    d2.1[r] <- object.pair@tools$integrated@neighbors$nnba@nn.dist[c2, which(object.pair@tools$integrated@neighbors$nnba@nn.idx[c2,] == c1 )]
  }
  
  object.pair@tools$integrated@anchors <- cbind(object.pair@tools$integrated@anchors, dist1.2=d1.2)
  object.pair@tools$integrated@anchors <- cbind(object.pair@tools$integrated@anchors, dist2.1=d2.1)
  
  anchors <- GetIntegrationData(
    object = object.pair,
    integration.name = 'integrated',
    slot = 'anchors'
  )
  
  return(anchors)
}

#Modified from Seurat 4.1.1
AddDatasetID.2 <- function(
    anchor.df,
    offsets,
    obj.lengths
) {
  ndataset <- length(x = offsets)
  total.cells <- sum(obj.lengths)
  offsets <- c(offsets, total.cells)
  row.offset <- rep.int(x = offsets[1:ndataset], times = obj.lengths)
  dataset <- rep.int(x = 1:ndataset, times = obj.lengths)
  
  anchor.df <- data.frame(
    'cell1' = anchor.df[, 1] - row.offset[anchor.df[, 1]],
    'cell2' = anchor.df[, 2] - row.offset[anchor.df[, 2]],
    'score' = anchor.df[, 3],
    'dataset1' = dataset[anchor.df[, 1]],
    'dataset2' = dataset[anchor.df[, 2]],
    'dist1.2' = anchor.df[, 4],
    'dist2.1' = anchor.df[, 5]
  )
  return(anchor.df)
}

#Modified from Seurat 4.1.1
ParseMergePair.local <- function(clustering, i){
  # return 2-element list of datasets in first and second object
  datasets <- list('object1' = clustering[i, 1], 'object2' = clustering[i, 2])
  if (datasets$object1 > 0) {
    datasets$object1 <- ParseRow(clustering, datasets$object1)
  }
  if (datasets$object2 > 0) {
    datasets$object2 <- ParseRow(clustering, datasets$object2)
  }
  datasets$object1 <- abs(x = datasets$object1)
  datasets$object2 <- abs(x = datasets$object2)
  return(datasets)
}

#Modified from Seurat 4.1.1
ParseRow <- function(clustering, i){
  # returns vector of datasets
  datasets <- as.list(x = clustering[i, ])
  if (datasets[[1]] > 0) {
    datasets[[1]] <- ParseRow(clustering = clustering, i = datasets[[1]])
  }
  if (datasets[[2]] > 0) {
    datasets[[2]] <- ParseRow(clustering = clustering, i = datasets[[2]])
  }
  return(unlist(datasets))
}

#Modified from Seurat 4.1.1
PairwiseIntegrateReference.STACAS <- function(
    anchorset,
    new.assay.name = "integrated",
    features = NULL,
    features.to.integrate = NULL,
    normalization.method = "LogNormalize",
    dims = 1:30,
    k.weight = 100,
    weight.reduction = NULL,
    sd.weight = 1,
    sample.tree = NULL,
    preserve.order = TRUE,
    verbose = TRUE
) {
  
  nobj <- length(anchorset@object.list)
  reference.objects <- slot(object = anchorset, name = "reference.objects")
  features <- features %||% slot(object = anchorset, name = "anchor.features")
  features.to.integrate <- features.to.integrate %||% features
  
  if (length(reference.objects) == 1) {
    ref.obj <- anchorset@object.list[[reference.objects]]
    ref.obj[[new.assay.name]] <- CreateAssayObject(
      data = GetAssayData(ref.obj, slot = 'data')[features.to.integrate, ]
    )
    DefaultAssay(object = ref.obj) <- new.assay.name
    return(ref.obj)
  }
  anchors <- slot(object = anchorset, name = "anchors")
  offsets <- slot(object = anchorset, name = "offsets")
  objects.ncell <- sapply(anchorset@object.list, FUN = ncol)
  if (!is.null(weight.reduction)) {
    if (length(weight.reduction) == 1 | inherits(weight.reduction, what = "DimReduc")) {
      if (nobj == 2) {
        weight.reduction <- list(NULL, weight.reduction)
      } else if (inherits(weight.reduction, what = "character")) {
        weight.reduction <- as.list(rep(weight.reduction, times = nobj))
      } else {
        stop("Invalid input for weight.reduction. Please specify either the names of the dimension",
             "reduction for each object in the list or provide DimReduc objects.")
      }
    }
    if (length(weight.reduction) != nobj) {
      stop("Please specify a dimension reduction for each object, or one dimension reduction to be used for all objects")
    }
    if (inherits(weight.reduction, what = "character")) {
      weight.reduction <- as.list(weight.reduction)
    }
    available.reductions <- lapply(X = anchorset@object.list,
                                   FUN = FilterObjects, classes.keep = 'DimReduc')
    for (ii in 1:length(weight.reduction)) {
      if (ii == 1 & is.null(weight.reduction[[ii]])) next
      if (!inherits(weight.reduction[[ii]], what = "DimReduc")) {
        if (!weight.reduction[[ii]] %in% available.reductions[[ii]]) {
          stop("Requested dimension reduction (", weight.reduction[[ii]], ") is not present in object ", ii)
        }
        weight.reduction[[ii]] <- anchorset@object.list[[ii]][[weight.reduction[[ii]]]]
      }
    }
  }
  if (is.null(sample.tree)) {
    sample.tree <- SampleTree.STACAS(
      anchorset = anchorset,
      plot = FALSE
    )
  }
  cellnames.list <- list()
  for (ii in 1:nobj) {
    cellnames.list[[ii]] <- colnames(anchorset@object.list[[ii]])
  }
  
  unintegrated <- suppressWarnings(expr = merge(
    x = anchorset@object.list[[reference.objects[[1]]]],
    y = anchorset@object.list[reference.objects[2:length(reference.objects)]]
  ))
  names(anchorset@object.list) <- as.character(-(1:nobj))
  if (!is.null(weight.reduction)) {
    names(weight.reduction) <- names(anchorset@object.list)
  }
  if (verbose & (length(reference.objects) != nobj)) {
    message("Building integrated reference")
  }
  for (ii in 1:nrow(sample.tree)) {
    merge.pair <- as.character(sample.tree[ii, ])
    length1 <- ncol(anchorset@object.list[[merge.pair[1]]])
    length2 <- ncol(anchorset@object.list[[merge.pair[2]]])
    if (!(preserve.order) & (length2 > length1)) {
      merge.pair <- rev(merge.pair)
      sample.tree[ii, ] <- as.numeric(merge.pair)
    }
    if (!is.null(weight.reduction)) {
      # extract the correct dimreduc objects, in the correct order
      weight.pair <- weight.reduction[merge.pair]
    } else {
      weight.pair <- NULL
    }
    object.1 <- DietSeurat(
      object = anchorset@object.list[[merge.pair[1]]],
      assays = DefaultAssay(anchorset@object.list[[merge.pair[1]]]),
      counts = FALSE
    )
    object.2 <- DietSeurat(
      object = anchorset@object.list[[merge.pair[2]]],
      assays = DefaultAssay(anchorset@object.list[[merge.pair[2]]]),
      counts = FALSE
    )
    # suppress key duplication warning
    suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[DefaultAssay(object = object.1)]])
    DefaultAssay(object = object.1) <- "ToIntegrate"
    object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate")
    suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[DefaultAssay(object = object.2)]])
    DefaultAssay(object = object.2) <- "ToIntegrate"
    object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate")
    datasets <- ParseMergePair.local(sample.tree, ii)
    if (verbose) {
      message(
        "Merging dataset ",
        paste(datasets$object2, collapse = " "),
        " into ",
        paste(datasets$object1, collapse = " ")
      )
    }
    merged.obj <- merge(x = object.1, y = object.2, merge.data = TRUE)
    if (verbose) {
      message("Extracting anchors for merged samples")
    }
    filtered.anchors <- anchors[anchors$dataset1 %in% datasets$object1 & anchors$dataset2 %in% datasets$object2, ]
    if (nrow(filtered.anchors) == 0) {
      stop(sprintf("Zero anchors were found between datasets %s and %s",
           paste0(datasets$object1, collapse = ","),
           paste0(datasets$object2, collapse = ",")))
    }
    
    #k.weight == "max" disables local rescaling of anchor weights
    
    nanch.d2 <- length(unique(filtered.anchors$cell2))
    if (k.weight == "max" | k.weight > nanch.d2) {
      k.use <- nanch.d2
    } else {
      k.use <- k.weight
    }
    
    integrated.matrix <- Seurat:::RunIntegration(
      filtered.anchors = filtered.anchors,
      normalization.method = normalization.method,
      reference = object.1,
      query = object.2,
      cellnames.list = cellnames.list,
      new.assay.name = new.assay.name,
      features.to.integrate = features.to.integrate,
      features = features,
      dims = dims,
      weight.reduction = weight.reduction,
      k.weight = k.use,
      sd.weight = sd.weight,
      eps=0,
      verbose = verbose
    )
    integrated.matrix <- cbind(integrated.matrix, GetAssayData(object = object.1, slot = 'data')[features.to.integrate, ])

    merged.obj[[new.assay.name]] <- CreateAssayObject(data = integrated.matrix)
    DefaultAssay(object = merged.obj) <- new.assay.name
    anchorset@object.list[[as.character(x = ii)]] <- merged.obj
    anchorset@object.list[[merge.pair[[1]]]] <- NULL
    anchorset@object.list[[merge.pair[[2]]]] <- NULL
    invisible(x = CheckGC())
  }
  integrated.data <- GetAssayData(
    object = anchorset@object.list[[as.character(ii)]],
    assay = new.assay.name,
    slot = 'data'
  )
  integrated.data <- integrated.data[, colnames(x = unintegrated)]
 
  unintegrated[[new.assay.name]] <- CreateAssayObject(data = integrated.data,
                                    scale.data = matrix(),
                                    var.features = vector(),
                                    meta.features = data.frame(row.names = rownames(x = integrated.data)),
                                    misc = NULL)
  
  # "unintegrated" now contains the integrated assay
  DefaultAssay(unintegrated) <- new.assay.name
  VariableFeatures(unintegrated) <- features
  
  unintegrated <- SetIntegrationData(
    object = unintegrated,
    integration.name = "Integration",
    slot = "anchors",
    new.data = anchors
  )
  unintegrated <- SetIntegrationData(
    object = unintegrated,
    integration.name = "Integration",
    slot = "sample.tree",
    new.data = sample.tree
  )
  unintegrated[["FindIntegrationAnchors"]] <- slot(object = anchorset, name = "command")
  suppressWarnings(expr = unintegrated <- LogSeuratCommand(object = unintegrated))
  return(unintegrated)
}

#Modified from Seurat 4.1.1
MapQueryData.local <- function(
    anchorset,
    reference,
    new.assay.name = "integrated",
    normalization.method = "LogNormalize",
    features = NULL,
    features.to.integrate = NULL,
    dims = 1:30,
    k.weight = 100,
    weight.reduction = NULL,
    weights.matrix = NULL,
    no.offset = FALSE,
    sd.weight = 1,
    preserve.order = FALSE,
    eps = 0,
    verbose = TRUE
) {
  normalization.method <- match.arg(arg = normalization.method)
  reference.datasets <- slot(object = anchorset, name = 'reference.objects')
  nobj <- length(anchorset@object.list)
  anchors <- slot(object = anchorset, name = 'anchors')
  features <- features %||% slot(object = anchorset, name = "anchor.features")
  features.to.integrate <- features.to.integrate %||% features
  cellnames.list <- list()
  for (ii in 1:nobj) {
    cellnames.list[[ii]] <- colnames(anchorset@object.list[[ii]])
  }
  if (length(reference.datasets) == nobj) {
    query.datasets <- NULL
  } else {
    query.datasets <- setdiff(seq(from=1, to=nobj), reference.datasets)
  }
  query.corrected <- pbapply::pblapply(
    X = query.datasets,
    FUN = function(this.dataset) {
      if (verbose) {
        message("\nIntegrating dataset ", this.dataset, " with reference dataset")
      }
      filtered.anchors <- anchors[anchors$dataset1 %in% reference.datasets & anchors$dataset2 == this.dataset, ]
      if (nrow(filtered.anchors) == 0) {
        return(NULL)
      }
      integrated <- Seurat:::RunIntegration(
        filtered.anchors = filtered.anchors,
        reference = reference,
        query = anchorset@object.list[[this.dataset]],
        new.assay.name = new.assay.name,
        normalization.method = normalization.method,
        cellnames.list = cellnames.list,
        features.to.integrate = features.to.integrate,
        weight.reduction = weight.reduction,
        weights.matrix = weights.matrix,
        no.offset = no.offset,
        features = features,
        dims = dims,
        k.weight = k.weight,
        sd.weight = sd.weight,
        eps = eps,
        verbose = verbose
      )
      return(integrated)
    }
  )
  reference.integrated <- GetAssayData(
    object = reference,
    slot = 'data'
  )[features.to.integrate, ]
  query.corrected[[length(query.corrected) + 1]] <- reference.integrated
  all.integrated <- do.call(cbind, query.corrected)
  return(all.integrated)
}
carmonalab/STACAS documentation built on Feb. 3, 2024, 7:42 a.m.