R/assigntenx.R

Defines functions assigntenx

Documented in assigntenx

#' Define 10x clonotypes
#'
#' Takes a list of paired chains, keeps the chain classes selected by
#' `method`/`cell`, flattens them into a table with one cell per row and then
#' calls `clonality()` once for every class that has more than one cell.
#'
#' @param list.pairs List. List of cells generated by tenx. The element names
#'   are the chain classes; each element is itself a list of cells.
#' @param method Character. How the classes are chosen: `unique_all` uses every
#'   name of `list.pairs` that does not contain "None"; `unique_paired` and
#'   `sticky_ends` look the classes up in `cell.classes`.
#' @param clonality_input Named vector. Changes the parameters of clonality
#'   function. Entries override the defaults (`output`, `vgene_col`,
#'   `jgene_col`, `cdr3_col`, `cell`, `output_original`, `ident_col`,
#'   `mismatch`, `search_genename`); `NULL` or an empty vector keeps them all.
#' @param cell Character. Possible values: `B` Bcells, `T` Tcells, `Tgd`
#'   gamma-delta Tcells.
#' @param col_res Character. `full` passes every per-chain column to
#'   `clonality()`, `reduced` only the concatenated summary columns.
#' @param save_files Currently unused; kept for backward compatibility.
#' @param add_columns Character vector or `NULL`. Column-name prefixes; for
#'   each prefix the matching columns are pasted together with ";" and added to
#'   the `full` table. It is also forwarded to `row1()`.
#' @return `NULL`, invisibly. The function is called for the side effects of
#'   `clonality()`.
#' @export
#'
#'
assigntenx <- function(list.pairs,
                       method,
                       clonality_input = NULL,
                       cell,
                       col_res = "full",
                       save_files = FALSE,
                       add_columns = NULL) {

  # Fail early with a readable message instead of letting switch() silently
  # return NULL for an unsupported value.
  check_choice <- function(value, choices, arg) {
    if (!is.character(value) || length(value) != 1L || !value %in% choices) {
      stop(sprintf("`%s` must be one of: %s", arg,
                   paste(choices, collapse = ", ")),
           call. = FALSE)
    }
    invisible(value)
  }
  check_choice(method, c("unique_all", "unique_paired", "sticky_ends"), "method")
  check_choice(cell, c("B", "T", "Tgd"), "cell")
  check_choice(col_res, c("full", "reduced"), "col_res")

  # Define classes to be used
  if (method == "unique_all") {
    classes <- names(list.pairs)
    classes <- classes[!grepl("None", classes)]
  } else {
    classes.list <- switch(method,
                           unique_paired = cell.classes$unique_paired,
                           sticky_ends = cell.classes$sticky_ends)

    classes <- switch(cell,
                      "B" = classes.list$BCell.classes,
                      "T" = classes.list$TabCell.classes,
                      "Tgd" = classes.list$TgdCell.classes)
  }

  # Pattern selecting the CDR3 amino-acid columns for this cell type
  cdr3.match <- switch(cell, "B" = "cdr3_IG", "T" = "cdr3_TR", "Tgd" = "cdr3_TR")

  # Parameters forwarded to clonality(). They do not depend on the class, so
  # they are resolved once here. Everything is stored as character because a
  # named atomic vector can only hold one type; logical/numeric entries are
  # converted back at the call site.
  clonality_defaults <- c(output = "Clonal.output.10x",
                          vgene_col = "v_genes",
                          jgene_col = "j_genes",
                          cdr3_col = "CDR3",
                          # NOTE(review): this default is "T" regardless of the
                          # `cell` argument above - confirm that is intended.
                          cell = "T",
                          output_original = "TRUE",
                          ident_col = "barcodes",
                          mismatch = "0",
                          search_genename = "FALSE")
  if (length(clonality_input) > 0) {
    clonality_defaults[names(clonality_input)] <- clonality_input
  }
  clonality_input <- clonality_defaults

  # Filter the classes to be used
  list.pairs_filt <- list.pairs[names(list.pairs) %in% classes]

  # Concatenate each element of the list into a list of lists
  # Transform it into a data frame with 1 cell per row
  res <- do.call(c, list.pairs_filt)
  res <- lapply(res, FUN = row1, add_columns)
  res <- bind_rows(res, .id = "classes")

  # Extract chain: c() named the cells "<class>.<cell>", keep the class part
  res$classes <- gsub("\\..*", "", res$classes)

  # Coalesces raw_clonotype and barcode columns for each chain/cell
  res <- res %>%
    mutate_at(vars(matches("raw")), as.character) %>%
    mutate(sc.raw_clonotypes = coalesce(!!!select(., matches("raw"))))
  res <- res %>%
    mutate_at(vars(matches("bar")), as.character) %>%
    mutate(sc.barcodes = coalesce(!!!select(., matches("bar"))))

  # Removes individual.chain columns
  res <- res %>% select(-contains("raw_clonotype_id"))
  res <- res %>% select(-contains("barcode_"))

  # Predicate used to drop columns whose values are all NA
  has_non_na <- function(x) any(!is.na(x))

  # Paste every column of `cols` into one character vector, row by row
  collapse_cols <- function(cols, sep = "_") {
    cols %>% tidyr::unite("united", sep = sep) %>% pull("united")
  }

  # Only classes with more than one cell are analysed
  class_counts <- table(res$classes)
  classes.keep <- names(class_counts)[class_counts > 1]
  res <- res %>% filter(classes %in% classes.keep)

  # Apply this procedures to each class separately
  for (i in unique(res$classes)) {

    res.sub <- res %>% filter(classes == i)
    barcodes <- res.sub$sc.barcodes
    raw_clonotypes <- res.sub$sc.raw_clonotypes

    # Order columns alphabetically and remove columns that are entirely NA
    res.sub <- res.sub[, order(colnames(res.sub)), drop = FALSE]
    res.sub <- res.sub %>% select_if(has_non_na)
    chains <- res.sub %>% ungroup()

    # Per-chain columns, kept separated
    v_genes_unique <- chains %>% select(starts_with("v_gene"))
    j_genes_unique <- chains %>% select(starts_with("j_gene"))
    cdr3_col_unique <- chains %>% select(starts_with("cdr3_nt"))
    cdr3_col2_unique <- chains %>% select(matches(cdr3.match))

    # The same columns concatenated into one value per cell
    v_gene <- collapse_cols(v_genes_unique)
    j_gene <- collapse_cols(j_genes_unique)
    c_gene <- collapse_cols(chains %>% select(starts_with("c_gene")))
    cdr3_col <- collapse_cols(cdr3_col_unique)    # CDR3 nt
    cdr3_col2 <- collapse_cols(cdr3_col2_unique)  # CDR3 aa

    # Concatenate CDR3 nt length; computed column by column so the result
    # keeps one row per cell whatever the number of rows
    cdr3_length <- collapse_cols(
      as.data.frame(lapply(cdr3_col_unique, function(x) nchar(as.character(x))))
    )

    if (col_res == "full") {
      # Selected columns
      if (!is.null(add_columns)) {
        selected_mat <- matrix(nrow = nrow(res.sub), ncol = length(add_columns))
        colnames(selected_mat) <- add_columns
        selected_mat <- as.data.frame(selected_mat)
        for (f in add_columns) {
          selected_mat[, f] <- collapse_cols(
            as.data.frame(chains %>% select(starts_with(f))),
            sep = ";"
          )
        }
      } else {
        # Placeholder, dropped again right after the table is built
        selected_mat <- NA
      }

      # Create final metadata table
      df1 <- data.frame(barcodes = barcodes,
                        v_genes = v_gene,
                        v_genes_unique,
                        j_genes = j_gene,
                        c_genes = c_gene,
                        j_genes_unique,
                        CDR3 = cdr3_col,
                        cdr3_col_unique,
                        cdr3_col2 = cdr3_col2,
                        cdr3_col2_unique,
                        cdr3_length = cdr3_length,
                        raw_clonotypes = raw_clonotypes,
                        selected = selected_mat)

      if (is.null(add_columns)) {
        df1[["selected"]] <- NULL
      }
    } else {
      df1 <- data.frame(barcodes = barcodes,
                        v_genes = v_gene,
                        j_genes = j_gene,
                        CDR3 = cdr3_col,
                        cdr3_col2 = cdr3_col2,
                        cdr3_length = cdr3_length,
                        raw_clonotypes = raw_clonotypes)
    }

    # If 2 or more chains are solutions for the same cell
    df1$v_genes <- gsub("\\+", ";", df1$v_genes)
    df1$j_genes <- gsub("\\+", ";", df1$j_genes)

    clonality(data = df1,
              output = paste0(clonality_input["output"], i),
              vgene_col = clonality_input["vgene_col"],
              jgene_col = clonality_input["jgene_col"],
              cdr3_col = clonality_input["cdr3_col"],
              cell = clonality_input["cell"],
              output_original = as.logical(clonality_input["output_original"]),
              suffix = i,
              ident_col = clonality_input["ident_col"],
              mismatch = as.numeric(clonality_input["mismatch"]),
              search_genename = as.logical(clonality_input["search_genename"]))
  }

  # TODO: `save_files` is not honoured yet; the disabled export is kept below.
  # if(save_files == TRUE){
  #   files <- ls(pattern = "^Clonal", envir = .GlobalEnv)
  #   for(i in files){
  #     write.xlsx(x = get(i), file = sprintf("%s.xlsx", i), rowNames = F)
  #   }
  #
  # }

  invisible(NULL)
}
victoraLab/clonality documentation built on March 19, 2024, 7:41 p.m.