R/helper_join_segments.R
In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

# data.table implementation ~2x speed up ----------------------------------

helper_join_segments2 <- function(segTab, add_loh = FALSE, loh_min_frac = 0.3) {
  x <- segTab # This is just a alias, not a copy, modify inplace will save memory
  index <- seq(1, nrow(x))
  x$index <- index
  x$dval <- c(NA, diff(x$segVal))
  i_eq <- x[, list(index = .SD[-1]$index[.SD[-1]$dval == 0]),
    by = list(sample, chromosome)
  ]$index

  x$index <- NULL
  x$dval <- NULL

  if (length(i_eq) > 0) {
    ## Split joining segments into different groups
    i_join <- sort(union(i_eq - 1L, i_eq))
    # z <- split(i_eq, findInterval(i_eq, i_eq[diff(i_eq) > 1] + 2L)) ## the first element is not included
    # grp <- rep(names(z), sapply(z, length) + 1L)
    ## Use rle to improve performance
    z <- rle(findInterval(i_eq, i_eq[diff(i_eq) > 1] + 2L))
    z$lengths <- z$lengths + 1L
    z$values <- as.character(z$values)
    dt_join <- x[i_join][,
      .collapse_segTab(.SD,
        cols = setdiff(
          colnames(x),
          c("chromosome", "start", "end", "segVal", "sample")
        ),
        add_loh = add_loh,
        loh_min_frac = loh_min_frac
      ),
      by = list(.grp = inverse.rle(z))
    ]
    dt_join$.grp <- NULL

    x <- data.table::rbindlist(
      list(x[setdiff(index, i_join)], dt_join),
      fill = TRUE
    )
  }

  return(x)
}

.collapse_segTab <- function(dt, cols = NULL, add_loh = FALSE, loh_min_frac = 0.3) {
  x <- data.table::data.table(
    chromosome = dt[1]$chromosome,
    start = dt[1]$start,
    end = dt[.N]$end,
    segVal = dt[1]$segVal,
    sample = dt[1]$sample
  )

  if (length(cols) > 0) {
    x[, (cols) := lapply(dt[, cols, with = FALSE], function(x) {
      if (is.numeric(x)) {
        mean(x, na.rm = TRUE)
      } else if (is.logical(x)) {
        any(x, na.rm = TRUE)
      } else {
        paste0(unique(na.omit(x)), collapse = ",")
      }
    })]
  }

  if (add_loh) {
    # When LOH regions have same total copy number values as adjacent
    # regions, only label the segments harbor LOH with minimal length fraction
    # A logical column 'loh' should exist
    if (any(dt$loh)) {
      dt$.len <- dt$end - dt$start + 1
      x$.loh_frac <- sum(dt$.len[dt$loh]) / sum(dt$.len)
      # x$loh is already TRUE
      # reset it
      if (x$.loh_frac <= loh_min_frac) {
        x$loh <- FALSE
      }
    } else {
      x$.loh_frac <- NA_real_
    }
  }

  return(x)
}

# helper_join_segments2_old <- function(segTab) {
#   # segTab <- segTab[order(segTab$start)]
#   final_orders <- c("chromosome", "start", "end", "segVal", "sample")
#   cls_cols <- setdiff(colnames(segTab), c(final_orders, "segVal2"))
#   segTab$segVal2 <- segTab$segVal
#   segTab <- segTab[, .groupby_collapse(.SD, cls_cols),
#                    by = list(sample, chromosome)
#   ]
#
#   segTab$data.table <- NULL
#   data.table::setcolorder(segTab, final_orders)
#   return(segTab)
# }
#
# .groupby_collapse <- function(dt, cols = NULL) {
#   if (length(cols) > 0) {
#     dt <- dt[, .collapse_top2bottom(.SD, cols = cols),
#       by = data.table::rleid(segVal)
#     ]
#   } else {
#     dt <- dt[, .collapse_top2bottom(.SD),
#       by = data.table::rleid(segVal)
#     ]
#   }
#   dt
# }
#
# .collapse_top2bottom <- function(dt, cols = NULL) {
#   x <- data.table::data.table(
#     start = dt[1]$start,
#     end = dt[.N]$end,
#     segVal = dt[1]$segVal2
#   )
#
#   if (!is.null(cols)) {
#     x[, (cols) := lapply(dt[, cols, with = FALSE], function(x) {
#       if (is.numeric(x)) {
#         mean(x, na.rm = TRUE)
#       } else {
#         paste0(unique(na.omit(x)), collapse = ",")
#       }
#     })]
#   }
#   return(x)
# }


# tidyverse implementation ------------------------------------------------


# helper_join_segments <- function(segTab) {
#   final_orders <- c("chromosome", "start", "end", "segVal", "sample")
#   segTab <- segTab %>%
#     dplyr::as_tibble() %>%
#     dplyr::group_by(.data$sample, .data$chromosome) %>%
#     tidyr::nest() %>%
#     dplyr::summarise(data = purrr::map(.data$data, join_segments)) %>%
#     tidyr::unnest("data") %>%
#     dplyr::select(final_orders, dplyr::everything()) %>%
#     data.table::as.data.table()
#
#   segTab
# }
#
# join_segments <- function(df) {
#   df <- df %>% dplyr::arrange(.data$start)
#   equal_index <- which(diff(df$segVal) == 0)
#
#   to_join_index <- sort(union(equal_index, equal_index + 1))
#
#   if (length(to_join_index) > 0) {
#     not_join_index <- setdiff(1:nrow(df), to_join_index)
#
#     cutpoint <- equal_index[which(diff(equal_index) > 1)] + 2L
#     join_list <- split(
#       to_join_index,
#       findInterval(to_join_index, cutpoint)
#     )
#
#
#     join_df <- purrr::map_df(join_list, function(index, df) {
#       res <- df[index, ]
#       out <- dplyr::tibble(
#         start = res$start[1],
#         end = res$end[length(res$end)],
#         segVal = res$segVal[1]
#       )
#       if (ncol(res) == 3L) {
#         return(out)
#       } else {
#         dplyr::bind_cols(
#           out,
#           dplyr::summarise_at(
#             res, dplyr::vars(-c("start", "end", "segVal")),
#             ~ ifelse(is.numeric(.), mean(., na.rm = TRUE),
#               paste0(unique(na.omit(.)), collapse = ",")
#             )
#           )
#         )
#       }
#     }, df = df)
#
#     if (length(not_join_index) > 0) {
#       df <- dplyr::bind_rows(
#         join_df,
#         df[not_join_index, ]
#       ) %>%
#         dplyr::arrange(.data$start)
#
#       return(df)
#     } else {
#       return(join_df)
#     }
#   } else {
#     return(df)
#   }
# }

Any scripts or data that you put into this service are public.

sigminer documentation built on May 29, 2024, 3:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sigminer
Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

R/helper_join_segments.R
In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

Defines functions .collapse_segTab helper_join_segments2

Try the sigminer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sigminer Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

R/helper_join_segments.R In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

Defines functions .collapse_segTab helper_join_segments2

Try the sigminer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sigminer
Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

R/helper_join_segments.R
In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations