R/somatic_features.R

Defines functions allFeatures sameFeatureElement featureValue featureDensity breakpointGC withinFeature distanceToFeature svSize svType nearbyBreakpoints

Documented in allFeatures breakpointGC distanceToFeature featureDensity featureValue nearbyBreakpoints sameFeatureElement svSize svType withinFeature

# Functions related to identifying SVs in the plasma that have features of tumor-derived somatic rearrangements

#' Count the number of times that a breakpoint is near an empirical breakpoint (e.g. from the PCAWG SV callset)
#'
#' @param query A GRanges object of structural variants with two breakpoints each (breakpoint 1 is the range itself, breakpoint 2 is the GRanges stored in the metadata column linked.to). Counts are computed for each element of query.
#' @param subject A GRanges object of structural variants, laid out like query, whose breakpoints are counted.
#' @param padding A character vector of window sizes. Example values are "1kb", "5mb", etc. Internally "bp" is removed, "kb" is coerced to '000' and "mb" is coerced to '000000'; the resulting number is divided by 2 and that half-width is added to either side of each query breakpoint before overlaps with subject are counted.
#' @param both A logical (TRUE/FALSE) for whether both breakpoints for a given rearrangement are assessed together. If TRUE, an overlap is only counted when both padded breakpoints of the query element overlap the two breakpoints of the same subject element (in either order). If FALSE, each breakpoint is counted separately.
#' @param queryInSubject A logical (TRUE/FALSE) for whether or not the breakpoints in query are a subset of the breakpoints in subject. If TRUE, 1 is subtracted from every count to avoid counting the overlap of an element in query with the same element in subject.
#'
#' @return A matrix with one row per element of query. When both is FALSE there are two columns per padding value (named within<padding>.bp1 and within<padding>.bp2); when both is TRUE there is one column per padding value (named within<padding>.bothbp).
#'
#' @examples
#' # Example 1: Counting the number of times that each breakpoint in test_sv
#' #            is within 1kb, 10kb, 100kb, and 1mb of a breakpoint in pcawg_sv_setA. Here, test_sv
#' #            is a subset of pcawg_sv_setA, so queryInSubject is set to TRUE.
#' data(test_sv)
#' data(pcawg_sv_setA)
#' res <- nearbyBreakpoints(query = test_sv,
#'                          subject = pcawg_sv_setA,
#'                          padding = c("1kb", "10kb", "100kb", "1mb"),
#'                          both = FALSE,
#'                          queryInSubject = TRUE)
#' res
#'
#' @export
nearbyBreakpoints <- function(query, subject, padding, both, queryInSubject) {

  # Half of each requested window size; this amount is added to both sides of a breakpoint
  half.width <- as.numeric(gsub("mb", "000000", gsub("kb", "000", gsub("bp", "", padding))))/2
  n.pad <- length(padding)

  if (both == FALSE) {

    # Each breakpoint is scored on its own: overlaps with either breakpoint of any subject element
    countNear <- function(bp, width) {
      widened <- bp + width
      countOverlaps(widened, subject, ignore.strand = TRUE) +
        countOverlaps(widened, subject$linked.to, ignore.strand = TRUE)
    }

    # Columns 1..n.pad hold breakpoint 1, columns n.pad+1..2*n.pad hold breakpoint 2
    m <- matrix(nrow = length(query),
                ncol = n.pad * 2,
                dimnames = list(NULL,
                                paste0("within", rep(padding, 2), ".bp", rep(c(1, 2), each = n.pad))))

    for (k in seq_along(padding)) {
      m[, k] <- countNear(query, half.width[k])
      m[, k + n.pad] <- countNear(query$linked.to, half.width[k])
    }

  } else if (both == TRUE) {

    # Counts start at 0 because only query elements with at least one match are filled in
    m <- matrix(data = 0,
                nrow = length(query),
                ncol = n.pad,
                dimnames = list(NULL, paste0("within", padding, ".bothbp")))

    # Labels every overlap between a padded query breakpoint and a subject breakpoint
    # as "<query index>-<subject index>"
    pairKeys <- function(bp.query, bp.subject, width) {
      hits <- findOverlaps(bp.query + width, bp.subject, ignore.strand = TRUE)
      paste(queryHits(hits), subjectHits(hits), sep = "-")
    }

    for (k in seq_along(padding)) {
      hw <- half.width[k]

      # bp1 near bp1 AND bp2 near bp2 of the same subject element
      same.order <- intersect(pairKeys(query, subject, hw),
                              pairKeys(query$linked.to, subject$linked.to, hw))

      # bp1 near bp2 AND bp2 near bp1 of the same subject element
      swapped.order <- intersect(pairKeys(query, subject$linked.to, hw),
                                 pairKeys(query$linked.to, subject, hw))

      # A query/subject pair found in both orientations (breakpoints close to each other)
      # must only be counted once
      matched <- union(same.order, swapped.order)

      # Dropping the subject part of the label leaves the query index; tally per query element
      per.sv <- table(gsub("-.+", "", matched))
      m[as.numeric(names(per.sv)), k] <- per.sv
    }
  }

  if (queryInSubject == TRUE) {
    m <- m - 1
  }

  return(m)
}


#' Determines the structural variant type
#'
#'
#' @param x A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#'
#' @details For intrachromosomal events, it is expected that the position of breakpoint 1 is less than the position of breakpoint 2
#' (the function stops otherwise). When the chromosome is not the same, the SV is assigned 'TRA'; when the chromosome is the same,
#' it is assigned 'INV' if the strands are ++ or --, 'DEL' if the strands are +-, and 'DUP' if the strands are -+.
#' Intrachromosomal events with any other strand combination are left as NA.
#'
#' @return A one-column character matrix named sv.type with one row per element of x.
#'
#' @examples
#' # Example 1: Determining the structural variant type for each rearrangement in test_sv
#' data(test_sv)
#' svType(test_sv)
#'
#' @export
svType <- function(x) {
  # x[,-1] drops the first metadata column
  # NOTE(review): presumably that column is linked.to -- confirm against the input objects
  bp1 <- x[,-1]
  bp2 <- x$linked.to

  same.chr <- as.character(seqnames(bp1)) == as.character(seqnames(bp2))
  intra <- which(same.chr)

  # Breakpoint 1 must lie strictly upstream of breakpoint 2 for events on one chromosome
  stopifnot(all(start(bp1[intra]) < start(bp2[intra])))

  sv <- matrix(nrow = length(x), ncol = 1, dimnames = list(NULL, "sv.type"))

  # Different chromosomes: translocation, whatever the strands are
  sv[!same.chr, ] <- "TRA"

  # Same chromosome: the type follows from the strand pair of the two breakpoints
  orientation <- paste0(strand(bp1), strand(bp2))
  intra.orientation <- orientation[intra]

  sv[intra[which(intra.orientation %in% c("++", "--"))], ] <- "INV"
  sv[intra[which(intra.orientation == "+-")], ] <- "DEL"
  sv[intra[which(intra.orientation == "-+")], ] <- "DUP"

  return(sv)
}



#' Determines the structural variant size
#'
#'
#' @param x A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#'
#' @details For intrachromosomal events, it is expected that the position of breakpoint 1 is less than the position of breakpoint 2
#' (the function stops otherwise). Size is computed as 1/breakpoint distance (in megabases) for intrachromosomal events and is set
#' to 0 for interchromosomal events.
#'
#' @return A one-column numeric matrix named sv.size with one row per element of x.
#'
#' @examples
#' # Example 1: Determining the structural variant size for each rearrangement in test_sv
#' data(test_sv)
#' svSize(test_sv)
#'
#' @export
svSize <- function(x) {
  # x[,-1] drops the first metadata column
  # NOTE(review): presumably that column is linked.to -- confirm against the input objects
  bp1 <- x[,-1]
  bp2 <- x$linked.to

  same.chr <- as.character(seqnames(bp1)) == as.character(seqnames(bp2))
  intra <- which(same.chr)
  stopifnot(all(start(bp1[intra]) < start(bp2[intra])))

  size <- matrix(nrow = length(x), ncol = 1, dimnames = list(NULL, "sv.size"))

  if (length(intra) == 0) {
    # Only interchromosomal events (or no events at all)
    size[,] <- 0
  } else {
    size[!same.chr, ] <- 0

    # Distance between the two breakpoints in megabases, then its reciprocal
    gap.mb <- (start(bp2[intra]) - start(bp1[intra]))/1e6
    # NOTE(review): format() picks a common number of decimals for the whole vector,
    # so the rounding of one value can depend on the other values -- confirm this is intended
    size[intra] <- as.numeric(format(1/gap.mb, digits = 1, scientific = FALSE))
  }

  return(size)
}



#' Determines the distance to a genomic feature
#'
#'
#' @param query A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#' @param feature A GRanges object containing the features to compute the distance from
#' @param feature.name A character vector of length 1 indicating the name of the feature being evaluated. This value is only used to create the column names of the output.
#' @param boundary A logical (TRUE/FALSE) indicating whether or not to return the absolute distance to the nearest feature boundary.
#' If TRUE, a breakpoint inside a feature gets a positive value (the distance to the nearest region outside the feature).
#' If FALSE, a breakpoint inside a feature gets the same value with a negative sign. For example, if a breakpoint is 100 bases into a LAD,
#' then boundary = TRUE gives the equivalent of 100 bases and boundary = FALSE gives the equivalent of -100 bases (both expressed in megabases).
#' @param average A logical (TRUE/FALSE) indicating whether or not to also return the average distance of the two breakpoints for each structural variant to the feature
#'
#' @details Computes the distance (in megabases) that each breakpoint in query is away from the nearest range in feature.
#' If a breakpoint is inside a feature and boundary is FALSE, then the distance will be negative. Breakpoints for which no
#' nearest feature is reported (e.g. a seqname in query that is not in feature) get NA. Breakpoints that are inside a
#' feature but for which no region outside the feature is found keep a distance of 0.
#'
#' @return A numeric matrix with one row per element of query and columns bp1.dist2<feature.name>, bp2.dist2<feature.name> and,
#' when average is TRUE, avg.dist2<feature.name>.
#'
#' @examples
#' # Example 1: Determining the distance of each breakpoint in test_sv to LADs
#' data(test_sv)
#' data(lad)
#' distanceToFeature(query = test_sv, feature = lad, feature.name = 'lad', boundary = FALSE, average = TRUE)
#'
#' @export
distanceToFeature <- function(query, feature, feature.name, boundary, average) {

  # Creating a GRanges of genomic regions not in the feature, restricted to the
  # first 24 sequences of hg19 (presumably chr1-22, X and Y -- confirm)
  hg19.gr <- GRanges(seqinfo(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19))[1:24]
  interfeature.gr <- BiocGenerics::setdiff(hg19.gr, feature)

  # Distance (in megabases) of every breakpoint in bp to the feature
  breakpointDistance <- function(bp) {
    # NA_real_ keeps the result numeric even when no breakpoint has a nearest feature
    dist2feature <- rep(NA_real_, length(bp))

    # Only breakpoints with a reported nearest feature are filled in; the rest stay NA
    to.feature <- distanceToNearest(bp, feature)
    dist2feature[queryHits(to.feature)] <- mcols(to.feature)$distance/1e6

    # A distance of 0 means the breakpoint is inside (or directly adjacent to) a feature;
    # for those, measure how far it is to the nearest region outside the feature
    infeature <- which(dist2feature == 0)
    to.outside <- distanceToNearest(bp[infeature], interfeature.gr)
    depth <- -1 * mcols(to.outside)$distance/1e6
    if (boundary == TRUE) {
      depth <- abs(depth)
    }

    # distanceToNearest can return fewer hits than queries, so map the results back
    # through queryHits instead of assuming one value per inside breakpoint
    dist2feature[infeature[queryHits(to.outside)]] <- depth

    dist2feature
  }

  bp1.dist2feature <- breakpointDistance(query)
  bp2.dist2feature <- breakpointDistance(query$linked.to)

  if (average == TRUE) {
    avg.dist2feature <- (bp1.dist2feature + bp2.dist2feature)/2
    m <- matrix(c(bp1.dist2feature, bp2.dist2feature, avg.dist2feature), ncol = 3, byrow = FALSE,
                dimnames = list(NULL, c(paste0("bp1.dist2", feature.name), paste0("bp2.dist2", feature.name),
                                        paste0("avg.dist2", feature.name))))
  } else if (average == FALSE) {
    m <- matrix(c(bp1.dist2feature, bp2.dist2feature), ncol = 2, byrow = FALSE,
                dimnames = list(NULL, c(paste0("bp1.dist2", feature.name), paste0("bp2.dist2", feature.name))))
  }

  return(m)
}


#' Determines the number of breakpoints within a genomic feature
#'
#'
#' @param query A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#' @param feature A GRanges object containing the features that the breakpoints are tested against
#' @param feature.name A character vector of length 1 indicating the name of the feature being evaluated. This value is only used to create the column name of the output.
#' @details Computes the number of breakpoints (0, 1, or 2) of each structural variant that overlap a range in feature.
#'
#' @return A one-column numeric matrix named bp.in.<feature.name> with one row per element of query.
#'
#' @examples
#' # Example 1: Determining the number of breakpoints in each structural variant in test_sv in LADs
#' data(test_sv)
#' data(lad)
#' withinFeature(query = test_sv, feature = lad, feature.name = "lad")
#'
#' @export
withinFeature <- function(query, feature, feature.name) {

  # Indices of the structural variants whose first / second breakpoint overlaps any feature range;
  # unique() makes a breakpoint count once even if it overlaps several ranges
  hit.bp1 <- unique(queryHits(findOverlaps(query, feature)))
  hit.bp2 <- unique(queryHits(findOverlaps(query$linked.to, feature)))

  # Each index appears at most once per breakpoint, so the tally per index is 0, 1 or 2
  n.in.feature <- tabulate(c(hit.bp1, hit.bp2), nbins = length(query))

  m <- matrix(data = 0, nrow = length(query), ncol = 1,
              dimnames = list(NULL, paste0("bp.in.", feature.name)))
  m[, 1] <- n.in.feature

  return(m)
}



#' Determines the GC content surrounding structural variant breakpoints
#'
#'
#' @param query A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#' @param padding A character vector of length 1 containing the size of the window around each breakpoint in which GC content is computed.
#' Example values are "1kb", "5mb", etc., where internally "bp" is removed, kb is coerced to '000' and mb is coerced to '000000'.
#' The resulting number is divided by 2 and that half-width is added to either side of each breakpoint.
#' @param average A logical (TRUE/FALSE) indicating whether or not to return the average GC content near the two breakpoints for each structural variant
#'
#' @return A numeric matrix with one row per element of query and columns bp1.gc.<padding>, bp2.gc.<padding> and,
#' when average is TRUE, avg.gc.<padding>. The result is a matrix even when query has a single element.
#'
#' @examples
#' # Example 1: Determining the GC content of the 1kb surrounding each breakpoint and computing the average
#' #            for each structural variant.
#' data(test_sv)
#' breakpointGC(query = test_sv, padding = "1kb", average = TRUE)
#'
#' @export
breakpointGC <- function(query, padding, average) {

  # Converting the character vector padding to a numeric half-width
  padding.num <- as.numeric(gsub("mb", "000000", gsub("kb", "000", gsub("bp", "", padding))))/2

  # Creating a matrix to store the results in
  m <- matrix(nrow = length(query), ncol = 3, byrow = FALSE, data = 0,
              dimnames = list(NULL, c(paste0("bp1.gc.", padding), paste0("bp2.gc.",  padding),
                                      paste0("avg.gc.", padding))))

  # Defining seqinfo to enable trimming of out-of-bounds ranges upon the addition of padding
  hg19 <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19
  hg19.gr <- GRanges(seqinfo(hg19))[1:24]
  seqinfo(query) <- seqinfo(hg19.gr)[seqlevels(query)]
  seqinfo(query$linked.to) <- seqinfo(hg19.gr)[seqlevels(query$linked.to)]

  # Getting the GC content for breakpoint 1
  m[,1] <- biovizBase::GCcontent(hg19, trim(query + padding.num))[,1]

  # Getting the GC content for breakpoint 2
  m[,2] <- biovizBase::GCcontent(hg19, trim(query$linked.to + padding.num))[,1]

  if (average == TRUE) {
    m[,3] <- (m[,1] + m[,2])/2
  } else if (average == FALSE) {
    # drop = FALSE keeps a matrix when query has a single element
    m <- m[, -3, drop = FALSE]
  }

  return(m)
}



#' Determines the density of genomic features surrounding breakpoints
#'
#'
#' @param query A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#' @param feature A GRanges object containing the features to compute the density from
#' @param padding A character vector of length 1 containing the size of the window around each breakpoint.
#' Example values are "100bp", "1kb", "5mb", etc., where internally "bp" is removed, kb is coerced to '000' and mb is coerced to '000000'.
#' The resulting number is divided by 2 and that half-width is added to either side of each breakpoint.
#' @param feature.name A character vector of length 1 indicating the name of the feature being evaluated. This value is only used to create the column names of the output.
#' @param average A logical (TRUE/FALSE) indicating whether or not to return the average value as well as the value for each individual breakpoint.
#'
#' @details For each breakpoint, sums the number of basepairs shared between the padded breakpoint and every overlapping
#' range in feature, and divides that sum by 1e6. The value therefore equals the fraction of the window covered by feature
#' only when the window is about 1 megabase wide (padding = "1mb"). Breakpoints without any overlapping feature get 0.
#'
#' @return A numeric matrix with one row per element of query and columns bp1.<feature.name>dens.<padding>,
#' bp2.<feature.name>dens.<padding> and, when average is TRUE, avg.<feature.name>dens.<padding>.
#' The result is a matrix even when query has a single element.
#'
#' @examples
#' # Example 1: Determining the amount of sequence within query +/- 250kb (a 500kb window) for each structural
#' #            variant in test_sv that is in protein-coding gene regions
#' data(test_sv)
#' data(gene)
#' featureDensity(query = test_sv, feature = gene, feature.name = "gene", padding = "500kb", average = TRUE)
#'
#' @export
featureDensity <- function(query, feature, feature.name, padding, average) {

  # Converting the character vector padding to a numeric half-width
  padding.num <- as.numeric(gsub("mb", "000000", gsub("kb", "000", gsub("bp", "", padding))))/2

  # Creating a matrix to store the results in
  m <- matrix(nrow = length(query), ncol = 3, byrow = FALSE, data = 0,
              dimnames = list(NULL, c(paste0("bp1.", feature.name, "dens.", padding), paste0("bp2.", feature.name, "dens.", padding),
                                      paste0("avg.", feature.name, "dens.", padding))))

  # Total number of bases shared with feature for every breakpoint in bp that overlaps feature;
  # the names of the result are the indices of those breakpoints
  overlapWidth <- function(bp) {
    window <- bp + padding.num
    olaps <- findOverlaps(window, feature)
    olaps.width <- width(pintersect(window[queryHits(olaps)], feature[subjectHits(olaps)]))
    tapply(olaps.width, queryHits(olaps), sum)
  }

  # Computing the density for breakpoint 1
  bp1.olaps.width.sum <- overlapWidth(query)
  m[as.numeric(names(bp1.olaps.width.sum)),1] <- as.numeric(bp1.olaps.width.sum)/1e6

  # Computing the density for breakpoint 2
  bp2.olaps.width.sum <- overlapWidth(query$linked.to)
  m[as.numeric(names(bp2.olaps.width.sum)),2] <- as.numeric(bp2.olaps.width.sum)/1e6

  if (average == TRUE) {
    m[,3] <- (m[,1] + m[,2])/2
  } else if (average == FALSE) {
    # drop = FALSE keeps a matrix when query has a single element
    m <- m[, -3, drop = FALSE]
  }

  return(m)
}


#' Grabs a value for an interval that overlaps each breakpoint
#'
#'
#' @param query A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#' @param feature A GRanges object containing the feature of interest.
#' @param feature.name A character vector of length 1 indicating the name of the feature being evaluated. This value is only used to create the column names of the output.
#' @param column.name A character vector of length 1 specifying the name of the metadata column in feature from which to grab a value.
#' @param average A logical (TRUE/FALSE) indicating whether or not to return the average value as well as the value for each individual breakpoint.
#'
#' @details Breakpoints that do not overlap any range in feature get NA. If a breakpoint overlaps more than one range,
#' the value of the last overlapping range reported by findOverlaps is kept.
#'
#' @return A matrix with one row per element of query and columns bp1.<feature.name>, bp2.<feature.name> and,
#' when average is TRUE, avg.<feature.name>. The result is a matrix even when query has a single element.
#'
#' @examples
#' # Example 1: Determining the replication timing score for each breakpoint
#' data(test_sv)
#' data(rep_timing)
#' featureValue(query = test_sv,
#'              feature = rep_timing,
#'              feature.name = 'repliscore',
#'              column.name = "score",
#'              average = TRUE)
#'
#' @export
featureValue <- function(query, feature, feature.name, column.name, average) {

  # Creating a matrix to store the results in
  m <- matrix(nrow = length(query), ncol = 3, byrow = FALSE,
              dimnames = list(NULL, c(paste0("bp1.", feature.name), paste0("bp2.", feature.name),
                                      paste0("avg.", feature.name))))

  # Getting the index of column.name in feature and failing early with a clear
  # message when the column cannot be identified unambiguously
  feature.cols <- mcols(feature)
  col.ind <- which(names(feature.cols) == column.name)
  if (length(col.ind) != 1) {
    stop("column.name must match exactly one metadata column of feature", call. = FALSE)
  }
  feature.values <- feature.cols[,col.ind]

  # Getting the feature value for breakpoint 1
  bp1.olaps <- findOverlaps(query, feature)
  m[queryHits(bp1.olaps),1] <- feature.values[subjectHits(bp1.olaps)]

  # Getting the feature value for breakpoint 2
  bp2.olaps <- findOverlaps(query$linked.to, feature)
  m[queryHits(bp2.olaps),2] <- feature.values[subjectHits(bp2.olaps)]

  if (average == TRUE) {
    m[,3] <- (m[,1] + m[,2])/2
  } else if (average == FALSE) {
    # drop = FALSE keeps a matrix when query has a single element
    m <- m[, -3, drop = FALSE]
  }

  return(m)
}


#' Determine if both breakpoints are within the same feature element
#'
#' @param query A GRanges object containing breakpoint 1 with a GRanges for breakpoint 2 in a metadata column called linked.to
#' @param feature A GRanges object containing the feature of interest.
#' @param feature.name A character vector of length 1 indicating the name of the feature being evaluated. This value is only used to create the column name of the output.
#'
#' @return A one-column character matrix named same.<feature.name> with one row per element of query, holding "Yes" when
#' both breakpoints overlap the same range of feature and "No" otherwise.
#'
#' @examples
#' data(test_sv)
#' data(tad)
#' sameFeatureElement(query = test_sv, feature = tad, feature.name = 'tad')
#'
#' @export
sameFeatureElement <- function(query, feature, feature.name) {

  # Labels every overlap between a breakpoint and a feature range as "<query index>-<feature index>"
  overlapKeys <- function(bp) {
    hits <- findOverlaps(bp, feature)
    paste(queryHits(hits), subjectHits(hits), sep = "-")
  }

  # A label present for both breakpoints means that both sit in the same feature range
  shared <- intersect(overlapKeys(query), overlapKeys(query$linked.to))

  # Dropping the feature part of the label leaves the index of the structural variant
  same.ind <- as.numeric(gsub("-.+", "", shared))

  m <- matrix("No", nrow = length(query), ncol = 1,
              dimnames = list(NULL, paste0("same.", feature.name)))
  m[same.ind,] <- "Yes"

  return(m)
}


#' Compute all features for pairs of breakpoints
#'
#'
#' @param query A GRanges object containing the breakpoints for which to compute features
#' @param subject A GRanges object of structural variants; passed to nearbyBreakpoints as subject
#' @param queryInSubject A logical (TRUE/FALSE); passed to nearbyBreakpoints as queryInSubject
#'
#' @details Calls svType, svSize, nearbyBreakpoints, distanceToFeature, withinFeature, featureValue,
#' sameFeatureElement, featureDensity and breakpointGC with fixed settings and binds the results column-wise.
#' The feature objects lad, rep_timing, centromere, telomere, tad, cpg, alu and gene are not arguments; they must be
#' visible from this function (presumably datasets shipped with the package -- confirm).
#'
#' @return A data.frame with one row per element of query, in which sv.type and same.tad are factors.
#'
#' @examples
#' data(test_sv)
#' data(pcawg_sv_setA)
#'
#' allFeatures(query = test_sv,
#'             subject = pcawg_sv_setA,
#'             queryInSubject = TRUE)
#'
#' @export
allFeatures <- function(query, subject, queryInSubject) {
  # Window sizes used for both nearbyBreakpoints calls
  windows <- c("1kb", "10kb", "100kb", "1mb", "10mb")

  # One matrix per feature group, in the column order of the final table
  feature.tables <- list(
    svType(query),
    svSize(query),
    nearbyBreakpoints(query = query, subject = subject, padding = windows, both = FALSE, queryInSubject = queryInSubject),
    nearbyBreakpoints(query = query, subject = subject, padding = windows, both = TRUE, queryInSubject = queryInSubject),
    distanceToFeature(query = query, feature = lad, feature.name = "lad", boundary = FALSE, average = TRUE),
    withinFeature(query = query, feature = lad, feature.name = "lad"),
    featureValue(query = query, feature = rep_timing, feature.name = "repliscore", column.name = "score", average = TRUE),
    distanceToFeature(query = query, feature = centromere, feature.name = "centro", boundary = FALSE, average = TRUE),
    distanceToFeature(query = query, feature = telomere, feature.name = "telo", boundary = FALSE, average = TRUE),
    distanceToFeature(query = query, feature = tad, feature.name = "tadboundary", boundary = TRUE, average = TRUE),
    sameFeatureElement(query = query, feature = tad, feature.name = "tad"),
    distanceToFeature(query = query, feature = cpg, feature.name = "cpg", boundary = FALSE, average = TRUE),
    distanceToFeature(query = query, feature = alu, feature.name = "alu", boundary = FALSE, average = TRUE),
    featureDensity(query = query, feature = gene, feature.name = "gene", padding = "1mb", average = TRUE),
    breakpointGC(query = query, padding = "100bp", average = TRUE),
    breakpointGC(query = query, padding = "1kb", average = TRUE)
  )

  df <- do.call(cbind, lapply(feature.tables, as.data.frame))

  # Changing categorical variables to factors
  df$sv.type <- factor(df$sv.type, levels = c("DEL", "DUP", "INV", "TRA"))
  df$same.tad <- factor(df$same.tad, levels = c("Yes", "No"))

  return(df)
}
cancer-genomics/plasmasv documentation built on May 15, 2020, 11:35 a.m.