#' Bin Median filter
#'
#' @param bin_data
#' @param seg_data
#' @param bincount_threshold
#' @param sd_threshold
#' @param lower_quantile
#' @param exploratory_plots
#' @param return_removed_data
#'
#' @return
#' @export
#'
#' @examples
bin_median_filter <- function(bin_data,
seg_data,
bincount_threshold = NULL,
sd_threshold = 2,
lower_quantile = NULL,
exploratory_plots = FALSE,
return_removed_data = FALSE) {
# removing chrom, chrompos and abspos columns
if (any(names(bin_data) %in% c("chrom", "chrompos", "abspos")) == TRUE) {
bin_data <- bin_data[-which(names(bin_data) %in% c("chrom", "chrompos", "abspos"))]
}
# Calculating median bins for each row
median_bins <- apply(bin_data, 2, median)
mean_mbins <- mean(median_bins)
sd_mbins <- sd(median_bins)
sd_threshold <- mean_mbins - sd_threshold*sd_mbins
# removing samples from bin
if (sd_threshold) {
bin_data_kept <- bin_data[-which(median_bins < sd_threshold)]
}
if (!is.null(lower_quantile)) {
bin_data_kept <- bin_data[-which(median_bins < quantile(median_bins, lower_quantile))]
}
if (!is.null(bincount_threshold)) {
bin_data_kept <- bin_data[-which(median_bins < bincount_threshold)]
}
# keeping the same cells on seg and bin file
seg_file_filtered <- seg_data[(names(seg_data) %in% names(bin_data_kept))]
# attaching bin and seg to a list
bin_seg_list <- list(seg_data_filtered = seg_file_filtered,
bin_data_filtered = bin_data_kept)
# keeping the removed data
if (!is.null(lower_quantile)) {
bin_data_removed <- bin_data[which(median_bins < quantile(median_bins, lower_quantile))]
}
if (!is.null(bincount_threshold)) {
bin_data_removed <- bin_data[which(median_bins < bincount_threshold)]
}
if(sd_threshold) {
bin_data_removed <- bin_data[which(median_bins < sd_threshold)]
}
print(paste(dim(bin_data_removed)[2], "cells were removed, to return them please set return_removed_data as TRUE", sep = " "))
# printing exploratory plots for median bins
if (exploratory_plots == TRUE) {
par(mfrow= c(1,2))
hist(median_bins,
breaks = 200)
if (!is.null(lower_quantile)) {
abline(v = quantile(median_bins, lower_quantile), col = "red")
}
if (!is.null(bincount_threshold)) {
abline(v = bincount_threshold, col = "blue")
}
if (sd_threshold) {
abline(v = sd_threshold, col = "green")
}
boxplot(median_bins)
}
if (return_removed_data == TRUE) {
return(bin_data_removed)
}
return(seg_file_filtered)
}
#######################################
# breakpoint filter
#######################################
#' Breakpoint Filtering
#'
#' @param seg_data
#' @param exploratory_plots
#' @param sd_threshold
#' @param keep_only_removed_data
#'
#' @return
#' @export
#'
#' @examples
breakpoint_filter <- function(seg_data,
exploratory_plots = FALSE,
sd_threshold = TRUE,
keep_only_removed_data = FALSE) {
# removing chrom, chrompos and abspos columns
if (any(names(seg_data) %in% c("chrom", "chrompos", "abspos")) == TRUE) {
seg_data <- seg_data[-which(names(seg_data) %in% c("chrom", "chrompos", "abspos"))]
}
rle_all <- apply(seg_data, 2, rle)
rle_length <- lapply(rle_all, function(x,i) length(x[[i]][2]))
rle_length <- vector()
for (i in seq_along(rle_all)) {
rle_length[i] <- length(unlist(rle_all[[i]][2]))
}
rle_mean <- mean(rle_length)
rle_sd <- sd(rle_length)
if (sd_threshold) {
seg_filtered_data <- seg_data[which(rle_length < (rle_mean + 2*rle_sd))]
seg_filtered_data_removed <- seg_data[which(rle_length > rle_mean + 2*rle_sd)]
}
# printing exploratory plots
if (exploratory_plots == TRUE) {
plot.new()
par(mfrow = c(1,2))
hist(rle_length,
breaks = 100)
if (sd_threshold) {
abline(v = rle_mean + 2*rle_sd, col = "blue")
}
plot(density(rle_length))
}
# in case you want to analyze the data that is going to be removed
if (keep_only_removed_data == TRUE) {
seg_filtered_data_removed <- seg_data[which( rle_length < quantile(rle_length, lower_quantile) | rle_length > quantile(rle_length, top_quantile))]
return(seg_filtered_data_removed)
}
print(paste(ncol(seg_filtered_data_removed), "cells were removed", sep = " "))
return(seg_filtered_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.