#' @title Peak Linker
#'
#' @description
#' Description
#'
#' @inheritParams genomic_count
#'
#' @param f_hit_df Project enviorment option
#' @param f_global_peak_ls Global peak list
#' @param ge_forward_seq Project enviorment option
#' @param ge_reverse_seq Project enviorment option
#' @param ge_est_insert_size Project enviorment option
#' @param ge_str_tag Project enviorment option
#' @param ge_sum_names Project enviorment option
#' @param ge_strand_ori Project enviorment option
#'
#' @return
#' Return
#'
#' @seealso \pkg{BiocParallel}
#' @seealso \code{junc_link}
#' @seealso \code{junc_addcols}
#' @export
#'
#' @importFrom dplyr %>%
#' @export
peak_linker <- function(
f_tmp_dir,
f_which_vervet,
f_Chromosome,
f_hit_df,
f_global_peak_ls,
ge_bin_size,
ge_bin_overlaps,
ge_forward_seq,
ge_reverse_seq,
ge_forward_strand,
ge_reverse_strand,
ge_str_tag,
ge_sum_names,
ge_strand_ori,
ge_est_insert_size
) {
# Load SAM-like flat file - Silently
f_in_ind_file <- paste0(f_tmp_dir, f_which_vervet, "_chr", f_Chromosome, ".txt")
suppressMessages(f_ind_var <- readr::read_tsv(f_in_ind_file))
# Adjust chromosomal position for read / peak correlation
f_adj_chromosomal <- tibble::tibble(
chr_pos = f_ind_var$Chromosomal_Position,
tag = 1:dim(f_ind_var)[1]
)
f_adj_step <- ge_bin_size * ( (( 1:ge_bin_overlaps ) - 1) / ge_bin_overlaps )
f_adj_pos_val_ls <- list()
for(f_adjusting in 1:ge_bin_overlaps){
#
f_adj_pos_val_ls[[f_adjusting]] <- tibble::tibble(
pos = CopperGenomicFunctions::slid_win( f_adj_chromosomal$chr_pos + f_adj_step[f_adjusting], ge_bin_size ) - f_adj_step[f_adjusting],
tag = 1:dim(f_ind_var)[1]
)
}
adj_pos_val <- tibble::as_tibble(CopperGenomicFunctions::concat_ls(f_adj_pos_val_ls))
f_adj_chromosomal$min <- stats::aggregate(
pos ~ tag,
data = adj_pos_val,
FUN = min
) %>%
dplyr::pull(pos)
f_adj_chromosomal$max <- stats::aggregate(
pos ~ tag,
data = adj_pos_val,
FUN = max
) %>%
dplyr::pull(pos)
f_individual_rang <- f_adj_chromosomal %>%
dplyr::select(min, max) %>%
tibble::add_column(Chr = f_ind_var$f_Chromosome) %>%
CopperGenomicFunctions::range_assembler()
f_ind_var <- Rpack.chlSab::read_orient(
f_col_tag = "LTR_cand",
f_df = f_ind_var,
f_tags = ge_strand_ori,
fie_forward_seq = ge_forward_seq,
fie_reverse_seq = ge_reverse_seq,
fie_forward_strand = ge_forward_strand,
fie_reverse_strand = ge_reverse_strand
)
# Additional anchors
f_ind_var$additional_anchors <- NA
#Probable duplicate
f_ind_var[duplicated( f_ind_var[c("Chromosomal_Position", "LTR_cand")] ), "additional_anchors"] <- "PD"
#Supplementary ERV alignment
f_ind_var[duplicated( f_ind_var[c("Read_ID", "LTR_cand")] ), "additional_anchors"] <- "SA"
f_sub_compar_matrix <- f_hit_df %>% dplyr::filter(Chr == f_Chromosome)
tmp_peak_ls <- list()
for(f_int_str in seq_along(ge_str_tag)){
f_ind_var[, ge_sum_names[f_int_str]] <- 0
f_sub_compar_matrix[, paste0("tmp_", ge_sum_names[f_int_str])] <- CopperGenomicFunctions::dec_tag(
f_sub_compar_matrix[[f_which_vervet]],
ge_str_tag[f_int_str]
)
f_sub_compar_matrix[, paste0("non_filt_", ge_sum_names[f_int_str])] <- 0
#
tmp_table <- CopperGenomicFunctions::concat_ls( CopperGenomicFunctions::slid_win_tov(f_ind_var %>%
dplyr::filter(LTR_cand == ge_strand_ori[f_int_str]) %>%
dplyr::select(Chromosomal_Position)
) )
#
tmp_pos <- match(names(tmp_table), f_sub_compar_matrix[["Positions"]])
f_sub_compar_matrix[tmp_pos[which(!is.na(tmp_pos))], paste0("non_filt_", ge_sum_names[f_int_str])] <- tmp_table[which(!is.na(tmp_pos))]
tmp_peak_coor <- which(f_sub_compar_matrix[[paste0("tmp_", ge_sum_names[f_int_str])]] > 0)
f_sub_compar_matrix[tmp_peak_coor, paste0("tmp_", ge_sum_names[f_int_str])] <- f_sub_compar_matrix[tmp_peak_coor, paste0("non_filt_", ge_sum_names[f_int_str])]
tmp_peak_ls[[f_int_str]] <- CopperGenomicFunctions::peak_iden(f_sub_compar_matrix[[paste0("tmp_", ge_sum_names[f_int_str])]])
tmp_peak_ls[[f_int_str]]$orientation <- ge_sum_names[f_int_str]
tmp_peak_ls[[f_int_str]]$Chr <- f_Chromosome
#
tmp_peak_ls[[f_int_str]]$lower_lim_nt <- f_sub_compar_matrix %>%
dplyr::slice(tmp_peak_ls[[f_int_str]]$lower_lim_ix) %>%
dplyr::pull(Positions)
#
tmp_peak_ls[[f_int_str]]$upper_lim_nt <- f_sub_compar_matrix %>%
dplyr::slice(tmp_peak_ls[[f_int_str]]$upper_lim_ix) %>%
dplyr::pull(Positions)
tmp_peak_ls[[f_int_str]][, c("link_inner_edge", "link_no", "dist_nt", "global_no", "global_link_no")] <- NA
global_peak_range <- f_global_peak_ls[[f_Chromosome]] %>%
dplyr::filter(orientation == ge_sum_names[f_int_str]) %>%
dplyr::select(lower_lim_ix, upper_lim_ix) %>%
dplyr::mutate(Chr = f_Chromosome) %>%
CopperGenomicFunctions::range_assembler()
peak_chr_rang_ix <- tmp_peak_ls[[f_int_str]] %>%
dplyr::select(lower_lim_ix, upper_lim_ix) %>%
dplyr::mutate(Chr = f_Chromosome) %>%
CopperGenomicFunctions::range_assembler()
global_peak_match <- IRanges::findOverlaps(peak_chr_rang_ix, global_peak_range, type = "within")
tmp_peak_ls[[f_int_str]]$global_no <- global_peak_match@to
peak_chr_rang_nt <- tmp_peak_ls[[f_int_str]] %>%
dplyr::select(lower_lim_nt, upper_lim_nt) %>%
tibble::add_column(Chr = paste0("chr", f_Chromosome)) %>%
CopperGenomicFunctions::range_assembler()
individual_match <- IRanges::findOverlaps(f_individual_rang, peak_chr_rang_nt)
f_ind_var[individual_match@from, ge_sum_names[f_int_str]] <- tmp_peak_ls[[f_int_str]][individual_match@to, "global_no"]
f_ind_var[individual_match@from, ge_sum_names[f_int_str]] <- individual_match@to
}
ind_sub_var <- f_ind_var[f_ind_var$LTR_cand != 0 & (f_ind_var$FS5 != 0 | f_ind_var$FS3 != 0 | f_ind_var$RS5 != 0 | f_ind_var$RS3 != 0), ]
# Junction Linking
# Forward strand
fs_link <- Rpack.chlSab::junc_link(
f_df = f_sub_compar_matrix,
f_upp_col = "tmp_FS5",
f_down_col = "tmp_FS3",
f_pos_col = "Positions",
f_thres = 1,
f_chrom_col = NULL,
gie_bin_overlaps = ge_bin_overlaps
)
f_fs_peak_ls <- Rpack.chlSab::junc_addcols(
fi_link = fs_link,
fi_tmp_peak_ls = tmp_peak_ls,
up_junc_pos = "FS5",
down_junc_pos = "FS3",
gie_sum_names = ge_sum_names
)
# Reverse strand
rs_link <- Rpack.chlSab::junc_link(
f_df = f_sub_compar_matrix,
f_upp_col = "tmp_RS3",
f_down_col = "tmp_RS5",
f_pos_col = "Positions",
f_thres = 1,
f_chrom_col = NULL,
gie_bin_overlaps = ge_bin_overlaps
)
f_rs_peak_ls <- Rpack.chlSab::junc_addcols(
fi_link = rs_link,
fi_tmp_peak_ls = tmp_peak_ls,
up_junc_pos = "RS3",
down_junc_pos = "RS5",
gie_sum_names = ge_sum_names
)
return(list(
ind = ind_sub_var,
peak = CopperGenomicFunctions::concat_ls(c(f_fs_peak_ls, f_rs_peak_ls))
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.