R/peak_linker.R

Defines functions peak_linker

Documented in peak_linker

#' @title Peak Linker
#'
#' @description
#' Description
#'
#' @inheritParams genomic_count
#'
#' @param f_hit_df Project enviorment option
#' @param f_global_peak_ls Global peak list
#' @param ge_forward_seq Project enviorment option
#' @param ge_reverse_seq Project enviorment option
#' @param ge_est_insert_size Project enviorment option
#' @param ge_str_tag Project enviorment option
#' @param ge_sum_names Project enviorment option
#' @param ge_strand_ori Project enviorment option
#'
#' @return
#' Return
#'
#' @seealso \pkg{BiocParallel}
#' @seealso \code{junc_link}
#' @seealso \code{junc_addcols}
#' @export
#'
#' @importFrom dplyr %>%
#' @export

peak_linker <- function(

	f_tmp_dir,
	f_which_vervet,
	f_Chromosome,
	f_hit_df,
	f_global_peak_ls,
	ge_bin_size,
	ge_bin_overlaps,
	ge_forward_seq,
	ge_reverse_seq,
	ge_forward_strand,
	ge_reverse_strand,
	ge_str_tag,
	ge_sum_names,
	ge_strand_ori,
	ge_est_insert_size
) {

	# Load SAM-like flat file - Silently
	f_in_ind_file <- paste0(f_tmp_dir, f_which_vervet, "_chr", f_Chromosome, ".txt")
	suppressMessages(f_ind_var <- readr::read_tsv(f_in_ind_file))

	# Adjust chromosomal position for read / peak correlation
	f_adj_chromosomal <- tibble::tibble(
		chr_pos = f_ind_var$Chromosomal_Position,
		tag = 1:dim(f_ind_var)[1]
	)
	f_adj_step <- ge_bin_size * ( (( 1:ge_bin_overlaps ) - 1) / ge_bin_overlaps )

	f_adj_pos_val_ls <- list()
	for(f_adjusting in 1:ge_bin_overlaps){
		#
		f_adj_pos_val_ls[[f_adjusting]] <- tibble::tibble(
			pos = CopperGenomicFunctions::slid_win( f_adj_chromosomal$chr_pos + f_adj_step[f_adjusting], ge_bin_size ) - f_adj_step[f_adjusting],
			tag = 1:dim(f_ind_var)[1]
		)
	}

	adj_pos_val <- tibble::as_tibble(CopperGenomicFunctions::concat_ls(f_adj_pos_val_ls))
	f_adj_chromosomal$min <- stats::aggregate(
		pos ~ tag,
		data = adj_pos_val,
		FUN = min
	) %>%
		dplyr::pull(pos)

	f_adj_chromosomal$max <- stats::aggregate(
		pos ~ tag,
		data = adj_pos_val,
		FUN = max
	) %>%
		dplyr::pull(pos)

	f_individual_rang <- f_adj_chromosomal %>%
		dplyr::select(min, max) %>%
		tibble::add_column(Chr = f_ind_var$f_Chromosome) %>%
		CopperGenomicFunctions::range_assembler()

	f_ind_var <- Rpack.chlSab::read_orient(
		f_col_tag = "LTR_cand",
		f_df = f_ind_var,
		f_tags = ge_strand_ori,
		fie_forward_seq = ge_forward_seq,
		fie_reverse_seq = ge_reverse_seq,
		fie_forward_strand = ge_forward_strand,
		fie_reverse_strand = ge_reverse_strand
	)

	# Additional anchors
	f_ind_var$additional_anchors <- NA

	#Probable duplicate
	f_ind_var[duplicated( f_ind_var[c("Chromosomal_Position", "LTR_cand")] ), "additional_anchors"] <- "PD"

	#Supplementary ERV alignment
	f_ind_var[duplicated( f_ind_var[c("Read_ID", "LTR_cand")] ), "additional_anchors"] <- "SA"

	f_sub_compar_matrix <- f_hit_df %>% dplyr::filter(Chr == f_Chromosome)

	tmp_peak_ls <- list()
	for(f_int_str in seq_along(ge_str_tag)){

		f_ind_var[, ge_sum_names[f_int_str]] <- 0
		f_sub_compar_matrix[, paste0("tmp_", ge_sum_names[f_int_str])] <- CopperGenomicFunctions::dec_tag(
			f_sub_compar_matrix[[f_which_vervet]],
			ge_str_tag[f_int_str]
		)

		f_sub_compar_matrix[, paste0("non_filt_", ge_sum_names[f_int_str])] <- 0
		#
		tmp_table <- CopperGenomicFunctions::concat_ls( CopperGenomicFunctions::slid_win_tov(f_ind_var %>%
			dplyr::filter(LTR_cand == ge_strand_ori[f_int_str]) %>%
			dplyr::select(Chromosomal_Position)
		) )
		#
		tmp_pos <- match(names(tmp_table), f_sub_compar_matrix[["Positions"]])
		f_sub_compar_matrix[tmp_pos[which(!is.na(tmp_pos))], paste0("non_filt_", ge_sum_names[f_int_str])] <- tmp_table[which(!is.na(tmp_pos))]
		tmp_peak_coor <- which(f_sub_compar_matrix[[paste0("tmp_", ge_sum_names[f_int_str])]] > 0)
		f_sub_compar_matrix[tmp_peak_coor, paste0("tmp_", ge_sum_names[f_int_str])] <- f_sub_compar_matrix[tmp_peak_coor, paste0("non_filt_", ge_sum_names[f_int_str])]

		tmp_peak_ls[[f_int_str]] <- CopperGenomicFunctions::peak_iden(f_sub_compar_matrix[[paste0("tmp_", ge_sum_names[f_int_str])]])
		tmp_peak_ls[[f_int_str]]$orientation <- ge_sum_names[f_int_str]
		tmp_peak_ls[[f_int_str]]$Chr <- f_Chromosome
		#
		tmp_peak_ls[[f_int_str]]$lower_lim_nt <- f_sub_compar_matrix %>%
			dplyr::slice(tmp_peak_ls[[f_int_str]]$lower_lim_ix) %>%
			dplyr::pull(Positions)
		#
		tmp_peak_ls[[f_int_str]]$upper_lim_nt <- f_sub_compar_matrix %>%
			dplyr::slice(tmp_peak_ls[[f_int_str]]$upper_lim_ix) %>%
			dplyr::pull(Positions)
		tmp_peak_ls[[f_int_str]][, c("link_inner_edge", "link_no", "dist_nt", "global_no", "global_link_no")] <- NA

		global_peak_range <- f_global_peak_ls[[f_Chromosome]] %>%
			dplyr::filter(orientation == ge_sum_names[f_int_str]) %>%
			dplyr::select(lower_lim_ix, upper_lim_ix) %>%
			dplyr::mutate(Chr = f_Chromosome) %>%
			CopperGenomicFunctions::range_assembler()

		peak_chr_rang_ix <- tmp_peak_ls[[f_int_str]] %>%
			dplyr::select(lower_lim_ix, upper_lim_ix) %>%
			dplyr::mutate(Chr = f_Chromosome) %>%
			CopperGenomicFunctions::range_assembler()

		global_peak_match <- IRanges::findOverlaps(peak_chr_rang_ix, global_peak_range, type = "within")
		tmp_peak_ls[[f_int_str]]$global_no <- global_peak_match@to


		peak_chr_rang_nt <- tmp_peak_ls[[f_int_str]] %>%
			dplyr::select(lower_lim_nt, upper_lim_nt) %>%
			tibble::add_column(Chr = paste0("chr", f_Chromosome)) %>%
			CopperGenomicFunctions::range_assembler()

		individual_match <- IRanges::findOverlaps(f_individual_rang, peak_chr_rang_nt)
		f_ind_var[individual_match@from, ge_sum_names[f_int_str]] <- tmp_peak_ls[[f_int_str]][individual_match@to, "global_no"]
		f_ind_var[individual_match@from, ge_sum_names[f_int_str]] <- individual_match@to
	}

	ind_sub_var <- f_ind_var[f_ind_var$LTR_cand != 0 & (f_ind_var$FS5 != 0 | f_ind_var$FS3 != 0 | f_ind_var$RS5 != 0 | f_ind_var$RS3 != 0), ]

	# Junction Linking
	# Forward strand
	fs_link <- Rpack.chlSab::junc_link(
		f_df = f_sub_compar_matrix,
		f_upp_col = "tmp_FS5",
		f_down_col = "tmp_FS3",
		f_pos_col = "Positions",
		f_thres = 1,
		f_chrom_col = NULL,
		gie_bin_overlaps = ge_bin_overlaps
	)

	f_fs_peak_ls <- Rpack.chlSab::junc_addcols(
		fi_link = fs_link,
		fi_tmp_peak_ls = tmp_peak_ls,
		up_junc_pos = "FS5",
		down_junc_pos = "FS3",
		gie_sum_names = ge_sum_names
	)

	# Reverse strand
	rs_link <- Rpack.chlSab::junc_link(
		f_df = f_sub_compar_matrix,
		f_upp_col = "tmp_RS3",
		f_down_col = "tmp_RS5",
		f_pos_col = "Positions",
		f_thres = 1,
		f_chrom_col = NULL,
		gie_bin_overlaps = ge_bin_overlaps
	)

	f_rs_peak_ls <- Rpack.chlSab::junc_addcols(
		fi_link = rs_link,
		fi_tmp_peak_ls = tmp_peak_ls,
		up_junc_pos = "RS3",
		down_junc_pos = "RS5",
		gie_sum_names = ge_sum_names
	)

	return(list(
		ind = ind_sub_var,
		peak = CopperGenomicFunctions::concat_ls(c(f_fs_peak_ls, f_rs_peak_ls))
	))
}
DanielRivasMD/Rpack.chlSab documentation built on Nov. 18, 2019, 12:01 a.m.