R/junc_link.R

Defines functions junc_link

Documented in junc_link

#' @title Junction Linker
#'
#' @description
#' Description
#'
#' @param f_df Project enviorment option
#' @param f_upp_col Project enviorment option
#' @param f_down_col Project enviorment option
#' @param f_pos_col Project enviorment option
#' @param f_thres Project enviorment option
#' @param f_chrom_col Project enviorment option
#' @param gie_bin_overlaps Project enviorment option
#'
#' @return
#' Return
#'
#' @seealso \code{peak_linker}
#' @export

junc_link <- function(

	f_df,
	f_upp_col,
	f_down_col,
	f_pos_col,
	f_thres = 1,
	f_chrom_col = NULL,
	gie_bin_overlaps
) {

	f_df <- as.data.frame(f_df)
	if ( is.null(f_chrom_col) ) {
		#
		f_chrom_no <- 1
		f_df_chr <- f_df
	}else{
		#
		f_chrom_no <- names(table(f_df[, f_chrom_col]))
	}

	f_ls <- list()
	for( f_i in f_chrom_no ){
		#
		if ( !is.null(f_chrom_col) ) {
			#
			f_df_chr <- f_df[which(f_df[, f_chrom_col] == f_i), ]
		}
		f_df_chr[which(f_df_chr[, f_upp_col] > f_thres), f_upp_col] <- 1
		f_df_chr[which(f_df_chr[, f_down_col] > f_thres), f_down_col] <- 1
		f_df_chr$sum <- f_df_chr[, f_upp_col] * 10 + f_df_chr[, f_down_col]
		f_tmp_down_pos <- NULL
		f_tmp_up_pos <- NULL

		if ( max(f_df_chr$sum) == 11 ) {
			# Overlaping peaks
			f_pre_peak <- CopperGenomicFunctions::peak_iden_seq(f_df_chr$sum)
			f_peak <- f_pre_peak[f_df_chr[f_pre_peak$lower_lim_ix, "sum"] == 10 & f_df_chr[f_pre_peak$upper_lim_ix, "sum"] == 1 & f_pre_peak$seq_max == 11 & f_pre_peak$peak_length_ix <= gie_bin_overlaps * 2, ]

			if ( dim(f_peak)[1] != 0 ) {
				#
				f_part_over <- CopperGenomicFunctions::coor_seq(
					f_peak$lower_lim_ix,
					f_peak$upper_lim_ix
				)
				f_tmp_over_df <- f_df_chr[f_part_over, c(f_pos_col, f_upp_col, f_down_col, "sum")]
				f_tmp_over_up <- CopperGenomicFunctions::peak_iden(f_tmp_over_df[, f_upp_col])
				f_tmp_over_dw <- CopperGenomicFunctions::peak_iden(f_tmp_over_df[, f_down_col])

				if ( dim(f_tmp_over_up)[1] == dim(f_tmp_over_dw)[1] ) {
					#
					f_tmp_vec_dw <- f_tmp_over_df[f_tmp_over_dw$lower_lim_ix, f_pos_col]
					f_tmp_vec_up <- f_tmp_over_df[f_tmp_over_up$upper_lim_ix, f_pos_col]
					f_tmp_down_pos <- match(f_tmp_vec_dw, f_df_chr[, f_pos_col])
					f_tmp_up_pos <- match(f_tmp_vec_up, f_df_chr[, f_pos_col])
				}else{
					#
					cat("Overlaping peaks do no coincide - There is disturbance in the Force", fill = TRUE)
				}
			}else{
				#
				cat("No overlaping peaks compatible - I have a bad feeling about this", fill = TRUE)
			}
			f_compl_over <- CopperGenomicFunctions::coor_seq(
				f_pre_peak[which(f_pre_peak$seq_max == 11), "lower_lim_ix"],
				f_pre_peak[which(f_pre_peak$seq_max == 11), "upper_lim_ix"]
			)
			f_tmp_df <- f_df_chr[-f_compl_over, ]
		}else{
			#
			f_tmp_df <- f_df_chr
		}
		# Non-overlaping peaks
		f_tmp_df <- f_tmp_df[which(f_tmp_df$sum >= 1), c(f_pos_col, f_upp_col, f_down_col, "sum")]
		f_tmp_ed <- f_tmp_df[which(f_tmp_df[, f_upp_col] >= f_thres) + 1, ]
		f_vec_ed <- f_tmp_ed[which(f_tmp_ed[, f_upp_col] < f_thres), f_pos_col]
		f_vec_eu <- f_tmp_ed[which(f_tmp_ed[, f_upp_col] < f_thres) - 1, f_pos_col]
		f_down_pos <- match(f_vec_ed, f_df_chr[, f_pos_col])
		f_up_pos <- match(f_vec_eu, f_df_chr[, f_pos_col])

		f_down_pos <- c(f_down_pos, f_tmp_down_pos)
		f_up_pos <- c(f_up_pos, f_tmp_up_pos)

		f_df_e <- f_df_chr[c(rbind(f_up_pos, f_down_pos)), c(f_pos_col, f_upp_col, f_down_col)]
		f_df_e[, "dist"] <- c(f_df_e[-1, f_pos_col], f_df_e[dim(f_df_e)[1], f_pos_col]) - f_df_e[, f_pos_col]
		f_df_e[which((1:(dim(f_df_e)[1]) %% 2) == 0), "dist"] <- NA

		# Cleaning for misscalls
		f_misscall <- which(f_df_e[, f_upp_col] == 0 & !is.na(f_df_e$dist))
		if ( length(f_misscall) > 0 ) {
			#
			cat("There were some peak misscalls - For your eyes only: ", f_misscall, fill = TRUE)
			f_df_e <- f_df_e[-(c(f_misscall, f_misscall + 1)), ]
		}

		f_ls[[f_i]] <- f_df_e
	}
	f_out_val <- CopperGenomicFunctions::concat_ls(f_ls)

	return(f_out_val)
}
DanielRivasMD/Rpack.chlSab documentation built on Nov. 18, 2019, 12:01 a.m.