R/genomic_count.R

Defines functions genomic_count

Documented in genomic_count

#' @title Genomic Count
#'
#' @description
#' Description
#'
#' @param f_Chromosome \pkg{BiocParallel} option
#' @param f_tmp_dir Directory from \strong{Uppmax SLURM}
#' @param f_which_vervet Indiviual as \strong{SLURM batch array}
#' @param ge_data_bin_dir Project Global Enviorment option
#' @param ge_vervet_ls Project Global Enviorment option
#' @param ge_bin_size Project Global Enviorment option
#' @param ge_bin_overlaps Project Global Enviorment option
#' @param ge_forward_seq Project Global Enviorment option
#' @param ge_reverse_seq Project Global Enviorment option
#' @param ge_forward_strand Project Global Enviorment option
#' @param ge_reverse_strand Project Global Enviorment option
#' @param ge_pos_margin Project Global Enviorment option
#' @param ge_est_insert_size Project Global Enviorment option
#' @param age_nombres Project As Global Enviorment option
#' @param age_strand Project As Global Enviorment option
#' @param b_ervVchr Boolen selector for \emph{'ERV'} or \emph{'CHR'}
#'
#' @return
#' Return
#'
#' @seealso \pkg{BiocParallel}
#' @export

genomic_count <- function(

	f_Chromosome,
	f_tmp_dir,
	f_which_vervet,
	ge_data_bin_dir,
	ge_vervet_ls,
	ge_bin_size,
	ge_bin_overlaps,
	ge_forward_seq,
	ge_reverse_seq,
	ge_forward_strand,
	ge_reverse_strand,
	ge_pos_margin,
	ge_est_insert_size,
	age_nombres,
	age_strand,
	b_ervVchr
) {

	# Load SAM-like flat file - Silently
	f_in_ind_file <- paste0(f_tmp_dir, f_which_vervet, "_chr", f_Chromosome, ".txt")
	suppressMessages( f_ind_var <- readr::read_tsv(f_in_ind_file) )

	f_count_dummy_file <- paste0(ge_data_bin_dir, "chlSab_chr", f_Chromosome, ".RData")
	f_nombres <- as.vector(outer(f_which_vervet, age_nombres, paste, sep = "_"))

	if ( f_which_vervet == names(ge_vervet_ls[1]) & b_ervVchr == "ERV" ) {
		#
		f_x_max <- max(f_ind_var[, "Chromosomal_Position"]) + (ge_pos_margin * ge_bin_size)
		Positions <- seq(from = 0, to = f_x_max, by = ge_bin_size / ge_bin_overlaps)
		count_matrix <- tibble::tibble(Chr = f_Chromosome, Positions = Positions)
		save(count_matrix, file = f_count_dummy_file)
		cat("Save count dummy", fill = TRUE)
	} else {
		#
		load(f_count_dummy_file)
		Positions <- count_matrix$Positions
		cat("Load count dummy", fill = TRUE)
	}

	if ( b_ervVchr == "ERV" ){
		#
		col_tag <- "LTR_cand"

		f_ind_var <- Rpack.chlSab::read_orient(
			fi_col_tag = col_tag,
			fi_df = f_ind_var,
			fi_tags = age_strand,
			fie_est_insert_size = ge_est_insert_size,
			fie_forward_seq = ge_forward_seq,
			fie_reverse_seq = ge_reverse_seq,
			fie_forward_strand = ge_forward_strand,
			fie_reverse_strand = ge_reverse_strand,
			bi_ervVchr = b_ervVchr
		)

		# Eliminate identical chromosomal anchoring positions & supplementary alignments (ERV)
		f_ind_var <- f_ind_var[!duplicated(f_ind_var[c("Chromosomal_Position", col_tag)]), ]
	} else if ( b_ervVchr == "CHR" ) {
		#
		col_tag <- "read_cand"

		f_ind_var <- Rpack.chlSab::read_orient(
			fi_col_tag = col_tag,
			fi_df = f_ind_var,
			fi_tags = age_strand,
			fie_forward_strand = ge_forward_strand,
			fie_reverse_strand = ge_reverse_strand,
			bi_ervVchr = b_ervVchr
		)
	}

	tmp_hit_df <- data.frame(hit_chr = paste0("chr", f_Chromosome), Positions = Positions)
	for ( strand in seq_along(f_nombres) ) {
		#
		tmp_hit_df[, f_nombres[strand]] <- 0
		hit_table <- CopperGenomicFunctions::concat_ls(
			CopperGenomicFunctions::slid_win_tov(
				f_ind_var[which(f_ind_var[, col_tag] == age_strand[strand]), "Chromosomal_Position"]
			)
		)
		tmp_hit_pos <- match(names(hit_table), tmp_hit_df[, "Positions"])
		tmp_hit_df[tmp_hit_pos[which(!is.na(tmp_hit_pos))], f_nombres[strand]] <- hit_table[which(names(hit_table) >= 0)]
	}
	count_matrix[, f_nombres] <- tmp_hit_df[, f_nombres]

	return(count_matrix)
}
DanielRivasMD/Rpack.chlSab documentation built on Nov. 18, 2019, 12:01 a.m.