#' Signal between all convergent genes genome-wide
#'
#' \strong{Deprecated! Use \code{\link{signal_at_intergen}} instead.} \cr
#' This function allows you to pull out the ChIP signal centered on midpoints of convergent genes.
#' It takes as input either the wiggle data as list of 16 chromosome (output of \code{\link{readall_tab}})
#' or complete genome in one data frame (for example loaded from .bed files).
#' @param inputData As a list of the 16 chr wiggle data (output of \code{\link{readall_tab}}) or a data frame
#' (in which case you must set \code{inputDataFrame = TRUE}). No default.
#' @param regionSize Number indicating the size (in bp) of the region to calculate.
#' Defaults to 1000 bp (+/- 500 bp).
#' @param saveFile Boolean indicating whether output should be written to a .txt file (in current working
#' directory). If \code{saveFile = FALSE}, output is returned to screen or an R object (if assigned).
#' Defaults to \code{FALSE}.
#' @param inputDataFrame Boolean indicating whether input data is a data frame. This is the case when you
#' have data loaded from a .bed format, typically nucleosome signal, as opposed to the standard wiggle data
#' in a list of 16 chromosomes. Defaults to \code{FALSE}.
#' @return A local data frame (dplyr data frame) with three columns: chr (chromosome number), position
#' (genome coordinate relative to midpoint of intergenic region) and signal (ChIP signal).
#' @examples
#' \dontrun{
#' signal_at_conv(WT)
#'
#' signal_at_conv(WT, regionSize = 1500, saveFile = TRUE, inputDataFrame = FALSE)
#' }
#' @export
signal_at_conv <- function(inputData, regionSize = 1000, saveFile = FALSE,
inputDataFrame = FALSE) {
ptm <- proc.time()
message('Note: This function is deprecated!\n',
'From version 0.2 on it is no longer maintained. Use signal_at_intergen() instead.')
#----------------------------------------------------------------------------#
# All data loaded below is internal to the package
# Generated using 'data-raw/data_internal.R'; stored in 'R/sysdata.rda'
#----------------------------------------------------------------------------#
# Check reference genome and load appropriate convergent gene regions
chrom_S288C <- c("I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
"XI", "XII", "XIII", "XIV", "XV", "XVI")
chrom_SK1 <- c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
'11', '12', '13', '14', '15', '16')
# Handle the case 'inputDataFrame = T'
if (inputDataFrame) {
check_S288C <- any(inputData[, 1] == 'chrI')
check_SK1 <- any(inputData[, 1] == 'chr01')
} else {
check_S288C <- any(grep('chrI.', names(inputData), fixed = TRUE))
check_SK1 <- any(grep('chr01.', names(inputData), fixed = TRUE))
}
# Load the data:
if (check_S288C) {
message('Detected ref. genome - S288C\n')
conv <- S288C_conv
chrom <- chrom_S288C
} else if (check_SK1) {
print('Detected ref. genome - SK1')
conv <- SK1_conv
chrom <- chrom_SK1
} else stop("Did not recognize reference genome.
Check that chromosome numbers are in the usual format, e.g. 'chrI' or 'chr01'.")
message('Preparing convergent gene region info...\n')
# Initialize object to collect final data
allData <- data.frame()
# Create data frame with conv region info (midpoints +/- regionSize in bp)
intergenicPos <- data.frame(matrix(nrow = nrow(conv), ncol = 4))
colnames(intergenicPos) <- c('chr', 'mid', 'up', 'dwn')
intergenicPos[, 'chr'] <- conv[, 'chr'] # chr
intergenicPos[, 'mid'] <- conv[, 'midpoint'] # mid
intergenicPos[, 'up'] <- conv[, 'midpoint'] - (regionSize / 2) # up
intergenicPos[, 'dwn'] <- conv[, 'midpoint'] + (regionSize / 2) # dwn
if (!requireNamespace("dplyr", quietly = TRUE)) {
stop("R package 'dplyr' needed for this function to work. Please install it.\n",
"install.packages('dplyr')", call. = FALSE)
}
message('Collecting signal...\n')
message('(Skip any regions whose coordinates are not found in input data)\n')
for(i in 1:length(chrom)) {
chrNum <- paste0('chr', chrom[i])
# Get ChIP data list item corresponding to chrom to analyze
# Either list of wiggles or dataFrame ('inputDataFrame = F / T')
if (inputDataFrame) {
chromData <- inputData[inputData[, 1] == paste0('chr', chrom[i]), ]
} else {
# Get index of ChIP data list item corresponding to chrom to analyze
# Add '.' to make it unique (otherwise e.g. 'chrI' matches 'chrII' too)
listIndex <- grep(paste0(chrNum, '.'), names(inputData), fixed = TRUE)
chromData <- inputData[[listIndex]]
colnames(chromData) <- c('position', 'signal')
}
finalChromData <- data.frame()
# Get subset of intergenic regions on chr i
intergenicPosChr <- intergenicPos[intergenicPos[, 'chr'] == chrNum, ]
# Count intergenic regions skipped due to missing coordinates in wiggle data
convGeneCount <- 0
for(j in 1:nrow(intergenicPosChr)) {
if(j == 1) {
message(paste0(chrNum, ':\n'))
}
# Skip if gene coordinates not in ChIPseq data
# Comparison below will not work if dplyr was loaded when the wiggle data was loaded
# (because of dplyr's non standard evaluation: data is in tbl_df class)
# Workaround: convert to data.frame first
# if(!intergenicPosChr[j, 'up'] %in% chromData[, 'position'] |
# !intergenicPosChr[j, 'dwn'] %in% chromData[, 'position']) {
if(!any(as.data.frame(chromData)[, 'position'] == as.data.frame(intergenicPosChr)[j, 'up']) |
!any(as.data.frame(chromData)[, 'position'] == as.data.frame(intergenicPosChr)[j, 'dwn'])) {
next
}
upPosition <- intergenicPosChr[j, 'up']
dwnPosition <- intergenicPosChr[j, 'dwn']
# position
position <- chromData[chromData[, 'position'] >= upPosition &
chromData[, 'position'] <= dwnPosition, 'position']
position <- position - intergenicPosChr[j, 'mid']
# signal
signal <- chromData[chromData[, 'position'] >= upPosition &
chromData[, 'position'] <= dwnPosition, 'signal']
# collect the data
finalChromData <- dplyr::bind_rows(finalChromData, data.frame(chrNum, position, signal))
# Increment non-skipped gene count
convGeneCount <- convGeneCount + 1
}
# Print number of skipped regions
message(paste0('... ', convGeneCount,
' intergenic regions (skipped ', j - convGeneCount, ')\n'))
colnames(finalChromData) <- c('chr', 'position', 'signal')
# Trim out NAs
finalChromData <- finalChromData[complete.cases(finalChromData), ]
allData <- dplyr::bind_rows(allData, finalChromData)
}
message(paste0('\nCompleted in ', round((proc.time()[3] - ptm[3]) / 60, 2), ' min.\n'))
if(saveFile) {
message(paste0('Saving file...\n'))
if(check_S288C) {
write.table(allData, paste0(deparse(substitute(inputData)),
"_S288C_conv.txt"), sep = "\t",
row.names = FALSE, quote = FALSE)
} else {
write.table(allData, paste0(deparse(substitute(inputData)),
"_SK1_conv.txt"), sep = "\t",
row.names = F, quote = FALSE)
}
} else {
return(allData)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.