R/makeRamdomSet.R

Defines functions makeRandomSet

Documented in makeRandomSet

#' @title Random control distribution analysis
#'
#' @description Generates a random set of genomic positions for random
#'              distribution analysis. Chromosomes are drawn with probability
#'              proportional to their length, then positions are drawn
#'              without replacement inside each chromosome.
#'
#' @details The random number generator is re-seeded with a fixed seed
#'          (\code{set.seed(123456)}) on every call, so repeated calls with
#'          the same arguments produce the same table, and the caller's RNG
#'          state is overwritten.
#'
#' @usage makeRandomSet(organism = 'hg19', randomSize = 10000, outpath = '~')
#'
#' @param organism a single character string. Chromosome lengths are read
#'                 from \code{extdata/<organism>_chrom.rds} of the IRFinder
#'                 package. This function serves 3 versions of organisms such
#'                 as hg19, hg38 (Human) and galGal6 (Chicken).
#'                 Default is 'hg19'.
#' @param randomSize a single non-negative number. Number of random positions
#'                   to generate. Default value is 10000.
#' @param outpath a single character string. Directory in which the table is
#'                written as \code{Random_set_<organism>.txt} (tab-separated,
#'                with a header row).
#'
#' @return A data.frame with one row per random position and the columns
#'         \code{Random} (running index), \code{Random_chr} (chromosome name
#'         prefixed with 'chr') and \code{Random_pos} (1-based position
#'         within that chromosome). Rows are grouped by chromosome, in the
#'         order of the chromosome size table.
#'
#' @export
makeRandomSet <- function(organism = 'hg19', randomSize = 10000, outpath = '~') {
  stopifnot(
    is.character(organism), length(organism) == 1L,
    is.numeric(randomSize), length(randomSize) == 1L,
    !is.na(randomSize), randomSize >= 0,
    randomSize <= .Machine$integer.max
  )
  n_random <- as.integer(randomSize)

  cat('---------- Make a random set ----------\n')
  cat(paste0('Start time : ', date(), '\n'))

  #### 01. Generate random set
  # Fixed seed: the generated set is deliberately reproducible across calls.
  set.seed(123456)

  chrom_file <- system.file("extdata", paste0(organism, '_chrom.rds'),
                            package = "IRFinder")
  if (!nzchar(chrom_file)) {
    stop("No chromosome size file found for organism '", organism, "'.",
         call. = FALSE)
  }
  ch_size <- readRDS(file = chrom_file)

  # as.character() keeps factor-encoded chromosome names from being used as
  # integer codes further down.
  ch_names <- as.character(ch_size$chrom)
  ch_len <- as.numeric(ch_size$length)
  ch_ratio <- ch_len / sum(ch_len)

  # Step 1: assign every random point to a chromosome, weighted by length.
  drawn_chr <- sample(ch_names, size = n_random, replace = TRUE,
                      prob = ch_ratio)

  # Count points per chromosome in table order; chromosomes that were never
  # drawn get an explicit 0 instead of a missing row.
  ch_freq <- tabulate(match(drawn_chr, ch_names), nbins = length(ch_names))

  # Step 2: draw distinct positions inside each chromosome. sample.int()
  # avoids building a vector as long as the chromosome, and Map() keeps
  # lengths and counts numeric (apply() on a data.frame would turn them into
  # character strings).
  pos_by_chr <- Map(
    function(len, k) {
      if (k == 0L) integer(0) else sample.int(len, size = k)
    },
    ch_len, ch_freq
  )

  chr_ran <- rep(paste0('chr', ch_names), ch_freq)
  pos_ran <- unlist(pos_by_chr, use.names = FALSE)

  ran_tab <- data.frame(
    'Random' = seq_len(n_random),
    'Random_chr' = chr_ran,
    'Random_pos' = pos_ran,
    stringsAsFactors = FALSE
  )

  write.table(ran_tab,
              file = paste0(outpath, '/Random_set_', organism, '.txt'),
              quote = FALSE, append = FALSE, sep = '\t', na = '',
              row.names = FALSE, col.names = TRUE)

  cat('---------- Make a random set is finished. ----------\n')
  cat(paste0('Finish time : ', date(), '\n'))

  ran_tab
}
bioinfo16/IRFinder documentation built on Aug. 19, 2019, 10:37 a.m.