R/nearBaitAnalysis.R

Defines functions ineqfunCis nearBaitAnalysis

#' function to get all interactions in the region surrounding the bait
#' @param obj 4C-ker object
#' @param k number of observed fragments in each window to build adaptive windows
ineqfunCis <- function(x, env=globalenv()){
 vec <- c((-x[16]+x[19]),(-x[13]+x[16]), x[14], x[17], x[20])
 return(vec)
}
nearBaitAnalysis <- function(obj, k){
  #lower and upper quantiles to separate the three HMM states
  lower <- 0.6
  upper <- 0.9
  ############
  num_samples <- length(obj@samples)
  #build adaptive windows
  window_counts <- buildAdaptiveWindowsCis(obj@data_nearbait, obj@bait_coord,
                                          obj@bait_chr, obj@bait_name, obj@output_dir,k, "nearbait")
  num_windows <- nrow(window_counts)
  #get count for each sample for each window
  counts_results <- getWindowCounts(obj@data_nearbait, window_counts, num_windows, obj@samples,obj@output_dir, "nearbait")
  #build synthetic samples by shuffling reads
  if(length(obj@samples) > 1)
    synth_counts_results <- generateSyntheticSamples(window_counts, num_windows, obj@data_nearbait, "nearbait")
  else
    synth_counts_results <- counts_results
  pars_valid <- FALSE
  ##while loop to change the quantile separation if the parameter estimation fails
  while(!pars_valid & lower > 0.1){
    starting_values <- startingValuesCis(window_counts, num_windows, obj@bait_coord,
                                        num_samples,synth_counts_results$norm_counts_log,
                                        synth_counts_results$dist_log,lower, upper)
    int_glm <- starting_values$int_glm
    lint_glm <- starting_values$int_glm
    nint_glm <- starting_values$int_glm

    par_est_results <- parameterEstimationCis(synth_counts_results$hmm_input,num_samples,
                                              starting_values$trstart,starting_values$respstart,
                                              starting_values$instart, ineqfunCis)
    is_valid <- validateParametersCis(starting_values$int_glm, par_est_results$pars,
                                      synth_counts_results$hmm_input)
    par_est_results$pars <- is_valid$pars
    int_glm$coefficients <- par_est_results$pars[19:20]
    lint_glm$coefficients <- par_est_results$pars[16:17]
    nint_glm$coefficients <- par_est_results$pars[13:14]
    if(all(par_est_results$pars[13:21] == starting_values$respstart) | !is_valid$valid){
      lower <- lower-0.05
      upper <- upper-0.05
    } else
      pars_valid <- TRUE
  }
####end par est####

  ####viterbi####
  print("Viterbi..")
  j <- 1
  l <- 1
  for(i in 1:length(obj@conditions)){
    reps = obj@replicates[i]
    samples = obj@samples[l:(l+reps-1)]
    hmm_input <- counts_results$hmm_input[j:(j+(num_windows*reps)-1),]
    viterbi_results <- viterbi3State(hmm_input, par_est_results$mod_fit,samples,
                                        window_counts, num_windows,
                                        paste(obj@bait_name, obj@conditions[i], sep = "_"),
                                        obj@bait_chr,reps, obj@output_dir, "nearbait")
    j <- j+(num_windows*reps)
    l <- l+reps
  }
  print(paste("BED file of highest interacting domains for near the bait on the cis chromosome are saved in ",
              obj@output_dir, sep = ""))
#plot average profile of conditions
  if(length(obj@samples) > 1){
    norm_counts_avg=NULL
    j=1
    for(i in 1:length(obj@replicates)){
      reps=obj@replicates[i]
      if(reps == 1){
        norm_counts_avg = rbind(norm_counts_avg, cbind(counts_results$norm_counts[,j], rep(obj@conditions[i],nrow(window_counts))))
      }
      else{
        norm_counts_avg = rbind(norm_counts_avg, cbind(rowMeans(counts_results$norm_counts[,j:(j+reps-1)]), rep(obj@conditions[i],nrow(window_counts))))
      }
      j=j+reps
    }
    norm_counts_avg = data.frame(cbind(rowMeans(window_counts[,2:3]),norm_counts_avg))
    norm_counts_avg[,1] =as.numeric(as.character(norm_counts_avg[,1]))
    norm_counts_avg[,2] =as.numeric(as.character(norm_counts_avg[,2]))
    colnames(norm_counts_avg) = c("Coord", "Count", "Condition")
    return(list(norm_counts_avg=norm_counts_avg, window_counts=counts_results$window_counts))
  }
  else{
    return(list(norm_counts_avg=counts_results$norm_counts, window_counts=counts_results$window_counts))
  }

}
rr1859/R.4Cker documentation built on Feb. 11, 2020, 12:48 p.m.