R/score_sample.R

Defines functions score_sample

#' @title Score a single sample
#'
#' @description Score a single sample. This function will search the data for a peak pattern best fitting an expected model defined by the user, select the in-frame peaks and score the pattern.
#'
#' @param symbol
#'
#' @return NULL
#'
#' @examples score_sample(sample, no.peaks)
#'
#' @export score_sample
score_sample <- function(sample, no.peaks, alt.scores = F, peak.margin=NULL, peak.window=NULL, window.size=NULL, plot.pattern.matching=F, plot.curve.fitting=F, plot.expected.model=T, plot=T) {

  assign("no.peaks", no.peaks, envir = .GlobalEnv)

  all_basepair_positions <- sample
  sample_name <- names(all_basepair_positions)

  threshold_analysis <- 500
  if(length(all_basepair_positions[[1]]$xx) <= 3001) {
    threshold_analysis <- 50
    peak.window <- c(0,200)
  }

  if(plot==F){
    plot.expected.model <-F
  }

  if(is.null(peak.margin)) {
    peak.margin <- 10
  }

  if(is.null(peak.window)) {
    peak.window <- c(100,300)
  }

  if(is.null(window.size)) {
    window.size <- 1
  }

  if(length(sample)>1) {
    current_sample <- score_dataset(sample, no.peaks, peak.margin=peak.margin, peak.window=peak.window, window.size=window.size, plot.pattern.matching = F, plot.curve.fitting = plot.curve.fitting, plot.expected.model = plot.expected.model, plot=plot, alt.scores=alt.scores)
  }

  else {

    three.bp.positions <- length(which((all_basepair_positions[[1]]$xx > peak.window[1]) & (all_basepair_positions[[1]]$xx < (peak.window[1]+3))))
    total.pattern.width <- three.bp.positions * no.peaks

    #Select window to search the pattern in from the whole dataset
    position_template <- which((all_basepair_positions[[1]]$xx > peak.window[1]) & (all_basepair_positions[[1]]$xx < peak.window[2]))
    assign("pos_template", position_template, envir = .GlobalEnv)


    sample_data <- all_basepair_positions[[1]]$yy[position_template]
    names(sample_data) <- sample_name

    if(sum(all_basepair_positions[[1]]$xx[1:100])/100 < -100) {
      message("Incompatible ladder")

      if(alt.scores == T) {
        current_sample <- data.frame(peak.score.peak_score = 0,peak.score.GOF = 0,peak.score.PI=0)
      }
      else{
        peak_score <- 0
        current_sample <- data.frame(peak.score=peak_score)
      }

      rownames(current_sample) <- sample_name

      if(plot==T) {
        plot( all_basepair_positions[[1]]$xx,  all_basepair_positions[[1]]$yy, type="l", xaxt="n",xlab="position in basepairs", ylab="" ,main=sample_name)
        axis(side=1,at=seq(0,length(all_basepair_positions[[1]]$xx),10))
      }
    }

    else if (max(sample_data) < threshold_analysis | (three.bp.positions < 20) ) {
      message("No peak pattern found")
      if(alt.scores == T) {
        current_sample <- data.frame(peak.score.peak_score = 0,peak.score.GOF = 0,peak.score.PI=0)
      }
      else{
        peak_score <- 0
        current_sample <- data.frame(peak.score=peak_score)
      }

      rownames(current_sample) <- sample_name

      if(plot==T) {
        plot( all_basepair_positions[[1]]$xx,  all_basepair_positions[[1]]$yy, type="l", xaxt="n",xlab="position in basepairs", ylab="" ,main=sample_name)
        axis(side=1,at=seq(0,length(all_basepair_positions[[1]]$xx),10))
      }

    } else {
      #Use a sliding window of the width of the pattern to match to slide through the dataset, performing DTW on every window
      #Two outcome values of DTW will be used to select pattern to use for scoring:
      #1-Width of matched pattern, the closer to the actual width of the pattern the better
      #2-Distance, which is the distance of the matrix value, the lowest distance is usually the best match.

      top_scores <- c()
      while(max(top_scores) <= 1) {

        artifical_WT <- generate_artifical_WT(no.peaks, plot=F, three.bp.positions = three.bp.positions)
        artifical_WT <- artifical_WT * (max(all_basepair_positions[[1]]$yy[pos_template] ) / max(artifical_WT))
        query <- artifical_WT

        #sliding window for matched pattern
        start <- peak.window[1]
        min.width.pattern <- (no.peaks * 4) + 2
        all_widths <- c()
        all_starts <- c()
        all_distances <- c()

        while(start < peak.window[2]-min.width.pattern) {

          window_start <- which(all_basepair_positions[[1]]$xx > start)[1]
          window_end <- which(all_basepair_positions[[1]]$xx > start+min.width.pattern)[1]
          sample_data <- all_basepair_positions[[1]]$yy[window_start:window_end]

          if (max(sample_data) > threshold_analysis) {
            pattern_alignment <- align_peak_pattern(query, all_basepair_positions, start=start, pattern.width = min.width.pattern, window.size = window.size,  plot=F)

            matched_pattern_range <- all_basepair_positions[[1]]$xx[pattern_alignment$x + which(all_basepair_positions[[1]]$xx > start)[1]]
            matched_pattern.width <- matched_pattern_range[length(matched_pattern_range)] - matched_pattern_range[1]

            all_widths <- c(all_widths,matched_pattern.width)
            all_distances <- c(all_distances, pattern_alignment$distance[1])

            start <- start + 1
            all_starts <- c(all_starts, start)



          } else {
            start <- start + 1
            all_starts <- c(all_starts, start)
            matched_pattern.width <- 0
            all_widths <- c(all_widths,matched_pattern.width)
            distance <- 100000
            all_distances <- c(all_distances, distance)
          }

        }

        #####
        #Get best match
        #####

        #A minimal width of a pattern has to be used
        if (max(all_widths)  < 10) {
          message("No peak pattern found")
          if(alt.scores == T) {
            current_sample <- data.frame(peak.score.peak_score = 0,peak.score.GOF = 0,peak.score.PI=0)
          }
          else{
            peak_score <- 0
            current_sample <- data.frame(peak.score=peak_score)
          }

          rownames(current_sample) <- sample_name

          if(plot==T) {
            plot( all_basepair_positions[[1]]$xx,  all_basepair_positions[[1]]$yy, type="l", xaxt="n",xlab="position in basepairs", ylab="" ,main=sample_name)
            axis(side=1,at=seq(0,length(all_basepair_positions[[1]]$xx),10))
          }


        } else {
          start <- (peak.window[1] -1) + which(all_distances == min(all_distances))[1]

          score_distances <- all_distances
          score_distances[order(score_distances)] <- 1 : length(all_widths)


          top_distances <- order(score_distances)[1:3]

          top_scores <- c()
          pms <- c()

          for (t in 1:length(top_distances)) {
            start <- (peak.window[1] -1) + top_distances[t]

            if(start <= (peak.window[1]+10)) {
              top_scores <- c(top_scores, 0)
            }

            else {

              window_start <- which(all_basepair_positions[[1]]$xx > start)[1]
              window_end <- which(all_basepair_positions[[1]]$xx > start+min.width.pattern)[1]
              sample_data <- all_basepair_positions[[1]]$yy[window_start:window_end]

              pattern_alignment <- align_peak_pattern(query, all_basepair_positions, start=start, pattern.width = min.width.pattern, window.size = window.size,  plot=F)
              bp_positions_dtw_peaks <- get_peak_positions(pattern_alignment, all_basepair_positions, peak.margin = peak.margin, plot=F)


              if (nrow(pattern_alignment) > three.bp.positions * (no.peaks + 1)) {
                total.pattern.width <- nrow(pattern_alignment)
              } else {
                total.pattern.width <- three.bp.positions * (no.peaks + 1)
              }


              pm <- peak.margin
              while (min(diff(bp_positions_dtw_peaks)) < 1 | max(diff(bp_positions_dtw_peaks)) > 4) {
                pm <- pm - 1
                bp_positions_dtw_peaks <- get_peak_positions(pattern_alignment, all_basepair_positions, peak.margin = pm, plot=F)
                if(pm ==0 ){

                  break
                }
              }

              if(pm == 0 ){
                pm <- 1
              }

              peak_score <- score_pattern_alignment(bp_positions_dtw_peaks, no.peaks, three.bp.positions, total.pattern.width, alt.scores = F, plot.curve.fitting = F, plot.expected.model=F, plot=F)
              top_scores <- c(top_scores, peak_score)
              pms <- c(pms, pm)


            }

          }

          if (length(which(is.na(top_scores)) > 0)) {
            top_scores[which(is.na(top_scores))] <- 0
          }

          if(max(top_scores[1:3]) > 0) {
            top_scores <- top_scores[1:3]
          }



        }


        #window.size <- window.size + 1
        if(peak.margin > 1) {
          peak.margin <- peak.margin - 1
        } else{
          peak.margin <- 10
          window.size <- window.size + 1

          if (window.size == 5) {
            break
          }

        }

      }

      #calculate final score of pattern with the best distance and the best score
      if (max(top_scores) > 0) {

        start <- (peak.window[1] -1) + top_distances[which(top_scores == max(top_scores))[1]]
        window_start <- which(all_basepair_positions[[1]]$xx > start)[1]
        window_end <- which(all_basepair_positions[[1]]$xx > start+min.width.pattern)[1]
        sample_data <- all_basepair_positions[[1]]$yy[window_start:window_end]


        artifical_WT <- generate_artifical_WT(no.peaks, plot=F, three.bp.positions = three.bp.positions)
        artifical_WT <- artifical_WT * (max(all_basepair_positions[[1]]$yy[window_start:window_end] ) / max(artifical_WT))
        query <- artifical_WT

        pattern_alignment <- align_peak_pattern(query, all_basepair_positions, start=start, pattern.width=min.width.pattern, window.size= window.size, plot=plot.pattern.matching)

        pm <- pms[which(top_scores == max(top_scores))]

        bp_positions_dtw_peaks <- get_peak_positions(pattern_alignment, all_basepair_positions, peak.margin = pm, plot=plot, plot.curve.fitting = plot.curve.fitting, plot.expected.model=plot.expected.model)


        if (nrow(pattern_alignment) > three.bp.positions * (no.peaks + 1)) {
          total.pattern.width <- nrow(pattern_alignment)
        } else {
          total.pattern.width <- three.bp.positions * (no.peaks + 1)
        }

        peak_score <- score_pattern_alignment(bp_positions_dtw_peaks, no.peaks, three.bp.positions,total.pattern.width, plot.curve.fitting=plot.curve.fitting, plot.expected.model=plot.expected.model , plot=plot, alt.scores=alt.scores)
        plot_score <- peak_score
        message("Scoring complete")

        current_sample <- data.frame(peak.score=peak_score)
        rownames(current_sample) <- sample_name

      } else{

        if(alt.scores == T) {
          current_sample <- data.frame(peak.score.peak_score = 0,peak.score.GOF = 0,peak.score.PI=0)
        }
        else{
          peak_score <- 0
          current_sample <- data.frame(peak.score=peak_score)
        }

        rownames(current_sample) <- sample_name

        message("No peak pattern found")

        if(plot==T) {
          plot( all_basepair_positions[[1]]$xx,  all_basepair_positions[[1]]$yy, type="l", xaxt="n",xlab="position in basepairs", ylab="" ,main=sample_name)
          axis(side=1,at=seq(0,length(all_basepair_positions[[1]]$xx),10))
        }

      }


    }



    pos_template <- NULL

    current_sample

  }

}
martijn-cordes/ImSpectR documentation built on Oct. 1, 2019, 5:10 a.m.