R/peak_sd_intensity_corrections.R

Defines functions sd_ratio_fractions correct_peak_sd_height correct_peak correct_mean correct_variance Z psi erf

Documented in correct_mean correct_peak correct_peak_sd_height correct_variance

erf <- function(x){
  2 * pnorm(x * sqrt(2)) - 1
}

psi <- function(x){
  (1 / sqrt(2 * pi)) * exp(-.5 * (x^2))
}

Z <- function(x){
  1 - pnorm(x)
}

#' correct peak variance
#'
#' Given a variance observed from a truncated normal distribution, correct it
#' assuming that it should have had 100% observaionts
#'
#' @param observed_variance the observed **variance**
#' @param fraction  what fraction was it observed in
#'
#'
#' @references https://en.wikipedia.org/wiki/Truncated_normal_distribution
#'
#' @return corrected variance
#' @export
#'
#' @seealso correct_mean correct_peak
correct_variance <- function(observed_variance, fraction){
  if (fraction >= 1) {
    return(observed_variance)
  }
  x <- qnorm(1 - fraction)
  A <- (x * psi(x)) / Z(x)
  B <- (psi(x) / Z(x))^2

  # A <- (x * psi(x)) / fraction
  # B <- (psi(x) / fraction)^2

  corrected_variance <- observed_variance / fraction

  return(corrected_variance)
}


#' correct peak mean
#'
#' Given a corrected SD, corrects the mean assuming that it is the result of
#' a truncated normal distribution.
#'
#' @param observed_mean the observed mean
#' @param corrected_sd a corrected sd, generated by `correct_variance`
#' @param fraction the fraction of total observations
#'
#' @references https://en.wikipedia.org/wiki/Truncated_normal_distribution
#'
#' @return corrected mean
#' @export
#'
#' @seealso correct_peak correct_variance
correct_mean <- function(observed_mean, corrected_sd, fraction){
  if ((fraction >= 1) || (is.na(corrected_sd))) {
    return(observed_mean)
  }
  x <- qnorm(1 - fraction)
  numerator <- corrected_sd * psi(x)
  denominator <- Z(x)
  corrected_mean <- observed_mean - (numerator / denominator)

  return(corrected_mean)

}


#' correct peak sd and mean
#'
#' Assuming that an observed mean (intensity) and sd are from a truncated normal distribution
#' that is truncated on one side only.
#'
#' @param observed_mean the observed mean
#' @param observed_sd the observed sd
#' @param n_observed how many observations went into this mean
#' @param n_should_observe how many observations *should* there have been?
#'
#' @references https://en.wikipedia.org/wiki/Truncated_normal_distribution
#'
#' @return data.frame, with corrected mean and sd
#' @export
#'
#' @seealso correct_mean correct_variance
#'
correct_peak <- function(observed_mean, observed_sd, n_observed, n_should_observe){
  fraction <- n_observed / (n_should_observe)
  corrected_sd <- sqrt(correct_variance(observed_sd^2, fraction))
  corrected_mean <- correct_mean(observed_mean, corrected_sd, fraction)
  return(data.frame(mean = corrected_mean, sd = corrected_sd))
}

#' correct peak height and sd
#'
#' @param original_height the original height estimate to correct
#' @param list_of_heights the set of peak heights
#' @param n_observed how many were observed
#' @param n_should_observe how many should have been observed
#'
#' @return data.frame
#' @export
#'
correct_peak_sd_height <- function(original_height, list_of_heights, n_observed, n_should_observe){
  model_peaks <- list_of_heights[which(n_observed == n_should_observe)]
  fractions <- seq(0.05, 0.95, 0.05)

  fractional_data <- purrr::map_df(model_peaks, sd_ratio_fractions, fractions = fractions)

  fractional_model <- lm(sd_ratio ~ fraction + I(fraction^2) + I(fraction^3), data = fractional_data)

  sd_estimates <- purrr::map_dbl(list_of_heights, sd)
  peak_fractions <- data.frame(fraction = n_observed / n_should_observe)

  sd_correction_factor <- predict.lm(fractional_model, peak_fractions)

  corrected_sd <- sd_estimates / sd_correction_factor

  corrected_height <- correct_mean(original_height, corrected_sd, peak_fractions$fraction)

  data.frame(OriginalHeight = original_height,
             OriginalSD = sd_estimates,
             Fraction = peak_fractions$fraction,
             CorrectedHeight = corrected_height,
             CorrectedSD = corrected_sd)
}

sd_ratio_fractions <- function(point_data, fractions = seq(0.05, 0.95, 0.05)){
  point_data <- sort(point_data, decreasing = TRUE)
  full_stats <- data.frame(mean = mean(point_data),
                           sd = sd(point_data))


  out_stats <- purrr::map_df(fractions, function(in_frac){
    n_data <- (round(in_frac * length(point_data)))
    use_data <- point_data[1:n_data]
    bad_estimates <- data.frame(mean = mean(use_data),
                                sd = sd(use_data),
                                type = "original",
                                fraction = in_frac,
                                full_mean = full_stats$mean,
                                full_sd = full_stats$sd)
    bad_estimates
  })
  out_stats$sd_ratio <- out_stats$sd / full_stats$sd
  out_stats$mean_ratio <- out_stats$mean / full_stats$mean
  out_stats
}
MoseleyBioinformaticsLab/FTMS.peakCharacterization documentation built on April 27, 2022, 3:32 a.m.