# merge data from several proteins together into one combined protein
#' extract_ints
#'
#' Cut every slice into tiny subslices.
#'
#' @param ints Vector of intensities for every slice.
#' @param mass_fit_params Parameter estimates from the polynomial fit.
#' @param cut_size Number of subslices per slice (default is 100)
#' @examples
#' mass_fit_params <- c(2.90230588635708,-0.0990513683718742,0.00309367801794186,-4.05829976900451e-05)
#' ints <- runif(42,0,1)
#' extr_ints <- extract_ints(ints, mass_fit_params)
#' @export
extract_ints <- function(ints, mass_fit_params, cut_size = 100){
fit_func <- function(x) { mass_fit_params[1] + mass_fit_params[2] * x + mass_fit_params[3] * x^2 + mass_fit_params[4] * x^3 }
my_limits <- head(seq(from=0.5, to=length(ints)+0.5, by=1), -1)
d <- as.numeric(ints)
cutted_ints <- lapply(1:length(my_limits), function(i){
l <- my_limits[i]
my_cuts <- head(seq(from=l, to=l+1, by=1/(cut_size)), -1)
#my.ints <- rep(d[i]/cut.size, cut.size)
my_ints <- rep(d[i], cut_size)
my_weights <- unlist(lapply(my_cuts, fit_func))
data.frame(cuts=my_cuts, ints=my_ints, mol_mass=my_weights)
})
do.call("rbind", cutted_ints)
}
#' sum_slides
#'
#' Sum up slides which were generated by extract_ints
#'
#' @param extracted_data A list of data extracted by extract_ints
#' @export
sum_slides <- function(extracted_data){
nr_samples <- length(extracted_data)
# we take the first as reference
ref <- extracted_data[[1]]
# loop over the mol.mass from the reference and add the other ones to it
ref_length <- nrow(ref)
summed_ints <- lapply(1:ref_length, function(i){
ref_mass <- ref$mol_mass[i]
# define the borders of the current slice
dist_left <- if(i > 1) ref$mol_mass[i-1] - ref_mass else ref_mass - ref$mol_mass[i+1]
dist_right <- if(i < ref_length) ref_mass - ref$mol_mass[i+1] else dist_left #ref$mol_mass[i+1] - ref_mass
limit_left <- ref_mass - dist_left/2
limit_right <- ref_mass + dist_right/2
res <- ref$ints[i]
if(nr_samples > 1){
# look for entries within this border in the other samples
sample_ints <- lapply(2:nr_samples, function(idx){
sample_data <- extracted_data[[idx]]
sample_slices_ok <- which(sample_data$mol_mass >= limit_left & sample_data$mol_mass < limit_right)
if(length(sample_slices_ok)) {
sample_data$ints[sample_slices_ok]
}else{
0
}
})
# give back the summed ints
res <- sum(unlist(sample_ints)) + res
}
res
})
# construct the data.frame with the results
data.frame(mol_mass=ref$mol_mass, ints=unlist(summed_ints)/nr_samples)
}
#' merge_proteins
#'
#' Cut every slice into tiny subslices.
#'
#' @param data Named list containing mass_fit_params and ints for every
#' replicate.
#' @param cut_size Number of subslices per slice (default is 100)
#' @param loess_span The span parameter used by the loess curve fitting
#' function.
#' @examples
#' data <- list()
#' data[[1]] <- list()
#' data[[1]][['mass_fit_params']] <- c(2.92541101792906,-0.0856835305957989,
#' 0.00229499350478187,-2.85888515674011e-05)
#' data[[1]][['ints']] <- c(1930800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
#' 0,0,0,0,0,0,0,0,0,0,0,3145700,4036800,6036000,6696500000,8679700000,
#' 63454000,0,0,0,0,0,0,0,0,0)
#' data[[2]] <- list()
#' data[[2]][['mass_fit_params']] <- c(2.93157752998003,-0.0911120501100777,
#' 0.00261614574546816,-3.38080473782906e-05)
#' data[[2]][['ints']] <- c(0,0,0,424060,0,0,26561000,0,0,0,8257400,0,0,0,0,
#' 0,0,0,0,4896700,3668500,0,0,0,0,52009000,0,0,0,0,0,0,5325700,2393800,2973300,
#' 4061200000,3175600000,35474000,25662000,18546000,0,0,7599400,0,0,5936300,0)
#' data[[3]] <- list()
#' data[[3]][['mass_fit_params']] <- c(2.86255232915833,-0.0860407095427795,
#' 0.00249869975354482,-3.31808222647113e-05)
#' data[[3]][['ints']] <- c(1053800,0,0,0,0,0,0,0,3159900,1061300,0,4672100,
#' 0,0,2268100,0,0,0,0,0,0,0,0,0,3289300,0,0,0,5302800,4653300,0,7113600,
#' 24130000,6285400,460980000,1.3405e+10,3747900000,17037000,15226000,0,0,
#' 17088000,3872700,0,14897000,8817200)
#' merged_protein <- merge_proteins(data, cut_size=100, loess_span=0.05)
#' @export
#12345678901234567890123456789012345678901234567890123456789012345678901234567890
merge_proteins <- function(data, cut_size = 100, loess_span=0.05){
extracted_data <- lapply(data, function(d){
extract_ints(d[['ints']], d[['mass_fit_params']], cut_size)
})
summed_ints <- sum_slides(extracted_data)
# make a loess fit
loess_fit <- stats::loess(ints ~ mol_mass, summed_ints, span=loess_span)
j <- order(summed_ints$mol_mass)
loess_fit_y <- loess_fit$fitted[j]
loess_fit_y[loess_fit_y < 0] <- 0
data.frame(x=summed_ints$mol_mass[j], y=loess_fit_y)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.