R/normfluodbf_utils.R
In normfluodbf: Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays

Documented in actual_cols_used decimal_scaling decimal_scaling_df fix_threshold_output .fluor_threshold_check fluor_threshold_check fluor_threshold_check_na generic_identifier is_threshold_violated log_transformation min_max_norm min_max_norm_df min_max_norm_percent min_max_norm_percent_df norm_applier norm_z norm_z_df roundfluor time_attribute unique_identifier

## normfluodbf - R package that Cleans and Normalizes FLUOstar DBF and DAT Files
## Copyright (C) 2024 Tingwei Adeck

#' A function to append a unique identifier attribute to any data frame within the normfluodbf package.
#' @description
#' The function in the context of normfluodbf creates an attribute called Cycle_Number
#' and appends this attribute to the cleaned or wrangled data frame derived from the dirty DBF file.
#' @author Tingwei Adeck
#' @param df A data frame with 1:n number of rows.
#' @return A data frame with the Cycle_Number attribute appended to the end of the data frame.
#' @export
#' @seealso [normfluodat()]
#' @note The function operates in a closed system,
#' meaning it is primarily designed to work with this package ONLY.
#' Other use cases are simply a coincidence.
#' @examples \dontrun{
#' test_df <- as.data.frame(c(seq(40)))
#' colnames(test_df) <- "test"
#' unique_identifier(test_df)}
#' @rdname normfluodbf_utils
unique_identifier <- function(df){
  for(i in 1:nrow(df)){
    x <- 0
    x <- x + i
    df$Cycle_Number[i] <- x
  }
  return(df)
}

#' A function to append a generic identifier attribute to any data frame, but users supply a name for said attribute.
#' @description
#'A function that creates an attribute of seq(numrows) with a step size of 1, where the user provides the attribute name.
#' @author Tingwei Adeck
#' @param numrows The number of rows the user intends to have in the created data frame.
#' @param col_name The desired attribute name.
#' @return A user-named single attribute data frame with nrow = numrows.
#' @export
#' @examples \dontrun{
#' generic_identifier(40, col_name="Cycle_No")}
#' @rdname normfluodbf_utils
generic_identifier <- function(numrows, col_name){
  vect <- seq(numrows)
  df <- as.data.frame(vect)
  colnames(df) <- c(col_name)
  return(df)
}

#' Normalizing Agents
#' @family normfluodbf_utils
#' @param x value(s)
#' @param df data frame
#' @return A normalized value when applied to a single value or a normalized attribute with values between the normalizing range.
#' @examples
#' \dontrun{
#' test_df <- as.data.frame(c(seq(40)))
#' colnames(test_df) <- "test"
#' test_df_norm <- lapply(test_df[1:ncol(test_df)], min_max_norm)}
#' @name normalizingagents
NULL

#' @rdname normalizingagents
#' @return normalized value (0-1)
#' @export
min_max_norm <- function(x){
  (x - min(x)) / (max(x) - min(x))
}

#' @rdname normalizingagents
#' @return normalized value (0-1)
#' @export
min_max_norm_df <- function(df) {
  return(as.data.frame(lapply(df, function(x) (x - min(x)) / (max(x) - min(x)))))
}

#' @rdname normalizingagents
#' @return normalized value (0-100)
#' @export
min_max_norm_percent <- function(x){
  ((x - min(x)) / (max(x) - min(x))) * 100
}

#' @rdname normalizingagents
#' @return normalized value (0-100)
#' @export
min_max_norm_percent_df <- function(df) {
  return(as.data.frame(lapply(df, function(x) ((x - min(x)) / (max(x) - min(x))) * 100)))
}

#' @rdname normalizingagents
#' @return normalized value (Z = N (0,1))
#' @export
norm_z <- function(x){
  (x - mean(x)) / stats::sd(x)
}

#' @rdname normalizingagents
#' @return normalized value (Z = N (0,1))
#' @export
norm_z_df <- function(df) {
  return(as.data.frame(lapply(df, function(x) (x - mean(x)) / stats::sd(x))))
}

#' @rdname normalizingagents
#' @return normalized value
#' @export
decimal_scaling <- function(x){
  max_abs <- max(abs(x))
  power <- ceiling(log10(max_abs))
  x/(10^power)
}

#' @rdname normalizingagents
#' @return normalized value
#' @export
decimal_scaling_df <- function(df) {
  return(as.data.frame(lapply(df, function(x) x / 10^ceiling(log10(max(abs(x)))))))
}

#' @rdname normalizingagents
#' @return log value
#' @export
log_transformation <- function(x){
  log(x)
}

#' @rdname normalizingagents
#' @return rounded value
#' @export
roundfluor <- function(x){
  round(x, 5)
}

#' A normalization applier built on lapply.
#' @description
#' Applies a function over a list of attributes.
#' @param df A data frame.
#' @param norm_scale This parameter takes sub-parameters: 'raw’ , hundred’ , 'one’ , 'z-score' , or 'decimal’ ,
#' which denotes the normalization type or scale.
#' @return A data frame with attribute values obtained from the applied function using lapply.
#' @export
#' @examples \dontrun{
#' test_df <- as.data.frame(c(seq(40)))
#' colnames(test_df) <- "test"
#' test_df_norm <- norm_applier(test_df,norm_scale = 'one')}
#' @rdname normfluodbf_utils
norm_applier <- function(df, norm_scale= c('one','hundred','z-score','raw','decimal')){
  df <- as.data.frame(df)
  if('raw' %in% norm_scale){
    df <- as.data.frame(lapply(df[1:ncol(df)], as.numeric))
    return(df)
  } else if ('one' %in% norm_scale){
    df <- as.data.frame(lapply(df[1:ncol(df)], min_max_norm))
    df <- as.data.frame(lapply(df[1:ncol(df)], roundfluor))
  } else if ('hundred' %in% norm_scale){
    df <- as.data.frame(lapply(df[1:ncol(df)], min_max_norm_percent))
    df <- as.data.frame(lapply(df[1:ncol(df)], roundfluor))
  } else if ('z-score' %in% norm_scale){
    df <- as.data.frame(lapply(df[1:ncol(df)], norm_z))
    df <- as.data.frame(lapply(df[1:ncol(df)], roundfluor))
  } else if('decimal' %in% norm_scale){
    df <- as.data.frame(lapply(df[1:ncol(df)], decimal_scaling))
    df <- as.data.frame(lapply(df[1:ncol(df)], roundfluor))
  }

}

#' A time attribute generating function
#' @description
#' A function to help the researcher reproduce the time attribute.
#' @param interval The time interval chosen for the assay often in seconds.
#' @param first_end The end time of the initial run, often the pause for the introduction of a new substance. This can be the cycle number chosen for the initial stop.
#' @param pause_duration The time between the first end (pause) and resumption of the assay.
#' @param end_time The final end time of the assay.
#' @param cycles The number of cycles in the assay as selected by the user or researcher.
#' @return The time attribute.
#' @keywords internal
#' @note
#' The original function had an option for minutes which was for less time conscious people
#' but the final version for this package has no such option. Users MUST provide numbers in
#' seconds.
#' @examples \dontrun{
#' time_test = time_attribute(30,8,136,1276,40)
#' time_test = time_attribute(60,8,136,2460,40)}
#' @rdname normfluodbf_utils
time_attribute = function(interval= NULL, first_end = NULL, pause_duration=NULL, end_time=NULL, cycles=NULL){

  start_time = 0

  if(is.null(interval)){
    warning('Enter the cycle interval in seconds as setup in the machine')
  }

  if(pause_duration < interval || is.null(pause_duration)){
    pause_duration = interval
  } else{
    pause_duration = pause_duration
  }

  # if('cycles' %in% time_unit || is.null(time_unit) && !is.null(first_end))

  if(!is.null(first_end) && !is.null(end_time) && !is.null(pause_duration)){

    first_end = (first_end-1) * interval

    #before_pause
    first_end = seq(from=start_time,to=first_end,by=interval)

    #new sequence start
    timer_resume = tail(first_end,1)  + pause_duration

    #after_pause
    after_pause = seq(from=timer_resume,to=end_time,by=interval)

    #final time attribute
    assay_time = append(first_end,after_pause)
    assay_time = assay_time[1:cycles]
    assay_time = as.data.frame(assay_time)
    colnames(assay_time) = c('Time')
    return(assay_time)
  } else {
    assay_time = seq(from=start_time,  by = interval, along.with=seq(cycles))
    assay_time = as.data.frame(assay_time)
    colnames(assay_time) = c('Time')

    return(assay_time)
  }
}

#' A function to get the actual columns used in the assay.
#' @description
#' A function that facilitates a users' workflow by helping extract the actual columns used in the assay.
#' @author Tingwei Adeck
#' @param dat A string ("dat_1.dat") if the file is found within the present working directory (pwd) OR a path pointing directly to a ".dat" file.
#' @return Returns a numeric vector denoting the columns used in the assay.
#' @export
#' @examples \dontrun{
#' fpath <- system.file("extdata", "dat_1.dat", package = "normfluodbf", mustWork = TRUE)
#' acutest <- actual_cols_used(fpath)}
#' @rdname normfluodbf_utils
actual_cols_used <- function(dat){
  df <- utils::read.table(dat)
  df <- clean_odddat_optimus(df)
  colnames(df) <- c(1:ncol(df))
  acu <- names(which(colSums(!is.na(df)) > 0))
  acu <- as.numeric(as.vector(acu))
  return(acu)
}

#' A fluorescence quantification Quality Control (QC) function.
#' @family normfluodbf_utils
#' @description
#' A function designed to check that fluorescence values do not exceed the upper limit (2^15 or 32768)
#' OR fall below the lower limit (2^11 or 2048).
#' Fluorescence values that exceed these thresholds are considered noisy
#' and lead to incorrect interpretation of analysis results.
#' @author Tingwei Adeck
#' @param clean_df A cleaned data frame.
#' @param fun A parameter used for Boolean expressions.
#' @import emojifont
#' @return A polite warning message to the researcher.
#' @note
#' Experimental issues should be investigated at very high or very low fluorescence values.
#' The most common experimental issues arise when ACMA concentrations are out of the tolerated range.
#' Based on my experience, ACMA concentrations between 2 and 5 Micromolar will suffice
#' to get fluorescence values within the tolerance threshold.
#' ACMA concentrations as low as 0.2 Micromolar or as high as 20 Micromolar have proven problematic based on my research experience.
#' A second issue linked to the FLUOstar instrument revolves around setting the right “gain”
#' to ensure the right level of sensitivity in machine readings.
#' A very high “gain” setting results in increased machine sensitivity even
#' at the right ACMA concentrations and vice versa. In short,
#' we want the machine to be primed to read exactly what we feed it, no more, no less.
#' This function provides the attribute(s) and tuple(s) for the values that need investigation.
#' These deductions were obtained from my experimental hiccups and my characterization of the liposome flux assay system.
#' @examples
#' \dontrun{
#' fpath <- system.file("extdata", "dat_1.dat", package = "normfluodbf", mustWork = TRUE)
#' dat_df <- read.table(file=fpath)
#' nocomma_dat <- clean_odddat_optimus(dat_df)
#' resampled_scaled <- resample_dat_scale(nocomma_dat, tnp=3, cycles=40)
#' resampled_scaled <- resampled_scaled[,c(1:4)]
#' fluor_threshold_check(resampled_scaled)}
#' @name fluorthresholdcheck
NULL

#' @rdname fluorthresholdcheck
#' @return outlier wells
#' @export
fix_threshold_output <- function(outlier_wells){
  well_vector <- unlist(outlier_wells)
  cleaned_well_vector <- well_vector[well_vector != "Time" & well_vector != "Cycle_Number"]
  cleaned_well_list <- as.list(cleaned_well_vector)
  cleaned_well_list
}

#' @rdname fluorthresholdcheck
#' @return outlier wells list
#' @keywords internal
.fluor_threshold_check <- function(clean_df, fun = NA){
  load.emojifont(font = "EmojiOne.ttf")
  nofun <- is.na(fun)
  clean_df <- rbind(clean_df, NA)
  clean_df <- cbind(clean_df, NA)

  outlier_wells <- list()

    for(i in 1:nrow(clean_df)){
      for(j in 1:ncol(clean_df)){
        if ( clean_df[i,j] >= (2^15) && is.na(clean_df[i,j]) != nofun ){
          message(c(emoji('pig'), emoji('camel'), emoji('lion')))
          message(paste("YIKES, value > 2^15, Watch in future experimental designs",'column:', j , 'row:', i))
          outlier_wells <- c(outlier_wells, names(clean_df)[j])
        } else if ( clean_df[i,j] <= (2^11) && is.na(clean_df[i,j]) != nofun ){
          message(c(emoji('pig'), emoji('camel')))
          message(paste("YIKES, value < 2^11, Watch in future experimental designs",'column:', j , 'row:', i))
          outlier_wells <- c(outlier_wells, names(clean_df)[j])
        }
      }
    }
  outlier_wells <- unique(outlier_wells)
  print("Outlier wells (Mixtures might be problematic and should be investigated with mixtools)")
  print(outlier_wells)
  return(fix_threshold_output(outlier_wells))
}

#' @rdname fluorthresholdcheck
#' @return outlier wells list
#' @export
fluor_threshold_check <- function(clean_df, fun = NA){
  load.emojifont(font = "EmojiOne.ttf")
  nofun <- is.na(fun)
  clean_df <- rbind(clean_df, NA)
  clean_df <- cbind(clean_df, NA)

  outlier_wells <- list()

  for(i in 1:nrow(clean_df)){
    for(j in 1:ncol(clean_df)){
      if ( clean_df[i,j] >= (2^15) && is.na(clean_df[i,j]) != nofun ){
        outlier_wells <- c(outlier_wells, names(clean_df)[j])
      } else if ( clean_df[i,j] <= (2^11) && is.na(clean_df[i,j]) != nofun ){
        outlier_wells <- c(outlier_wells, names(clean_df)[j])
      }
    }
  }
  outlier_wells <- unique(outlier_wells)
  message(paste("Crikee, some values in your original data violate thresholds", emoji('pig'), emoji('camel')))
  print("Outlier wells (Mixtures might be problematic and should be investigated with mixtools)")
  print(outlier_wells)
  return(fix_threshold_output(outlier_wells))
}

#' @rdname fluorthresholdcheck
#' @return outlier wells
#' @export
fix_threshold_output <- function(outlier_wells){
  well_vector <- unlist(outlier_wells)
  cleaned_well_vector <- well_vector[well_vector != "Time" & well_vector != "Cycle_Number"]
  cleaned_well_list <- as.list(cleaned_well_vector)
  cleaned_well_list
}

#' @rdname fluorthresholdcheck
#' @return warning printout
#' @note designed to check for NA values
#' @export
is_threshold_violated <- function(clean_df, fun = NA){
  load.emojifont(font = "EmojiOne.ttf")
  nofun <- is.na(fun)
  for(i in 1:nrow(clean_df)){
    for(j in 1:ncol(clean_df)){
      if( (clean_df[i,j] >= 2^15 || clean_df[i,j] <= 2^11)  && is.na(clean_df[i,j]) != nofun ){
        warning(paste("Crikee, some values in your original data violate thresholds", emoji('pig'), emoji('camel')))
        return(TRUE)
      } else{
        return(FALSE)
      }
    }
  }
}

#' @rdname fluorthresholdcheck
#' @return warning printout
#' @note designed to check for NA values
#' @export
fluor_threshold_check_na <- function(clean_df, fun = NA){
  load.emojifont(font = "EmojiOne.ttf")
  nofun <- is.na(fun)
  for(i in 1:nrow(clean_df)){
    for(j in 1:ncol(clean_df)){
      if( (clean_df[i,j] <= 2^15 || clean_df[i,j] >= 2^11)  && is.na(clean_df[i,j]) != nofun ){
        #nothing
      } else{
        warning("Crikee, some values in your original data are NA values")
        warning(c(emoji('pig'), emoji('camel')))
        warning(paste(j,i))
      }
    }
  }
}

Any scripts or data that you put into this service are public.

normfluodbf documentation built on Sept. 28, 2024, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

normfluodbf
Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays

R/normfluodbf_utils.R
In normfluodbf: Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays

Try the normfluodbf package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

normfluodbf Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays

R/normfluodbf_utils.R In normfluodbf: Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays

Try the normfluodbf package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

normfluodbf
Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays

R/normfluodbf_utils.R
In normfluodbf: Cleans and Normalizes FLUOstar DBF and DAT Files from 'Liposome' Flux Assays