# dpcR: Digital PCR Analysis

# Documented in read_amp read_BioMark read_dpcr read_QX100 read_QX200 read_redf

#' Read digital PCR data
#'
#' Reads digital PCR data in various formats by dispatching to the reader
#' that matches \code{format}.
#'
#' @param input name of the input file (\code{character}) or input object
#' (\code{data.frame}).
#' @param format format of the file, one of: \code{"redf"}, \code{"QX100"},
#' \code{"QX200"}, \code{"BioMark"} or \code{"amp"} (raw amplitude compressed
#' using \code{.zip}).
#' @param ext extension of the file (for example \code{"csv"}, \code{"xls"} or
#' \code{"xlsx"}). If \code{NULL}, the extension is taken from the name of the
#' input file.
#' @param ... additional parameters for the appropriate function. They are
#' forwarded only when \code{format} is \code{"redf"}, \code{"BioMark"} or
#' \code{"amp"}. For example, if \code{format} has value \code{"BioMark"}, the
#' additional parameter may be \code{detailed}.
#' @author Michal Burdukiewcz, Stefan Roediger
#' @details Input files may have .csv, .xls or .xlsx extension. In case of Excel files
#' with multiple sheets, only the first sheet will be analyzed.
#' @return Always an object of \code{\linkS4class{adpcr}} or
#' \code{\linkS4class{dpcr}} type.
#' @export
#' @keywords utilities
#' @seealso
#' Read raw format files: \code{\link{read_redf}}.
#' Read BioMark format files: \code{\link{read_BioMark}}.
#' Read QX100 format files: \code{\link{read_QX100}}.
#' Read QX200 format files: \code{\link{read_QX200}}.
#' Read compressed amplitude files: \code{\link{read_amp}}.
read_dpcr <- function(input, format, ext = NULL, ...) {
  known_formats <- c("redf", "QX100", "QX200", "BioMark", "amp")

  # switch() dispatches on the integer code when handed a factor, which would
  # silently pick the wrong reader; work on the label instead
  if (is.factor(format)) {
    format <- as.character(format)
  }

  # format must be exactly one known string; anything else (vector, NA,
  # number) gets the same error instead of an unrelated one further down
  if (!is.character(format) || length(format) != 1L ||
    !(format %in% known_formats)) {
    stop("Unknown value of 'format' parameter.")
  }

  switch(format,
    redf = read_redf(input, ext = ext, ...),
    QX100 = read_QX100(input, ext = ext),
    QX200 = read_QX200(input, ext = ext),
    BioMark = read_BioMark(input, ext = ext, ...),
    amp = read_amp(input, ext = ext, ...)
  )
}


#' Read digital PCR amplitude raw data
#'
#' Reads digital PCR amplitude data.
#'
#' @details The amplitude data is a compressed directory of amplification
#' data. The input is unpacked and read first and the result is then
#' converted to a dPCR object.
#' @inheritParams create_dpcr
#' @inheritParams read_dpcr
#' @return An object of \code{\linkS4class{adpcr}}.
#' @author Michal Burdukiewcz, Stefan Roediger
#' @keywords utilities
#' @export
read_amp <- function(input, ext = NULL) {
  # `ext` is not used by this reader; only `input` is handed on
  amp2dpcr(read_zipped_amps(input))
}

#' Read digital PCR raw data
#'
#' Reads REDF (Raw Exchange Digital PCR format) data.
#'
#' @inheritParams create_dpcr
#' @inheritParams read_dpcr
#' @details REDF (Raw Exchange Digital PCR format) data is preferably a .csv file
#' with following columns:
#' \describe{
#' \item{experiment}{names of experiments}
#' \item{replicate}{indices of replicates}
#' \item{assay}{names of assays}
#' \item{k}{number of positive partitions}
#' \item{n}{total number of partitions}
#' \item{v}{volume of partition (nL)}
#' \item{uv}{uncertainty of partition's volume (nL)}
#' \item{threshold}{partitions with \code{k} equal or higher than threshold are
#' treated as positive.}
#' \item{panel_id}{indices of panels}
#' }
#' Column \code{panel_id} should be specified only in case of
#' array-based dPCR.
#' @return An object of \code{\linkS4class{adpcr}} or \code{\linkS4class{dpcr}}
#' type, as produced by \code{df2dpcr} from the input data.
#' @author Michal Burdukiewcz, Stefan Roediger
#' @keywords utilities
#' @export
read_redf <- function(input, ext = NULL) {
  # load the table (or take the supplied object as is) and convert it
  redf_data <- read_input(input, ext)
  df2dpcr(redf_data)
}


#' Read QX100
#'
#' Reads digital PCR data from the QX100 Droplet Digital PCR System (Bio-Rad).
#'
#' @inheritParams read_dpcr
#' @author Michal Burdukiewcz, Stefan Roediger
#' @details The following columns of the input are used:
#' \code{AcceptedDroplets} (total number of partitions), \code{Positives}
#' (number of positive partitions), \code{Well}, \code{Experiment},
#' \code{Sample} and \code{Assay}. If \code{Assay} is absent,
#' \code{TargetType} is used in its place. Names of replicates are built as
#' \code{Well.Sample}.
#' @seealso See \code{\link{read_dpcr}} for detailed description of input files.
#'
#' Example of QX100 data: \code{\link{pds}}.
#' @return An object of \code{\linkS4class{adpcr}} class.
#' @note The volume and its uncertainty are taken from the literature (see
#' references).
#' @references
#' Corbisier, P. et al (2015). DNA copy number concentration measured by digital
#' and droplet digital quantitative PCR using certified reference materials.
#' Analytical and Bioanalytical Chemistry 407, 1831-1840.
#' @keywords utilities
#' @export
read_QX100 <- function(input, ext = NULL) {
  dat <- read_input(input, ext)

  n <- dat[["AcceptedDroplets"]]
  counts <- matrix(dat[["Positives"]], nrow = 1)
  well <- as.character(dat[["Well"]])
  exper <- dat[["Experiment"]]
  replicate <- paste0(well, ".", dat[["Sample"]])

  # fall back to TargetType when the Assay column is absent
  assay <- dat[["Assay"]]
  if (is.null(assay)) {
    assay <- dat[["TargetType"]]
  }

  # ids of panels
  # numeric part of the well name; dropping the leading non-digits handles
  # both zero-padded ("A01") and unpadded ("A1") wells
  row_id <- as.numeric(sub("^[^0-9]*", "", well))
  # leading letter of the well name, translated to 1..8 below
  col_id <- substr(well, 1, 1)
  col_names <- 1L:8
  names(col_names) <- LETTERS[1L:8]

  create_adpcr(
    data = counts, n = n,
    exper = exper, replicate = replicate, type = "tnp",
    assay = assay, v = 0.834, uv = 0.017,
    col_names = names(col_names),
    row_names = as.character(1L:12),
    row_id = row_id,
    col_id = col_names[col_id],
    panel_id = as.factor(assay), threshold = 1
  )
}


#' Read QX200
#'
#' Reads digital PCR data from the QX200 Droplet Digital PCR System (Bio-Rad).
#'
#' @inheritParams read_dpcr
#' @author Michal Burdukiewcz, Stefan Roediger
#' @details The following columns of the input are used:
#' \code{AcceptedDroplets} (total number of partitions), \code{Positives}
#' (number of positive partitions), \code{Well}, \code{Experiment},
#' \code{Sample} (replicate) and \code{TargetType} (assay). If \code{Sample}
#' contains only missing values, replicates are numbered consecutively within
#' each combination of \code{TargetType} and \code{Experiment}.
#' @seealso See \code{\link{read_dpcr}} for detailed description of input files.
#'
#' @return An object of \code{\linkS4class{adpcr}} class.
#' @source Droplet Digital PCR Applications Guide, Rev. A, Bulletin 6407, Biorad,
#' accessed on 28.10.2016,
#' \url{http://www.bio-rad.com/webroot/web/pdf/lsr/literature/Bulletin_6407.pdf}.
#' @note The volume and its uncertainty are taken from the literature (see
#' references).
#' @references
#' Corbisier, P. et al (2015). DNA copy number concentration measured by digital
#' and droplet digital quantitative PCR using certified reference materials.
#' Analytical and Bioanalytical Chemistry 407, 1831-1840.
#' @keywords utilities
#' @export
read_QX200 <- function(input, ext = NULL) {
  dat <- read_input(input, ext)

  n <- dat[["AcceptedDroplets"]]
  counts <- matrix(dat[["Positives"]], nrow = 1)
  exper <- dat[["Experiment"]]
  replicate <- dat[["Sample"]]
  well <- as.character(dat[["Well"]])
  assay <- dat[["TargetType"]]

  if (all(is.na(replicate))) {
    # no usable sample names: number the runs inside every
    # TargetType x Experiment cell. seq_len() yields nothing for an empty
    # cell, whereas 1L:0 would add the bogus ids 1 and 0.
    # NOTE(review): ids are emitted in table() cell order, so they line up
    # with the rows only when the input is ordered the same way - confirm.
    group_sizes <- as.vector(table(assay, exper))
    replicate <- unlist(lapply(group_sizes, seq_len))
  }

  # ids of panels
  # numeric part of the well name; dropping the leading non-digits handles
  # both zero-padded ("A01") and unpadded ("A1") wells
  row_id <- as.numeric(sub("^[^0-9]*", "", well))
  # leading letter of the well name, translated to 1..8 below
  col_id <- substr(well, 1, 1)
  col_names <- 1L:8
  names(col_names) <- LETTERS[1L:8]

  create_adpcr(
    data = counts, n = n,
    exper = exper, replicate = replicate, type = "tnp",
    assay = assay, v = 0.85, uv = 0.017,
    col_names = LETTERS[1L:8],
    row_names = as.character(1L:12),
    row_id = row_id,
    col_id = col_names[col_id],
    panel_id = as.factor(assay), threshold = 1
  )
}

#' Read BioMark
#'
#' Reads digital PCR data from the BioMark (Fluidigm).
#'
#' @inheritParams read_dpcr
#' @param detailed logical, if \code{TRUE}, the input file is processed as if it was
#' 'Detailed Table Results'. In the other case, the expected input file structure is
#' 'Summary Table Results'.
#' @details For 'Detailed Table Results' the first 11 lines of the file are
#' skipped and the data is read as 48 panels of 770 chambers each, with two
#' targets (columns \code{Target} and \code{Target.1}) per panel. A chamber
#' counts as positive when its target value is \code{"Hit"}.
#'
#' For 'Summary Table Results' rows 8 and 9 of the file are used as a two-level
#' header, the rows after them as data, and the counts of the channels
#' \code{VIC-TAMRA} and \code{FAM-MGB} are read.
#' @author Michal Burdukiewcz, Stefan Roediger
#' @return An object of \code{\linkS4class{adpcr}} class.
#' @seealso See \code{\link{read_dpcr}} for detailed description of input files.
#' @references
#' Dong, L. et al (2015). Comparison of four digital PCR platforms for accurate
#' quantification of DNA copy number of a certified plasmid DNA reference material.
#' Scientific Reports. 2015;5:13174.
#' @keywords utilities
#' @export
read_BioMark <- function(input, ext = NULL, detailed = FALSE) {
  if (detailed) {
    dat <- read_input(input, ext, skip = 11)

    # first row of each of the 48 panels (770 rows per panel); the panel-level
    # annotations are taken from that row
    panel_starts <- 770 * 0L:47 + 1
    panel_value <- function(column) {
      as.character(dat[panel_starts, column])
    }

    # appends a running number to every label equal to `target`; which()
    # keeps missing labels out of the assignment
    number_labels <- function(labels, target) {
      hits <- which(labels == target)
      labels[hits] <- paste(labels[hits], seq_along(hits), sep = "_")
      labels
    }

    # both targets of a panel share the experiment name, hence rep(..., 2)
    exper <- rep(paste(panel_value("Name"), panel_value("Type"), sep = "_"), 2)

    # factor() makes this work for character columns too (read.csv no longer
    # returns factors by default since R 4.0, and levels() of a character
    # vector is NULL)
    chamber_ids <- levels(factor(dat[["Chamber.ID"]]))
    wells <- t(sapply(strsplit(chamber_ids, "-", fixed = TRUE), function(i) {
      c(
        x = as.numeric(substr(i[[2]], 2, 3)),
        y = as.numeric(substr(i[[3]], 2, 3))
      )
    }))

    replicate <- c(panel_value("Type.1"), panel_value("Type.2"))
    replicate <- number_labels(replicate, "Test")
    replicate <- number_labels(replicate, "Blank")

    # NOTE(review): assay names are collected here but not handed to
    # create_adpcr() below - confirm whether `assay = assay` was intended
    assay <- c(panel_value("Name.1"), panel_value("Name.2"))

    # one 770-element 0/1 column per panel and target: 48 panels for "Target"
    # followed by 48 panels for "Target.1"
    run_dat <- do.call(cbind, lapply(c("Target", "Target.1"), function(single_assay) {
      do.call(cbind, lapply(0L:47, function(id_panel) {
        do.call(rbind, lapply(1L:70, function(id_x) {
          # matrix(..., ncol = 1) instead of [,,drop = FALSE], because I use as.numeric
          matrix(as.numeric(dat[770 * id_panel + id_x + rev(0L:10 * 70), single_assay] == "Hit"), ncol = 1)
        }))
      }))
    }))

    create_adpcr(run_dat, 770L,
      exper = exper,
      replicate = replicate,
      col_names = as.character(1L:70),
      row_names = as.character(1L:11),
      type = "np",
      panel_id = as.factor(1L:96),
      row_id = wells[, "y"],
      col_id = wells[, "x"],
      v = 0.85, uv = 0.00595, threshold = 1
    )
  } else {
    dat <- data.frame(read_input(input, ext))

    # rows 1-9 hold the header; everything below is data
    data_range <- dat[-c(1L:9), ]

    # two-level header: names1 is the column group, names2 the column name
    names1 <- as.vector(dat[8, ])
    names2 <- as.vector(dat[9, ])

    # channels are processed in this order, results are concatenated
    channels <- c("VIC-TAMRA", "FAM-MGB")

    # exper
    exper <- rep(paste0(
      data_range[, which(names1 == "Sample Information" & names2 == "Name")], "_",
      data_range[, which(names1 == "Sample Information" & names2 == "Type")]
    ), 2)

    # replicate
    replicate <- paste0(
      unlist(lapply(channels, function(channel_name) {
        data_range[, names1 == channel_name & names2 == "Type"]
      })),
      rep(data_range[, names1 == "Panel" & names2 == "ID"], 2)
    )

    # assay
    assay <- unlist(lapply(channels, function(channel_name) {
      data_range[, names1 == channel_name & names2 == "Name"]
    }))

    # data
    count_data <- unlist(lapply(channels, function(channel_name) {
      as.numeric(data_range[, names1 == channel_name & names2 == "Count"])
    }))

    res <- create_adpcr(
      data = matrix(count_data, nrow = 1), n = rep(765, length(count_data)),
      exper = exper, replicate = replicate, type = "tnp",
      assay = assay, row_names = as.character(1L:4),
      col_names = as.character(1L:12),
      panel_id = factor(c(rep(1, length(exper) / 2), rep(2, length(exper) / 2))),
      threshold = 1, v = 0.85, uv = 0.00595
    )

    # rename each panel (one per channel) after the assay that occurs most
    # often in it
    names_df <- data.frame(table(slot(res, "panel_id"), slot(res, "assay")))
    levels(slot(res, "panel_id")) <- as.character(sapply(
      levels(names_df[["Var1"]]),
      function(single_name) {
        sub_data <- names_df[names_df[["Var1"]] == single_name, ]
        sub_data[which.max(sub_data[["Freq"]]), "Var2"]
      }
    ))
    res
  }
}


# Reads `input` with a reader chosen by the file extension.
#
# input: path to a file (character); any other object is returned untouched.
# ext:   extension to use instead of the one in the file name; NULL means
#        "take it from `input`". Matching ignores case and a leading dot.
# skip:  handed to the reader as its `skip` argument.
#
# Returns the data as read. When the result is a data frame that ends with a
# completely empty row (or has exactly 65535 rows), it is cut off just before
# its first completely empty row. Stops on an unsupported extension.
read_input <- function(input, ext = NULL, skip = 0) {
  if (!is.character(input)) {
    return(input)
  }

  if (is.null(ext)) {
    ext <- strsplit(input, ".", fixed = TRUE)[[1]]
  }
  ext <- tolower(sub("^\\.", "", ext[[length(ext)]]))

  # maybe add multisheet excel

  # the unnamed last alternative is switch()'s default, so an unknown
  # extension fails here with a clear message instead of yielding NULL
  fun <- switch(ext,
    csv = read.csv,
    xls = read_excel,
    xlsx = read_excel,
    zip = read_zipped_amps,
    stop("Unsupported file extension: '", ext, "'.")
  )

  raw_read <- fun(input, skip = skip)

  # the clean-up below only makes sense for non-empty tables
  if (!is.data.frame(raw_read) || nrow(raw_read) == 0L) {
    return(raw_read)
  }

  n_rows <- nrow(raw_read)
  empty_row <- rowSums(!is.na(raw_read)) == 0

  # read_excel sometimes reads empty rows, workaround
  if (n_rows == 65535 || empty_row[n_rows]) {
    first_empty <- which(empty_row)[1]
    if (is.na(first_empty)) {
      # nothing to cut
      raw_read
    } else {
      # seq_len() copes with first_empty == 1; drop = FALSE keeps a
      # one-column table a data frame
      raw_read[seq_len(first_empty - 1L), , drop = FALSE]
    }
  } else {
    raw_read
  }
}