## ddpcr - R package for analysis of droplet digital PCR data
## Copyright (C) 2015 Dean Attali
# This file contains functions related to reading in data. None of them are exported
# as these functions should only be used by the original package.
# Read plate data from either a directory or an explicit list of files.
#
# `dir` is checked first: when it is supplied the plate is read with
# `read_dir()`. Otherwise, when `data_files` is supplied, the plate is read
# with `read_files()` (forwarding `meta_file`, which may itself be missing).
# When neither is supplied the problem is reported through `err_msg()`.
read_plate <- function(plate, dir, data_files, meta_file) {
  has_dir <- !missing(dir)
  has_files <- !missing(data_files)

  if (has_dir) {
    return(read_dir(plate, dir))
  }
  if (has_files) {
    return(read_files(plate, data_files, meta_file))
  }
  err_msg("either `dir` or `data_files` must be specified")
}
# Read plate data from a directory.
#
# Looks up the data files inside `dir`, derives the consensus plate name from
# them, uses that name to look for a metadata file in the same directory, and
# hands everything to `read_files()`.
read_dir <- function(plate, dir) {
  stopifnot(plate %>% inherits("ddpcr_plate"))

  dir_exists <- is_dir(dir)
  if (!dir_exists) {
    err_msg(sprintf("could not find directory `%s`", dir))
  }

  # gather the data files that live in the directory
  amplitude_files <- find_data_files(dir)
  n_found <- length(amplitude_files)
  if (n_found == 0) {
    err_msg("Could not find any valid data files (ddpcr expects the \"_Amplitude\" files exported by QuantaSoft)")
  }

  # warnings are muted here; `read_files()` computes the consensus name again
  plate_name <- suppressWarnings(
    get_consensus_name_from_data_files(amplitude_files)
  )

  # resolve the metadata file *before* calling `read_files()` so that any
  # message it emits comes first
  meta_path <- find_meta_file(dir, plate_name)

  read_files(plate, amplitude_files, meta_path)
}
# Read plate data from a given list of files.
#
# Arguments:
#   plate      - an object inheriting from "ddpcr_plate"
#   data_files - paths of the droplet data files; every path must exist and
#                match `DATA_FILE_REGEX`
#   meta_file  - optional path of the metadata file; may be missing or NULL
#
# Returns `plate` after its droplet data, its metadata (only when a metadata
# file is available) and its name have been set.
read_files <- function(plate, data_files, meta_file) {
  stopifnot(plate %>% inherits("ddpcr_plate"))

  # ---- validate the data files
  if (missing(data_files) || length(data_files) == 0) {
    err_msg("no data files provided")
  }

  files_exist <- vapply(
    data_files,
    is_file,
    FUN.VALUE = logical(1),
    USE.NAMES = FALSE
  )
  if (!all(files_exist)) {
    err_msg("could not find all data files")
  }

  names_are_valid <- grepl(DATA_FILE_REGEX, data_files)
  if (!all(names_are_valid)) {
    err_msg(paste("not all data files provided are valid data files",
                  "(ddpcr expects only the \"_Amplitude\" files exported by QuantaSoft)"))
  }

  # ---- validate the metadata file
  if (missing(meta_file)) {
    meta_file <- NULL
    warn_msg("no metadata file provided")
  } else {
    meta_not_found <- !is.null(meta_file) && !is_file(meta_file)
    if (meta_not_found) {
      err_msg("could not find metadata file")
    }
  }

  step_begin("Reading data files into plate")

  # ---- read the droplets data
  # The wells are deliberately kept as character rather than factor: the
  # data.frame is large and searching through it with dplyr::filter is much
  # faster on character columns.
  read_one_well <- function(path) {
    well_id <- get_well_from_data_file(path)
    well_df <- readr_read_csv(path, progress = FALSE, col_types = readr::cols())
    # keep only the first two columns, in swapped order
    well_df <- dplyr::select(well_df, 2:1)
    well_df[["well"]] <- well_id
    well_df
  }

  tryCatch(
    {
      per_well <- lapply(data_files, read_one_well)
      stacked <- dplyr::bind_rows(per_well)
      stacked <- move_front(stacked, "well")
      droplets <- dplyr::arrange(stacked, .data[["well"]])
    },
    error = function(err) {
      err_msg("there was a problem reading one or more of the data files")
    }
  )
  plate_data(plate) <- droplets

  # ---- read the metadata file, if one is available
  if (!is.null(meta_file)) {
    tryCatch(
      {
        # the first line holds the column names
        header_row <- utils::read.csv(
          meta_file,
          stringsAsFactors = FALSE,
          nrows = 1,
          header = FALSE
        )
        col_names <- tolower(unname(unlist(header_row)))

        # peek at the first 5 data lines to learn how many columns the
        # metadata really has
        preview <- utils::read.csv(
          meta_file,
          stringsAsFactors = FALSE,
          skip = 1,
          nrows = 5,
          header = FALSE
        )

        # meta files from version 1.7.4 end with ',', and blank column names
        # are ignored; when the header is exactly one column short, pad it
        # with a 'dummy' column name
        if (ncol(preview) - length(col_names) == 1) {
          col_names <- c(col_names, "dummy")
        }

        # read the full metadata using the adjusted column names
        meta_table <- utils::read.csv(
          meta_file,
          stringsAsFactors = FALSE,
          skip = 1,
          header = FALSE,
          col.names = col_names
        )
      },
      error = function(err) {
        err_msg("there was a problem with the metadata file")
      }
    )
    plate_meta(plate) <- meta_table
  }

  # ---- name the plate after its data files
  name(plate) <- get_consensus_name_from_data_files(data_files)

  step_end()

  plate
}
# -------- helper functions
# Regex for a droplet data file name: `<name>_<well>_Amplitude.csv`.
#   group 1 - everything before the well ID (the plate name, possibly with a
#             leading directory path)
#   group 2 - the well ID: a letter A-H followed by two digits, the first of
#             which is 0 or 1
# The dot before `csv` is escaped so that only a literal ".csv" extension is
# accepted (an unescaped `.` would match any character, e.g. "_Amplitude_csv").
DATA_FILE_REGEX <- "^(.*)_([A-H][0-1][0-9])_Amplitude\\.csv$"
# Replacement patterns selecting the name / well capture groups of the regex
DATA_FILE_REGEX_NAME <- "\\1"
DATA_FILE_REGEX_WELL <- "\\2"
# Extract the name part (first capture group of `DATA_FILE_REGEX`) from one or
# more data file names. Inputs that do not match the regex come back unchanged.
get_name_from_data_file <- function(data_file) {
  gsub(
    pattern = DATA_FILE_REGEX,
    replacement = DATA_FILE_REGEX_NAME,
    x = data_file
  )
}
# Extract the well ID (second capture group of `DATA_FILE_REGEX`) from one or
# more data file names. Inputs that do not match the regex come back unchanged.
get_well_from_data_file <- function(data_file) {
  gsub(
    pattern = DATA_FILE_REGEX,
    replacement = DATA_FILE_REGEX_WELL,
    x = data_file
  )
}
# Determine the most common name among a list of data files.
#
# Emits a warning through `warn_msg()` when the files do not all share the
# same name, then returns the base name of the most frequent one.
get_consensus_name_from_data_files <- function(data_files) {
  file_names <- get_name_from_data_file(data_files)

  n_distinct_names <- length(unique(file_names))
  if (n_distinct_names > 1) {
    warn_msg("not all data files have the same name")
  }

  # tally the names and put the most frequent one first
  name_counts <- sort(table(file_names), decreasing = TRUE)
  most_common <- names(name_counts)[1]

  basename(most_common)
}
# List the files in `dir` whose names match `DATA_FILE_REGEX`, returned with
# the directory path prepended.
find_data_files <- function(dir) {
  list.files(
    path = dir,
    pattern = DATA_FILE_REGEX,
    full.names = TRUE
  )
}
# Look for the metadata file `<name>.csv` inside `dir`.
#
# Returns its path when `is_file()` accepts it; otherwise emits a warning
# through `warn_msg()` naming the path that was tried and returns NULL.
find_meta_file <- function(dir, name) {
  candidate <- file.path(dir, sprintf("%s.csv", name))

  if (!is_file(candidate)) {
    warn_msg(sprintf("could not find metadata file; looked for `%s`", candidate))
    return(NULL)
  }

  candidate
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.