Nothing
#'
#' Locate Variable Lists and Related Information in a JCAMP-DX File
#'
#' This function is NOT EXPORTED.
#' Users would not normally call this function. See \code{\link{readJDX}}.
#' Documentation is provided for developers wishing to contribute to the package.
#'
#' @param jdx Character. A vector of character strings which hopefully contains
#' one or more variable lists. Each string is a line of the complete original file.
#'
#' @param debug Integer. See \code{\link{readJDX}} for details.
#'
#' @return A list.
#' \itemize{
#' \item A data frame giving information about the structure of the file. Columns will be
#' Format, FirstLine, LastLine. Serves as a Data Guide.
#' \item The metadata
#' \item The line numbers of comments (excluding comments in the metadata).
#' \item Each variable list that was found, with the format pre-pended.
#' }
#'
#' @importFrom stats na.omit
#'
#' @noRd
#'
findVariableLists <- function(jdx, debug = 0) {
# A data set is defined by a variable list.
# The following is structured to make it easy to add other options.
# Add other variable list "fmt" short names here
# These are used to tweak the lines selected
VL_fmts <- c(
"XYY", # IR, UV, Vis, Raman, maybe others
"XRR", # real NMR
"XII", # imaginary NMR
"NMR_2D", # real 2D NMR in NTUPLE format
"LC_MS", # LC or GC-MS data in NTUPLE format
"XYXY", # PEAK TABLE, equal to the next one in practice
"XYXY" # XYXY
)
nf <- length(VL_fmts)
# Add other variable list START patterns here (each associated with a specific VL_fmts entry)
# Must search for things that are sufficiently unique
ST_pats <- c(
"^\\s*##XYDATA\\s*=\\s*\\(X\\+\\+\\(Y\\.\\.Y\\)\\)$",
"^\\s*##PAGE\\s*=\\s*N=1",
"^\\s*##PAGE\\s*=\\s*N=2",
"^\\s*##PAGE\\s*=\\s*F1=",
"^\\s*##PAGE\\s*=\\s*T=",
"^\\s*##PEAK TABLE\\s*=\\s*\\(XY\\.\\.XY\\)",
"^\\s*##XYPOINTS\\s*=\\s*\\(XY\\.\\.XY\\)"
)
# Add other END patterns here (each associated with a specific VL_fmts entry)
END_pats <- c(
"^\\s*##END\\s*=",
"^\\s*##PAGE\\s*=\\s*N=2",
"^\\s*##END\\s{1}NTUPLES\\s*=",
"^\\s*##PAGE\\s*=\\s*F1=", # In NTUPLES there are several/many of these
"^\\s*##PAGE\\s*=\\s*T=", # In NTUPLES there are several/many of these
"^\\s*##END\\s*=",
"^\\s*##END\\s*="
)
# Developer sanity checks:
if (length(ST_pats) != nf) stop("Wrong number of ST_pats")
if (length(END_pats) != nf) stop("Wrong number of END_pats")
# Find the beginning & end of each variable list.
# We are checking for any and all formats in the file
# We will capture some meta-information for completeness,
# and drop it in a later step.
spec_st <- NA_integer_
spec_end <- NA_integer_
fmt <- NA_character_
for (i in 1:nf) {
gST <- grep(ST_pats[i], jdx)
if (length(gST) == 0L) next
gEND <- grep(END_pats[i], jdx)
if (length(gEND) == 0L) next
# the above ensures that if we don't find both strings we don't proceed
spec_st <- c(spec_st, gST)
spec_end <- c(spec_end, gEND)
fmt <- c(fmt, rep(VL_fmts[i], length(gST)))
}
spec_st <- spec_st[-1] # remove NAs
spec_end <- spec_end[-1]
fmt <- fmt[-1]
# Run some checks...
if (length(spec_st) == 0L) { # Check to see if we actually found any variable lists
fmts <- paste(VL_fmts, collapse = ", ")
msg <- paste("Couldn't find any variable lists. Supported formats are:", fmts, sep = " ")
stop(msg)
}
if (length(spec_end) == 0L) stop("Found the start of a variable list, but not the end")
# We do not check for if (length(spec_st) > 1L) or if (length(spec_end) > 1L) because
# NTUPLE formats have repeated VL for each F1 or time point
# Organize the return values
metadata <- jdx[1:(spec_st[1] - 1)]
Format <- c("metadata", fmt)
FirstLine <- c(1, spec_st)
LastLine <- c(spec_st[1] - 1, spec_end)
DF <- data.frame(Format, FirstLine, LastLine, stringsAsFactors = FALSE)
# Find all comment-only lines exclusive of metadata; these cause a variety of problems.
# Keep original line numbers. CURRENTLY NOT USED OTHER THAN THIS FUNCTION, but is returned.
# There is a multi-line comment label "##=" but we do not search for this (not sure I've ever seen it).
comOnly <- grep("^\\$\\$", jdx)
comOnly <- setdiff(comOnly, 1:(spec_st[1] - 1))
# Check to see if this is 2D NMR data, if so find the vendor, as adjustments will be necessary
# Alternative way to check for 2D NMR data: nD <- grepl("^\\s*##NTUPLES=\\s*nD", jdx)
vendor <- NULL
if (any(Format == "NMR_2D")) {
if (any(grepl("JEOL NMR", jdx))) vendor <- "JEOL"
if (any(grepl("Bruker BioSpin GmbH", jdx))) vendor <- "Bruker"
if (is.null(vendor)) warning("Looks like 2D NMR but could not identify vendor, continuing")
}
# Check to see if this is LC-MS or GC-MS data in NTUPLE format
ms <- FALSE
if (any(Format == "LC_MS")) {
if (any(grepl("##NTUPLES=\\s*MASS\\s{1}SPECTRUM", jdx))) ms <- TRUE
if (!ms) warning("This looks like LC-MS or GC-MS data, but it is not declared as such, continuing")
}
# Up to this point, processing has been generic & spec_st, spec_end reflect grep'ing of patterns.
# Now we need to tweak things depending upon the format & vendor, to narrow the actual variable list
# as much as possible.
for (i in 1:nrow(DF)) {
if (DF$Format[i] == "XRR") {
DF$LastLine[i] <- DF$LastLine[i] - 1 # removes the ##PAGE= N=2 line
}
if ( (DF$Format[i] == "NMR_2D") | (DF$Format[i] == "LC_MS") ) {
# Remove the ##PAGE= N=2 line (NMR) or ##PAGE = T= line (MS)
if (i != nrow(DF)) DF$LastLine[i] <- DF$FirstLine[i + 1] - 1
# Next line removes
# ##END NTUPLES=
# ##END=
# from the end of the file
if (i == nrow(DF)) DF$LastLine[i] <- length(jdx) - 2
}
# Check to see if the apparent last row(s) of a variable list is actually a comment,
# and adjust accordingly. Occurs for example in BRUKERNTUP.DX
while (DF$LastLine[i] %in% comOnly) DF$LastLine[i] <- DF$LastLine[i] - 1
}
if (debug == 2) {
cat("\nApparent data chunks:\n")
print(DF)
}
VL <- vector("list", nrow(DF) + 2)
VL[[1]] <- DF
VL[[2]] <- metadata
VL[[3]] <- comOnly
for (i in 4:length(VL)) {
st_line <- DF$FirstLine[i - 2]
end_line <- DF$LastLine[i - 2]
keep_lines <- st_line:end_line
# first entry is fmt (for later dispatch to the right processing)
VL[[i]] <- c(DF$Format[i - 2], jdx[keep_lines])
name_vec <- c("fmt", paste("Line", keep_lines, sep = "_"))
names(VL[[i]]) <-name_vec # name it for debugging purposes downstream
}
# The generic VL_X names are replaced when these results are passed back to readJDX
names(VL) <- c("DataGuide", "Metadata", "Comments", paste("VL", 1:(length(VL) - 3), sep = "_"))
return(VL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.