Nothing
#' Full pattern summation using linear regression
#'
#' \code{fps_lm} returns a simple fit of a given pattern using linear regression,
#' where coefficients may be either positive or negative. Does not return quantitative
#' data. For quantitative results use \code{fps} or \code{afps}.
#'
#' Requires a \code{powdRlib} library of reference patterns. Mineral concentrations
#' are not quantified and therefore reference intensity ratios are not required.
#'
#' @param lib A \code{powdRlib} object representing the reference library. Created using the
#' \code{powdRlib} constructor function.
#' @param smpl A data frame. First column is 2theta, second column is counts
#' @param harmonise logical parameter defining whether to harmonise the \code{lib} and \code{smpl}.
#' Default = \code{TRUE}. When \code{TRUE} the function harmonises the \code{lib} and \code{smpl}
#' data to the intersecting 2theta range at the coarsest resolution available using natural splines.
#' @param refs A character string of reference pattern IDs or names from the specified library.
#' The IDs or names supplied must be present within the \code{lib$phases$phase_id} or
#' \code{lib$phases$phase_name} columns. If missing from the function call then all phases in
#' the reference library will be used.
#' @param std The phase ID (e.g. "QUA.1") to be used as internal
#' standard. Must match an ID provided in the \code{refs} parameter.
#' @param tth_align A vector defining the minimum and maximum 2theta values to be used during
#' alignment (e.g. \code{c(5,65)}). If not defined, then the full range is used.
#' @param align The maximum shift that is allowed during initial 2theta
#' alignment (degrees). Default = 0.1.
#' @param manual_align A logical operator denoting whether to optimise the alignment within the
#' negative/position 2theta range defined in the \code{align} argument, or to use the specified
#' value of the \code{align} argument for alignment of the sample to the standards. Default
#' = \code{FALSE}, i.e. alignment is optimised.
#' @param tth_fps A vector defining the minimum and maximum 2theta values to be used during
#' full pattern summation (e.g. \code{c(5,65)}). If not defined, then the full range is used.
#' @param shift A single numeric value denoting the maximum (positive or negative) shift,
#' in degrees 2theta, that is allowed during the shifting of selected phases. Default = 0.
#' @param p a numeric parameter between 0 and 1 specifying the p-value limit for coefficients.
#' Any reference patterns with a p-value greater than this value will be omitted from the
#' linear regression and results recomputed. Must be greater than 0.000001 but no greater than 1.
#' @param ... Other arguments
#'
#' @return a powdRlm object with components:
#' \item{tth}{a vector of the 2theta scale of the fitted data}
#' \item{fitted}{a vector of the fitted XRPD pattern}
#' \item{measured}{a vector of the original XRPD measurement (aligned)}
#' \item{residuals}{a vector of the residuals (fitted vs measured)}
#' \item{phases}{a dataframe of the phases used to produce the fitted pattern}
#' \item{phases_grouped}{the phases dataframe grouped by phase_name and summed}
#' \item{weighted_pure_patterns}{a dataframe of reference patterns used to produce the fitted pattern.
#' All patterns have been weighted according to the coefficients used in the fit}
#' \item{coefficients}{a named vector of coefficients used to produce the fitted pattern}
#' \item{inputs}{a list of input arguments used in the function call}
#'
#' @examples
#' data(rockjock)
#' data(rockjock_mixtures)
#'
#' #Compute the PCA and loadings
#' x1 <- xrpd_pca(rockjock_mixtures,
#' mean_center = TRUE,
#' bin_size = 1,
#' root_transform = 1)
#'
#' \dontrun{
#' fps_lm_out <- fps_lm(rockjock,
#' smpl = data.frame("x" = x1$loadings$tth,
#' "y" = x1$loadings$Dim.1),
#' refs = rockjock$phases$phase_id,
#' std = "QUARTZ",
#' align = 0.3,
#' p = 0.01)
#'
#' plot(fps_lm_out,
#' wavelength = "Cu",
#' interactive = TRUE,
#' group = TRUE)
#'
#' }
#'
#' @export
fps_lm <- function(lib, smpl, harmonise, refs, std,
tth_align, align, manual_align,
tth_fps, shift, p, ...) {
UseMethod("fps_lm")
}
#' Full pattern summation using linear regression
#'
#' \code{fps_lm.powdRlib} returns a simple fit of a given pattern using linear regression,
#' where coefficients may be either positive or negative. Does not return quantitative
#' data. For quantitative results use \code{fps} or \code{afps}.
#'
#' Requires a \code{powdRlib} library of reference patterns. Mineral concentrations
#' are not quantified and therefore reference intensity ratios are not required.
#'
#' @param lib A \code{powdRlib} object representing the reference library. Created using the
#' \code{powdRlib} constructor function.
#' @param smpl A data frame. First column is 2theta, second column is counts
#' @param harmonise logical parameter defining whether to harmonise the \code{lib} and \code{smpl}.
#' Default = \code{TRUE}. When \code{TRUE} the function harmonises the \code{lib} and \code{smpl}
#' data to the intersecting 2theta range at the coarsest resolution available using natural splines.
#' @param refs A character string of reference pattern IDs or names from the specified library.
#' The IDs or names supplied must be present within the \code{lib$phases$phase_id} or
#' \code{lib$phases$phase_name} columns. If missing from the function call then all phases in
#' the reference library will be used.
#' @param std The phase ID (e.g. "QUA.1") to be used as internal
#' standard. Must match an ID provided in the \code{refs} parameter.
#' @param tth_align A vector defining the minimum and maximum 2theta values to be used during
#' alignment (e.g. \code{c(5,65)}). If not defined, then the full range is used.
#' @param align The maximum shift that is allowed during initial 2theta
#' alignment (degrees). Default = 0.1.
#' @param manual_align A logical operator denoting whether to optimise the alignment within the
#' negative/position 2theta range defined in the \code{align} argument, or to use the specified
#' value of the \code{align} argument for alignment of the sample to the standards. Default
#' = \code{FALSE}, i.e. alignment is optimised.
#' @param tth_fps A vector defining the minimum and maximum 2theta values to be used during
#' full pattern summation (e.g. \code{c(5,65)}). If not defined, then the full range is used.
#' @param shift A single numeric value denoting the maximum (positive or negative) shift,
#' in degrees 2theta, that is allowed during the shifting of selected phases. Default = 0.
#' @param p a numeric parameter between 0 and 1 specifying the p-value limit for coefficients.
#' Any reference patterns with a p-value greater than this value will be omitted from the
#' linear regression and results recomputed. Must be greater than 0.000001 but no greater than 1.
#' @param ... Other arguments
#'
#' @return a powdRlm object with components:
#' \item{tth}{a vector of the 2theta scale of the fitted data}
#' \item{fitted}{a vector of the count intensities of the fitted XRPD pattern}
#' \item{measured}{a vector of the original count intensities of the XRPD measurement (aligned)}
#' \item{residuals}{a vector of the residuals (fitted vs measured)}
#' \item{phases}{a dataframe of the phases used to produce the fitted pattern and their concentrations}
#' \item{phases_grouped}{the phases dataframe grouped by phase_name and concentrations summed}
#' \item{weighted_pure_patterns}{a dataframe of reference patterns used to produce the fitted pattern.
#' All patterns have been weighted according to the coefficients used in the fit}
#' \item{coefficients}{a named vector of coefficients used to produce the fitted pattern}
#' \item{inputs}{a list of input arguments used in the function call}
#'
#' @examples
#' data(rockjock)
#' data(rockjock_mixtures)
#'
#' #Compute the PCA and loadings
#' x1 <- xrpd_pca(rockjock_mixtures,
#' mean_center = TRUE,
#' bin_size = 1,
#' root_transform = 1)
#'
#' \dontrun{
#' fps_lm_out <- fps_lm(rockjock,
#' smpl = data.frame("x" = x1$loadings$tth,
#' "y" = x1$loadings$Dim.1),
#' refs = rockjock$phases$phase_id,
#' std = "QUARTZ",
#' align = 0.3,
#' p = 0.01)
#'
#' plot(fps_lm_out,
#' wavelength = "Cu",
#' interactive = TRUE,
#' group = TRUE)
#'
#' }
#'
#' @export
fps_lm.powdRlib <- function(lib, smpl, harmonise, refs, std,
tth_align, align, manual_align,
tth_fps, shift, p, ...) {
#---------------------------------------------------
#Conditions
#---------------------------------------------------
if (missing(p)) {
p <- 1
}
if (p > 1 | p < 0.000001) {
stop("\n-The value specified in the p argument but be greater than 0.000001 and less
than or equal to 1.")
}
#Make sure the reference library is formatted correctly:
if (!identical(names(lib$xrd), lib$phases$phase_id)) {
stop("The names of the lib$xrd do not match the phase IDs in lib$phases$phase_id")
}
#Make sure the reference library is formatted correctly:
if (!length(names(lib$xrd)) == length(unique(names(lib$xrd)))) {
stop("The reference library contains duplicate phase IDs. Make sure that they
are all unique.")
}
#Set harmonise = TRUE as the default if missing
if (missing(harmonise)) {
harmonise <- TRUE
}
#Make sure harmonise is logical
if (!is.logical(harmonise)) {
stop("The harmonise argument must be logical.",
call. = FALSE)
}
#Make sure that the user knows to use harmonise if needed
if (harmonise == FALSE & !identical(lib$tth, smpl[[1]])) {
stop("The 2theta scale of the library and sample do not match. Try
setting the harmonise argument to TRUE.",
call. = FALSE)
}
#If tth_align is missing then use the maximum tth range
if(missing(tth_align)) {
tth_align <- c(min(smpl[[1]]), max(smpl[[1]]))
}
#If align is missing then set it to default
if(missing(align)) {
align = 0.1
}
#Set the default alignment type to automated
if(missing(manual_align)) {
manual_align <- FALSE
}
#Make sure manual_align is logical
if(!is.logical(manual_align)) {
stop("The manual_align argument must be logical",
call. = FALSE)
}
#If shift is missing then set it to default
if(missing(shift)) {
shift = 0
}
#If refs are not defined then use all of them
if(missing(refs)) {
cat("\n-Using all reference patterns in the library")
refs = lib$phases$phase_id
}
#Create a warning message if the shift is greater than 0.5, since this can confuse the optimisation
if (abs(align) > 0.5 & manual_align == FALSE) {
warning("Be cautious of large 2theta shifts. These can cause issues in sample alignment.",
call. = FALSE)
}
#Check that none of the refs are spelt wrong
wrong_spellings <- which(!refs %in% lib$phases$phase_id & !refs %in% lib$phases$phase_name)
if (length(wrong_spellings) > 0) {
stop(paste(c("\nThe following reference patterns specified in the refs argument are not in the library:\n",
paste(c(refs[wrong_spellings]), collapse = ", "))),
call. = FALSE)
}
#-------------------------------------------------------------
#END OF CONDITIONS, NOW SUBSET LIBRARY
#-------------------------------------------------------------
#subset lib according to the refs and force vector
lib <- subset(lib, refs = refs, mode = "keep")
sd0 <- which(unlist(lapply(lib$xrd, stats::sd)) == 0)
if (length(sd0) > 0) {
cat("\n-Removing", length(sd0), "phases from the library and refs argument
that have a standard deviation of zero")
lib <- subset(lib, refs = names(sd0), mode = "remove")
#remove those reference patterns from the refs string i they're in there
if (length(which(refs %in% names(sd0))) > 0) {
refs <- refs[-which(refs %in% names(sd0))]
}
}
#Make sure that the phase identified as the internal standard is contained within the reference library
if (!std == "none" & !std %in% lib$phases$phase_id) {
stop("The phase you have specified as the internal standard is not in the subset reference library",
call. = FALSE)
}
#Harmonise libraries
if (harmonise == TRUE & !identical(lib$tth, smpl[[1]])) {
harmonised <- .harmoniser(lib = lib, smpl = smpl)
smpl <- harmonised$smpl
lib <- harmonised$lib
}
#--------------------------------------------------------
#Alignment
#--------------------------------------------------------
if (!align == 0) {
#align the data
cat("\n-Aligning sample to the internal standard")
smpl <- .xrd_align(smpl = smpl,
standard = data.frame(tth = lib$tth,
counts = lib$xrd[, which(lib$phases$phase_id == std)],
check.names = FALSE),
xmin = tth_align[1],
xmax = tth_align[2], xshift = align,
manual = manual_align)
#If the alignment is close to the limit, provide a warning
if (sqrt(smpl[[1]]^2) > (align*0.95) & manual_align == FALSE) {
warning("The optimised shift used in alignment is equal to the maximum shift defined
in the function call. We advise visual inspection of this alignment.",
call. = FALSE)
}
#smpl becomes a data frame
smpl <- smpl[[2]]
#Extract the aligned sample
smpl <- smpl[which(smpl[[1]] >= min(lib$tth) & smpl[[1]] <= max(lib$tth)), ]
#Define a 2TH scale to harmonise all data to
smpl_tth <- smpl[[1]]
} else {
names(smpl) <- c("tth", "counts")
smpl_tth <- smpl[[1]]
}
#If tth_fps isn't defined, then define it here
if(missing(tth_fps)) {
tth_fps <- c(min(smpl_tth), max(smpl_tth))
}
if (align > 0) {
#Ensure that samples in the reference library are on the same scale as the sample
cat("\n-Interpolating library to same 2theta scale as aligned sample")
lib$xrd <- data.frame(lapply(lib$xrd,
function(n) stats::spline(x = lib$tth,
y = n,
method = "natural",
xout = smpl_tth)[[2]]),
check.names = FALSE)
}
#Replace the library tth with that of the sample
lib$tth <- smpl_tth
#### decrease 2TH scale to the range defined in the function call
#smpl <- smpl[which(smpl$tth >= tth_fps[1] & smpl$tth <= tth_fps[2]), ]
#Subset the xrd dataframe too
#lib$xrd <- lib$xrd[which(lib$tth >= tth_fps[1] & lib$tth <= tth_fps[2]), , drop = FALSE]
#Replace the tth in the library with the shortened one
#lib$tth <- smpl[, 1]
#-----------------------------------------------------------
#Initial linear regression
#-----------------------------------------------------------
cat("\n-Applying linear regression")
#Get the index of the values within tth_fps
tth_index <- which(lib$tth > tth_fps[1] & lib$tth < tth_fps[2])
lm_out <- stats::lm(x~., data= data.frame("x" = smpl[tth_index, 2],
lib$xrd[tth_index, ],
check.names = FALSE))
x <- lm_out$coefficients[-1]
#Extract the p-values
x_p <- summary(lm_out)$coefficients[-1, 4]
names(x_p) <- names(x)
intercept <- lm_out$coefficients[1]
#if(!identical(names(x), names(lib$xrd))) {
# stop("The names of the coefficients do not match those in the library")
#}
#if(!identical(names(x), names(x_p))) {
# stop("The names of the coefficients do not match the names of the p-values")
#}
while (length(which(x_p > p)) > 0) {
#Remove the p-values > p
remove_p <- which(x_p > p)
cat("\n-Removing coefficients with p-value greater than", p)
x <- x[-remove_p]
lib$xrd <- lib$xrd[-remove_p]
x_p <- x_p[-remove_p]
#Recompute linear regression
lm_out <- stats::lm(x~., data= data.frame("x" = smpl[tth_index, 2],
lib$xrd[tth_index, ],
check.names = FALSE))
x <- lm_out$coefficients[-1]
#Extract the p-values
x_p <- summary(lm_out)$coefficients[-1, 4]
#Extract the intercept
intercept <- lm_out$coefficients[1]
}
#----------------------------------------------------
# Shifting
#----------------------------------------------------
#Shift and then another optimisation ONLY if the shift parameter
#is greater than zero and the correct solver arguments are used
if(shift > 0 & length(x) > 1) {
#This will replace the grid search shifting
cat("\n-Optimising shifting coefficients...")
x_s <- rep(0, length(x))
names(x_s) <- names(x)
o <- stats::optim(par = x_s, .fullpat_shift_seq,
weightings = x,
method = "BFGS", lib = lib,
smpl = smpl, obj = "R",
tth_fps = tth_fps)
x_s <- o$par
#Make sure any large shifts are avoided
if (length(which(x_s > shift | x_s < -shift)) > 0) {
x_s[which(x_s > shift | x_s < -shift)] <- 0
}
#Extract the shifted data
cat("\n-Harmonising library and sample to same 2theta axis")
shifted <- .fullpat_shift(smpl = smpl, lib = lib,
par_shift = x_s)
lib <- shifted$lib
smpl <- shifted$smpl
#-----------------------------------------------------------
#Recalculate linear regression
#-----------------------------------------------------------
cat("\n-Re-applying linear regression after shifting")
lm_out <- stats::lm(x~., data= data.frame("x" = smpl[tth_index, 2],
lib$xrd[tth_index, ],
check.names = FALSE))
x <- lm_out$coefficients[-1]
#Extract the p-values
x_p <- summary(lm_out)$coefficients[-1, 4]
names(x_p) <- names(x)
intercept <- lm_out$coefficients[1]
#if(!identical(names(x), names(lib$xrd))) {
# stop("The names of the coefficients do not match those in the library")
#}
#if(!identical(names(x), names(x_p))) {
# stop("The names of the coefficients do not match the names of the p-values")
#}
while (length(which(x_p > p)) > 0) {
#Remove the p-values > p
remove_p <- which(x_p > p)
cat("\n-Removing coefficients with p-value greater than", p)
x <- x[-remove_p]
lib$xrd <- lib$xrd[-remove_p]
x_p <- x_p[-remove_p]
#Recompute linear regression
lm_out <- stats::lm(x~., data= data.frame("x" = smpl[tth_index, 2],
lib$xrd[tth_index, ],
check.names = FALSE))
x <- lm_out$coefficients[-1]
#Extract the p-values
x_p <- summary(lm_out)$coefficients[-1, 4]
#extract the intercept
intercept <- lm_out$coefficients[1]
}
}
#-----------------------------
#Summarise the data
#-----------------------------
df <- data.frame("phase_id" = names(x),
"coefficient" = unname(x),
"p" = unname(x_p))
df <- plyr::join(df,
lib$phases[c(1:2)],
by = "phase_id")
df <- df[c("phase_id", "phase_name", "coefficient", "p")]
dfs <- data.frame(stats::aggregate(coefficient ~ phase_name, data = df, FUN = sum),
stringsAsFactors = FALSE)
#--------------------------------------------------------
#Compute fitted pattern and quantify
#--------------------------------------------------------
#compute fitted pattern and residuals
fitted_pattern <- apply(sweep(as.matrix(lib$xrd), 2, x, "*"), 1, sum) + intercept
resid_x <- smpl[, 2] - fitted_pattern
#Extract the xrd data
xrd <- data.frame(lib$xrd,
check.names = FALSE)
#Scale them by the optimised weightings
xrd <- sweep(xrd, 2, x, "*")
#If only 1 pattern is used in the fit, then rename it
if (ncol(xrd) == 1) {
names(xrd) <- df$phase_id[1]
}
#Create a list of the input arguments
inputs <- list("harmonise" = harmonise,
"refs" = refs,
"std" = std,
"force" = force,
"tth_align" = tth_align,
"align" = align,
"manual_align" = manual_align,
"tth_fps" = tth_fps,
"shift" = shift)
#Define a list that becomes the function output
out <- list("tth" = smpl[,1],
"fitted" = unname(fitted_pattern),
"measured" = smpl[,2],
"residuals" = unname(resid_x),
"phases" = df,
"phases_grouped" = dfs,
"weighted_pure_patterns" = xrd,
"coefficients" = x,
"inputs" = inputs)
#Define the class
class(out) <- "powdRlm"
cat("\n***Full pattern summation complete***\n")
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.