Nothing
# ID RnmrTools.R
# Copyright (C) 2017-2022 INRAE
# Authors: D. Jacob
#
lbALIGN <- 'align'
lbGBASELINE <- 'gbaseline'
lbBASELINE <- 'baseline'
lbQNMRBL <- 'qnmrbline'
lbAIRPLS <- 'airpls'
lbBIN <- 'binning'
lbCALIB <- 'calibration'
lbNORM <- 'normalisation'
lbFILTER <- 'denoising'
lbWARP <- 'warp'
lbSHIFT <- 'shift'
lbCLUPA <- 'clupa'
lbBUCKET <- 'bucket'
lbZERO <- 'zero'
EOL <- 'EOL'
# returns string w/o leading or trailing whitespace
.N <- function(x) { as.numeric(as.vector(x)) }
.C <- function(x) { as.vector(x) }
Write.LOG <- function(logfile=stdout(), ...) {
cat(sprintf(...), sep='', file=logfile, append=TRUE)
}
fitdistr <- function(...) {
MASS::fitdistr(...)
}
#------------------------------
# airPLS
#------------------------------
.WhittakerSmooth <- function(x,w,lambda,differences=1)
{
x=matrix(x,nrow = 1, ncol=length(x))
L=length(x)
E=Matrix::spMatrix(L,L,i=seq(1,L),j=seq(1,L),rep(1,L))
D=methods::as(diff(E,1,differences),"dgCMatrix")
W=methods::as(Matrix::spMatrix(L,L,i=seq(1,L),j=seq(1,L),w),"dgCMatrix")
background=solve((W+lambda*t(D)%*%D),t((w*x)));
return(as.vector(background))
}
.airPLS <- function(x, lambda=100, porder=1, itermax=8)
{
x = as.vector(x)
m = length(x)
w = rep(1,m)
i = 1
repeat {
z = .WhittakerSmooth(x,w,lambda,porder)
d = x-z
sum_smaller = abs(sum(d[d<0]))
if(sum_smaller<0.001*sum(abs(x))||i==itermax) break
w[d>=0] = 0
w[d<0] = exp(i*abs(d[d<0])/sum_smaller)
w[1] = exp(i*max(d[d<0])/sum_smaller)
w[m] = exp(i*max(d[d<0])/sum_smaller)
i=i+1
}
return(z)
}
#------------------------------
# Peak detection for spectra
#------------------------------
# Input parameters
# - X: spectral dataset in matrix format in which each row contains a single sample
# - nDivRange: size of a single small segment after division of spectra, Default value: 64
# - baselineThresh: removal of all the peaks with intensity lower than this threshold, Default value: 50000
# Output parameters
# - peak lists of the spectra
.detectSpecPeaks <- function (X, nDivRange, scales=seq(1, 16, 2), baselineThresh, SNR.Th=-1)
{
nFea <- ncol(X)
nSamp <- nrow(X)
noiseEsp <- 0.005
if (SNR.Th < 0) SNR.Th <- max(scales) * 0.05
i<-0
pList <- foreach::foreach(i=1:nSamp) %dopar% {
myPeakRes <- NULL
mySpec <- X[i, ]
for (k in 1:length(nDivRange)) {
divR <- nDivRange[k]
for (j in 1:(trunc(nFea/divR) - 3)) {
startR <- (j - 1) * divR + 1
if (startR >= nFea) startR <- nFea
endR <- (j + 3) * divR
if (endR > nFea) endR <- nFea
xRange <- mySpec[startR:endR]
xMean <- mean(xRange)
xMedian <- stats::median(xRange)
if ((xMean == xMedian) || abs(xMean - xMedian)/((xMean + xMedian) * 2) < noiseEsp) next
peakInfo <- MassSpecWavelet::peakDetectionCWT(mySpec[startR:endR], scales = scales, SNR.Th = SNR.Th)
majorPeakInfo <- peakInfo$majorPeakInfo
if (length(majorPeakInfo$peakIndex) > 0) myPeakRes <- c(myPeakRes, majorPeakInfo$peakIndex + startR - 1)
}
}
plst <- list(myPeakRes)
plst[[1]] <- sort(unique(plst[[1]]))
plst[[1]] <- plst[[1]][which(mySpec[ plst[[1]] ] > baselineThresh)]
plst[[1]] <- sort(plst[[1]])
plst
}
pList <- simplify2array(pList)
return(pList)
}
#------------------------------
# CluPA function for two spectra.
#------------------------------
# This function implements the idea of the CluPA algorithm to align the target spectrum against the reference spectrum.
.hClustAlign <-function(refSpec, tarSpec, peakList, peakLabel, startP, endP,
distanceMethod="average", maxShift=0, acceptLostPeak=FALSE){
minpeakList=min(peakList)[1];
maxpeakList=max(peakList)[1];
startCheckP=startP+which.min(tarSpec[startP:(minpeakList-1)])[1]-1;
if (is.na(startCheckP)) {
startCheckP=startP;
}
if(startCheckP < 1){
startCheckP <- startP
}
endCheckP=maxpeakList+ which.min(tarSpec[(maxpeakList+1):endP])[1];
if (is.na(endCheckP)){
endCheckP=endP;
}
if(endCheckP > length(tarSpec)){
endCheckP <- endP
}
if ((endCheckP-startCheckP)<2) {
return (list(tarSpec=tarSpec,peakList=peakList));
}
adj=.findShiftStepFFT(refSpec[startCheckP:endCheckP],
tarSpec[startCheckP:endCheckP],maxShift=maxShift);
if (adj$stepAdj!=0){
if (acceptLostPeak) isImplementShift=TRUE
else isImplementShift=(adj$stepAdj<0&&adj$stepAdj+
minpeakList >=startCheckP )||(adj$stepAdj>0&&adj$stepAdj+
maxpeakList<=endCheckP);
if (isImplementShift)
{
newTargetSpecRegion=.doShift(tarSpec[startCheckP:endCheckP],adj$stepAdj);
tarSpec[startCheckP:endCheckP]=newTargetSpecRegion;
peakListTarget=which(peakLabel==0);
peakList[peakListTarget]=peakList[peakListTarget]+adj$stepAdj;
lostPeaks <- which(peakList <= 0 | peakList > length(tarSpec))
if (length(lostPeaks) >0){
peakList=peakList[-lostPeaks];
peakLabel=peakLabel[-lostPeaks];
}
}
}
if (length(peakList)<3) {return (list(tarSpec=tarSpec,peakList=peakList));}
hc=stats::hclust(stats::dist(peakList),method=distanceMethod)
clusterLabel=stats::cutree(hc,h=hc$height[length(hc$height)-1]);
if (length(unique(clusterLabel))<2){
return (list(tarSpec=tarSpec,peakList=peakList));
}
labelID1=which(clusterLabel==1);
subData1=peakList[labelID1];
subLabel1=peakLabel[labelID1];
labelID2=which(clusterLabel==2);
subData2=peakList[labelID2];
subLabel2=peakLabel[labelID2];
maxsubData1=max(subData1)[1];
minsubData2=min(subData2)[1];
if (maxsubData1<minsubData2){
endP1=maxsubData1+which.min(tarSpec[(maxsubData1+1) :(minsubData2-1)])[1];
if (is.na(endP1)) endP1=maxsubData1;
if(endP1 > length(tarSpec)){
endP1 <- maxsubData1
}
startP2=endP1+1;
if (length(unique(subLabel1))>1){
res=.hClustAlign(refSpec,tarSpec,subData1,subLabel1,startP,endP1,
distanceMethod=distanceMethod,maxShift=maxShift,
acceptLostPeak=acceptLostPeak);
tarSpec=res$tarSpec;
if(length(labelID1) == length(res$peakList)){
peakList[labelID1] <- res$peakList
} else{
res$peakList <- c(res$peakList, rep(res$peakList[1], length(labelID1)- length(res$peakList) ) )
peakList[labelID1] <- res$peakList
}
}
if (length(unique(subLabel2))>1){
res=.hClustAlign(refSpec,tarSpec,subData2,subLabel2,startP2,endP,
distanceMethod=distanceMethod,maxShift=maxShift,
acceptLostPeak=acceptLostPeak);
tarSpec=res$tarSpec;
if(length(labelID2) == length(res$peakList)){
peakList[labelID2]=res$peakList
} else{
res$peakList <- c(res$peakList, rep(res$peakList[1], length(labelID2)- length(res$peakList) ) )
peakList[labelID2]=res$peakList
}
}
}else{
maxsubData2=max(subData2)[1];
minsubData1=min(subData1)[1];
endP2=maxsubData2+which.min(tarSpec[(maxsubData2+1) :(minsubData1-1)])[1];
if (is.na(endP2)) endP2=maxsubData2;
if(endP2 > length(tarSpec)){
endP2 <- maxsubData2
}
startP1=endP2+1;
if (length(unique(subLabel2))>1){
res=.hClustAlign(refSpec,tarSpec,subData2,subLabel2,startP,endP2,
distanceMethod=distanceMethod,maxShift=maxShift,
acceptLostPeak=acceptLostPeak);
tarSpec=res$tarSpec;
if(length(labelID2) == length(res$peakList)){
peakList[labelID2]=res$peakList
} else{
res$peakList <- c(res$peakList, rep(res$peakList[1], length(labelID2)- length(res$peakList) ) )
peakList[labelID2]=res$peakList
}
}
if (length(unique(subLabel1))>1){
res=.hClustAlign(refSpec,tarSpec,subData1,subLabel1,startP1,endP,
distanceMethod=distanceMethod,maxShift=maxShift,
acceptLostPeak=acceptLostPeak);
tarSpec=res$tarSpec;
if(length(labelID1) == length(res$peakList)){
peakList[labelID1] <- res$peakList
} else{
res$peakList <- c(res$peakList, rep(res$peakList[1], length(labelID1)- length(res$peakList) ) )
peakList[labelID1] <- res$peakList
}
}
}
return (list(tarSpec=tarSpec,peakList=peakList));
}
# Segment shift
# Move a spectral segment of a sample shiftStep points to right or left
.doShift <-function(specSeg, shiftStep){
nFea=length(specSeg);
newSegment=double(nFea);
for (j in 1:nFea)
if (shiftStep+j>0&&shiftStep+j<=nFea)
newSegment[shiftStep+j]=specSeg[j];
if (shiftStep>0){
for (j in 1: shiftStep) newSegment[j]=newSegment[shiftStep+1];
}
else{
for (j in (nFea+shiftStep): nFea)
newSegment[j]=newSegment[(nFea+shiftStep-1)];
}
return (newSegment);
}
# Finding the shift-step by using Fast Fourier Transform cross- correlation
# This function uses Fast Fourier Transform cross-correlation to find out the shift step between two spectra.
.findShiftStepFFT<-function (refSpec, tarSpec, maxShift = 0, scale=NULL)
{
#do scaling if the spectra are low abundant
refScale=stats::median(abs(refSpec)); if (refScale==0) refScale=mean(abs(refSpec))
tarScale=stats::median(abs(tarSpec)); if (tarScale==0) tarScale=mean(abs(tarSpec))
scaleFactor=10^round(-log10(min(tarScale,refScale)))
if (is.null(scale) & scaleFactor > 1) scale=TRUE
if (is.null(scale)) scale=FALSE
if (scale){
refScale=refScale*scaleFactor
tarScale=tarScale*scaleFactor
}
M = length(refSpec)
zeroAdd = 2^ceiling(log2(M)) - M
r = c(refSpec*1e6, double(zeroAdd))
s = c(tarSpec*1e6, double(zeroAdd))
M = M + zeroAdd
fftR = stats::fft(r)
fftS = stats::fft(s)
R = fftR * Conj(fftS)
R = R/M
rev = stats::fft(R, inverse = TRUE)/length(R)
vals = Re(rev)
maxi = -1
maxpos = 1
lenVals <- length(vals)
if ((maxShift == 0) || (maxShift > M))
maxShift = M
if (anyNA(vals)) {
lag = 0
return(list(corValue = maxi, stepAdj = lag))
}
for (i in 1:maxShift) {
if (vals[i] > maxi) {
maxi = vals[i]
maxpos = i
}
if (vals[lenVals - i + 1] > maxi) {
maxi = vals[lenVals - i + 1]
maxpos = lenVals - i + 1
}
}
if (maxi < 0.1) {
lag = 0
return(list(corValue = maxi, stepAdj = lag))
}
if (maxpos > lenVals/2)
lag = maxpos - lenVals - 1
else lag = maxpos - 1
return(list(corValue = maxi, stepAdj = lag))
}
#------------------------------
# CluPA alignment for multiple spectra
#------------------------------
# Input parameters
# - X: spectral dataset in the matrix format in which each row contains a single sample
# - peakList: peak lists of the spectra
# - refInd: index of the reference spectrum
# - maxShift: maximum number of the points for a shift step
# Output parameters
# - aligned spectra: same format as input
.dohCluster <- function (X, peakList, refInd = 1, maxShift = 50, acceptLostPeak = TRUE)
{
refSpec = X[refInd, ]
tarInd<-0
Y <- foreach::foreach(tarInd=1:nrow(X), .combine=rbind) %dopar% {
if (tarInd == refInd) { refSpec; } else {
tarSpec <- X[tarInd, ]
myPeakList <- c(peakList[[refInd]], peakList[[tarInd]])
myPeakLabel <- double(length(myPeakList))
myPeakLabel[1:length(peakList[[refInd]])] <- 1
res <- .hClustAlign(refSpec, tarSpec, myPeakList, myPeakLabel, 1, length(tarSpec), maxShift = maxShift, acceptLostPeak = TRUE)
res$tarSpec
}}
return(Y)
}
#------------------------------
# Spectra alignment
#------------------------------
# Input parameters
# - data: n x p datamatrix
# - nDivRange: size of a single small segment after division of the whole spectrum, Default value: 64
# - reference: number of the spectrum reference; if NULL, automatic detection, Default value: NULL
# - baselineThresh: removal of all the peaks with intensity lower than this threshold, Default value: 50000
# Output parameters
# - Y: n x p datamatrix
.CluPA <- function(data, reference=reference, nDivRange, scales = seq(1, 16, 2), baselineThresh, SNR.Th = -1, maxShift=50, DEBUG=FALSE)
{
LOGMSG <- ""
## Peak picking
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: --- Peak detection : nDivRange =",nDivRange,"\n"));
startTime <- proc.time()
peakList <- .detectSpecPeaks(X=data, nDivRange=nDivRange, scales=scales, baselineThresh=baselineThresh, SNR.Th = SNR.Th)
endTime <- proc.time()
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: --- Peak detection time: ",(endTime[3]-startTime[3])," sec\n"));
## Reference spectrum determination
if (reference == 0) {
V <- bestref(data, optim.crit="WCC")
refInd <- V$best.ref
} else {
refInd=reference
}
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: --- The reference spectrum is: ",refInd,"\n"));
## Spectra alignment to the reference
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: --- Spectra alignment to the reference: maxShift =",maxShift,"\n"));
startTime <- proc.time()
Y <- .dohCluster(data, peakList=peakList, refInd=refInd, maxShift=maxShift, acceptLostPeak=TRUE)
endTime <- proc.time()
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: --- Spectra alignment time: ",(endTime[3]-startTime[3])," sec\n"));
## Output
return(list(M=Y, LOGMSG=LOGMSG))
}
#------------------------------
# Calibration ot the PPM Scale
#------------------------------
RCalib1D <- function(specMat, PPM_NOISE_AREA, zoneref, ppmref)
{
i1<-length(which(specMat$ppm>max(zoneref)))
i2<-which(specMat$ppm<=min(zoneref))[1]
# PPM calibration of each spectrum
for ( i in 1:specMat$nspec ) {
i0 <- i1 + which(specMat$int[i, i1:i2]==max(specMat$int[i, i1:i2])) - 1
ppm0 <- specMat$ppm_max - (i0-1)*specMat$dppm
dppmref <- ppm0 - ppmref
decal <- 0
sig <- C_estime_sd(specMat$int[i, 1:specMat$size],128)
if (abs(dppmref) > specMat$dppm) {
decal <- trunc(dppmref/specMat$dppm)
dppmref <- dppmref - decal*specMat$dppm
}
if (abs(dppmref) > 0.5*specMat$dppm) {
decal <- decal + trunc(2*dppmref/specMat$dppm)
dppmref <- dppmref - trunc(2*dppmref/specMat$dppm)*specMat$dppm
}
if (decal==0) next
if (decal<0) {
specMat$int[i, 1:(specMat$size-abs(decal))] <- specMat$int[i,(1+abs(decal)):specMat$size]
specMat$int[i, (specMat$size-abs(decal)+1):specMat$size] <- stats::rnorm(length((specMat$size-abs(decal)+1):specMat$size), mean=specMat$int[i,specMat$size-abs(decal)-1], sd=sig)
}
if (decal>0) {
specMat$int[i,(1+abs(decal)):specMat$size] <- specMat$int[i, 1:(specMat$size-abs(decal))]
specMat$int[i, 1:abs(decal)] <- stats::rnorm(length(1:abs(decal)), mean=specMat$int[i,abs(decal)+1], sd=sig)
}
}
return(specMat)
}
#------------------------------
# Normalisation of the Intensities
#------------------------------
RNorm1D <- function(specMat, normmeth, zones)
{
N <- dim(zones)[1]
if (normmeth=='CSN') {
# 1/ Integration of each zone ...
i <- 0
SUM <- foreach::foreach(i=1:N, .combine='+') %dopar% {
i1<-length(which(specMat$ppm>max(zones[i,])))
i2<-which(specMat$ppm<=min(zones[i,]))[1]
simplify2array(lapply( 1:specMat$nspec, function(x) {
0.5*(specMat$int[x, i1] + specMat$int[x, i2]) + sum(specMat$int[x,(i1+1):(i2-1)])
}))
}
COEFF <- SUM/mean(SUM)
}
if (normmeth=='PQN') {
# 1/ Get spectra values for each zone ...
i <- 0
SUBMAT <- foreach::foreach(i=1:N, .combine=cbind) %dopar% {
i1<-length(which(specMat$ppm>max(zones[i,])))
i2<-which(specMat$ppm<=min(zones[i,]))[1]
# .. for each spectrum
t(simplify2array(lapply( 1:specMat$nspec, function(x) { specMat$int[x,i1:i2] })))
}
# Calculate the most probabe quotient
V <- apply(SUBMAT, 2, stats::median)
MQ <- t(t(SUBMAT)/V)
COEFF <- apply(MQ,1, stats::median)
}
# 2/ Apply to each spectrum, its corresponding coefficient
V <- lapply( 1:specMat$nspec, function(x) { specMat$int[x,] <<- specMat$int[x,]/COEFF[x] } )
return(specMat)
}
#------------------------------
# Global Baseline Correction
#------------------------------
RGbaseline1D <- function(specMat,PPM_NOISE_AREA, zone, WS, NEIGH)
{
NFAC <- 1.5
NEGFAC <- 10
NBPASS <- 2
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
TD <- specMat$size
# Baseline Estimation for each spectrum
i <- 0
BLList <- foreach::foreach(i=1:specMat$nspec, .combine=cbind) %dopar% {
sig <- fitdistr(specMat$int[i, length(which(specMat$ppm>PPM_NOISE_AREA[2])):(which(specMat$ppm<=PPM_NOISE_AREA[1])[1])], "normal")$estimate[2]
mv <- simplify2array(lapply( c(3:61), function(x) { mean(abs(specMat$int[i, ((x-1)*TD/64):(x*TD/64)])); }))
mmoy<-min(mv); mv <- NULL
if (min(specMat$int[i, ])< -NEGFAC*mmoy) {
Vthreshold <- max(specMat$int[ i, specMat$int[i, ]/mmoy < -NEGFAC ])
specMat$int[ i, specMat$int[i, ]/mmoy < -NEGFAC ] <- Vthreshold
}
V <- specMat$int[i, ]
if (NBPASS==1) {
BL <- C_Estime_LB (V, i1, i2, WS, NEIGH, NFAC*sig)
} else {
BL <- 0*rep(1:length(V))
for( n in 1:NBPASS) {
# Estimation of Baseline
BLn <- C_Estime_LB (V, i1, i2, WS, NEIGH, NFAC*sig)
V <- V - BLn
BL <- BL + BLn
}
}
BL
}
gc()
# Baseline Correction for each spectrum
for ( i in 1:specMat$nspec ) {
mv <- simplify2array(lapply( c(3:61), function(x) { mean(abs(specMat$int[i, ((x-1)*TD/64):(x*TD/64)])); }))
mmoy<-min(mv); mv <- NULL
if (min(specMat$int[i, ])< -NEGFAC*mmoy) {
Vthreshold <- max(specMat$int[ i, specMat$int[i, ]/mmoy < -NEGFAC ])
specMat$int[ i, specMat$int[i, ]/mmoy < -NEGFAC ] <- Vthreshold
}
if( is.null(dim(BLList)) ) { BL <- BLList; } else { BL <- BLList[,i]; }
specMat$int[i,c(i1:i2)] <- specMat$int[i,c(i1:i2)] - BL[c(i1:i2)]
}
return(specMat)
}
#------------------------------
# Local Baseline Correction (deprecated)
#------------------------------
RBaseline1D <- function(specMat,PPM_NOISE_AREA, zone, WINDOWSIZE)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
SMOOTHSIZE <- round(WINDOWSIZE/2)
ALPHA <- 0.2
# Ajust the window size parameter
n <- i2-i1+1
ws <- WINDOWSIZE
dws <- 0
signdws <- ifelse ( n/ws>round(n/ws), 1, -1 )
dmin <- 1
if (n>WINDOWSIZE) {
repeat {
d <- abs(n/(ws+signdws*dws)-round(n/(ws+signdws*dws)))
if ( d>dmin ) { dws <- dws - signdws; break }
dmin <- d; dws <- dws + signdws;
}
WINDOWSIZE <- ws+signdws*dws
n2 <- round(n/(ws+signdws*dws))*(ws+signdws*dws)
if (n2<n) i2 <- i2 - (n-n2)
} else {
WINDOWSIZE <- n
}
# Baseline Estimation for each spectrum
i<-0
BLList <- foreach::foreach(i=1:specMat$nspec, .combine=cbind) %dopar% {
specSig <- fitdistr(specMat$int[i,length(which(specMat$ppm>PPM_NOISE_AREA[2])):(which(specMat$ppm<=PPM_NOISE_AREA[1])[1])], "normal")$estimate[2]
x <- specMat$int[i,c(i1:i2)]
xmat <- matrix(x, nrow=WINDOWSIZE)
ymin <- apply(xmat, 2, min) + 1.2*specSig
y1 <- rep(ymin, each = WINDOWSIZE)
y2 <- Smooth(y1,SMOOTHSIZE)
BL <- simplify2array(lapply(c(1:length(x)), function(k) { min(y1[k], y2[k]); }))
BL <- lowpass1(BL, ALPHA)
BL
}
# Baseline Correction for each spectrum
for ( i in 1:specMat$nspec ) {
if( is.null(dim(BLList)) ) { BL <- BLList; } else { BL <- BLList[,i]; }
specMat$int[i,c(i1:i2)] <- specMat$int[i,c(i1:i2)] - BL
}
return(specMat)
}
#------------------------------
# q-NMR Baseline Correction
#------------------------------
Rqnmrbc1D <- function(specMat, PPM_NOISE_AREA, zone)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
n <- i2-i1+1
NLOOP <- 5
dN <- round(0.00075/specMat$dppm)
CSIG <- 5
# Baseline Estimation for each spectrum
i<-0
BLList <- foreach::foreach(i=1:specMat$nspec, .combine=cbind) %dopar% {
specSig <- fitdistr(specMat$int[i,length(which(specMat$ppm>PPM_NOISE_AREA[2])):(which(specMat$ppm<=PPM_NOISE_AREA[1])[1])], "normal")$estimate[2]
x <- specMat$int[i,c(i1:i2)]
bc <- 0*rep(1:n)
for (l in 1:NLOOP) {
bci <- C_GlobSeg(x, dN, CSIG*specSig)
x <- x - bci
bc <- bc + bci
}
bc
}
# Baseline Correction for each spectrum
for ( i in 1:specMat$nspec ) {
if( is.null(dim(BLList)) ) { BL <- BLList; } else { BL <- BLList[,i]; }
V <- specMat$int[i,c(i1:i2)] - BL
specMat$int[i,c(i1:i2)] <- V
}
return(specMat)
}
#------------------------------
# airPLS : Local Baseline Correction
#------------------------------
RairPLSbc1D <- function(specMat, zone, clambda, porder=1)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
n <- i2-i1+1
cmax <- switch(porder, 6, 7, 8)
lambda <- ifelse (clambda==cmax, 5, 10^(cmax-clambda) )
# Baseline Estimation for each spectrum
BLList <- foreach::foreach(i=1:specMat$nspec, .combine=cbind) %dopar% {
x <- specMat$int[i,c(i1:i2)]
bc <- .airPLS(x, lambda, porder)
gc()
bc
}
# Baseline Correction for each spectrum
for ( i in 1:specMat$nspec ) {
if( is.null(dim(BLList)) ) { BL <- BLList; } else { BL <- BLList[,i]; }
V <- specMat$int[i,c(i1:i2)] - BL
specMat$int[i,c(i1:i2)] <- V
}
return(specMat)
}
#------------------------------
# Denoising the selected PPM ranges
#------------------------------
RFilter1D <- function(specMat,zone, FILTORD, FILTLEN)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
sgfilt <- sgolay(p=FILTORD, n=FILTLEN)
# Denoising each spectrum
for ( i in 1:specMat$nspec ) {
x <- specMat$int[i,c(i1:i2)]
SpecMat_sg <- filter(sgfilt,x)
specMat$int[i,c(i1:i2)] <- SpecMat_sg
}
return(specMat)
}
#------------------------------
# Zeroing the selected PPM ranges
#------------------------------
RZero1D <- function(specMat, zones, DEBUG=FALSE)
{
# Zeroing each PPM range
N <- dim(zones)[1]
LOGMSG <- ""
for ( i in 1:N ) {
i1<-length(which(specMat$ppm>max(zones[i,])))
i2<-which(specMat$ppm<=min(zones[i,]))[1]
specMat$int[,c(i1:i2)] <- matrix(0,specMat$nspec,(i2-i1+1))
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: Zone",i,"= (",min(zones[i,]),",",max(zones[i,]),")\n"))
}
specMat$LOGMSG <- LOGMSG
return(specMat)
}
#------------------------------
# LS : Alignment of the selected PPM ranges
#------------------------------
RAlign1D <- function(specMat, zone, RELDECAL=0.35, idxSref=0, Selected=NULL, fapodize=FALSE)
{
# Alignment of each PPM range
NBPASS <- 3
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
apodize <- ifelse(fapodize,1,0)
decal <- round((i2-i1)*RELDECAL)
for( n in 1:NBPASS) {
ret <- align_segment(specMat$int, segment_shifts( specMat$int, idxSref, decal, i1-1, i2-1, Selected-1), i1-1, i2-1, apodize, Selected-1)
}
return(specMat)
}
#------------------------------
# CluPA : Alignment of the selected PPM ranges
#------------------------------
RCluPA1D <- function(specMat, zonenoise, zone, resolution=0.02, SNR=3, idxSref=0, Selected=NULL, DEBUG=FALSE)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
# Noise estimation
PPM_NOISE_AREA <- c(min(zonenoise), max(zonenoise))
idx_Noise <- c( length(which(specMat$ppm>PPM_NOISE_AREA[2])),(which(specMat$ppm<=PPM_NOISE_AREA[1])[1]) )
Vref <- spec_ref(specMat$int)
ynoise <- C_noise_estimation(Vref,idx_Noise[1],idx_Noise[2])
# Parameters
baselineThresh <- SNR*mean( C_noise_estimate(specMat$int, idx_Noise[1],idx_Noise[2], 1) )
nDivRange <- max( round(resolution/specMat$dppm,0), 64 )
maxshift <- min( round(0.01/specMat$dppm), round(nDivRange/4) )
# Subpart of spectra
if( is.null(Selected)) M<-specMat$int[, c(i1:i2) ] else M<-specMat$int[Selected, c(i1:i2) ];
out <- .CluPA(M, reference=idxSref, nDivRange, scales = seq(1, 8, 2),
baselineThresh, SNR.Th = 0.1, maxShift=maxshift, DEBUG=DEBUG)
if( is.null(Selected)) specMat$int[ ,c(i1:i2)] <- out$M else specMat$int[ Selected,c(i1:i2)] <- out$M
specMat$LOGMSG <- out$LOGMSG
return(specMat)
}
#------------------------------
# PTW : Alignment of the selected PPM ranges
#------------------------------
RWarp1D <- function(specMat, zone, idxSref=0, warpcrit=c("WCC","RMS"), Selected=NULL)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
if( is.null(Selected)) M<-specMat$int[, c(i1:i2) ] else M<-specMat$int[Selected, c(i1:i2) ];
if (is.null(Selected)) nspec <- specMat$nspec else nspec <- length(Selected);
if (idxSref==0 || ( !is.null(Selected) && !(idxSref %in% Selected) )) {
V <- bestref(M, optim.crit=warpcrit)
refid <- V$best.ref
} else {
refid <- idxSref
}
ref <- M[refid, ]
ssampl <- M[ c(1:nspec)[-refid], ]
out <- ptw(ref, ssampl, warp.type = "individual", mode = "forward", optim.crit=warpcrit)
#out <- ptw(ref, ssampl, warp.type = "global", mode = "forward", init.coef = c(0, 1, 0), optim.crit=warpcrit)
M <- out$warped.sample
M[is.na(M)] <- 0
if( is.null(Selected)) specMat$int[ c(1:nspec)[-refid],c(i1:i2)] <- M else specMat$int[ Selected[-refid],c(i1:i2)] <- M
return(specMat)
}
#------------------------------
# Shift of the selected PPM ranges
#------------------------------
RShift1D <- function(specMat, zone, RELDECAL=0, Selected=NULL)
{
i1 <- ifelse( max(zone)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone))) )
i2 <- ifelse( min(zone)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone))[1] )
di <- round(RELDECAL / specMat$dppm,0);
j1 <- i1 - di
j2 <- i2 - di
if( is.null(Selected) ) {
M <- specMat$int[, c(i1:i2) ]
specMat$int[, c(i1:i2) ] <- 0
specMat$int[, c(j1:j2) ] <- M
} else {
M <- specMat$int[Selected, c(i1:i2) ]
specMat$int[Selected, c(i1:i2) ] <- 0
specMat$int[Selected, c(j1:j2) ] <- M
}
return(specMat)
}
#------------------------------
# Bucket : Apply the bucketing based on the 'Algo' algorithm with the resolution 'resol'.
# Then elinate buckets with a SNR under the threshold given by 'snr'
# Append to / or Write upon the bucket file depending the 'appendBuc' value
#------------------------------
RBucket1D <- function(specMat, Algo, resol, snr, zones, zonenoise, appendBuc, DEBUG=FALSE)
{
# Limit size of buckets
MAXBUCKETS<-2000
LOGMSG <- ""
if (Algo %in% c('aibin','erva','unif')) {
# Noise estimation
if (sum(is.na(zonenoise))) {
PPM_NOISE_AREA <- c(10.2, 10.5)
} else {
PPM_NOISE_AREA <- c(min(zonenoise), max(zonenoise))
}
idx_Noise <- c( length(which(specMat$ppm>PPM_NOISE_AREA[2])),(which(specMat$ppm<=PPM_NOISE_AREA[1])[1]) )
Vref <- spec_ref(specMat$int)
ynoise <- C_noise_estimation(Vref,idx_Noise[1],idx_Noise[2])
Vnoise <- abs( C_noise_estimate(specMat$int, idx_Noise[1],idx_Noise[2], 1) )
}
if (Algo %in% c('aibin')) {
bdata <- list()
bdata$ynoise <- ynoise
bdata$vnoise <- NULL
bdata$inoise_start <- idx_Noise[1]
bdata$inoise_end <- idx_Noise[2]
bdata$R <- resol
bdata$dppm <- specMat$dppm
bdata$VREF <- 1
if (specMat$nuc == "1H") {
bdata$noise_fac <- 3
bdata$bin_fac <- 0.5
bdata$peaknoise_rate <- 15
bdata$BUCMIN <- 0.003
} else {
bdata$noise_fac <- 2
bdata$bin_fac <- 0.1
bdata$peaknoise_rate <- 5
bdata$BUCMIN <- 0.05
}
}
if (Algo %in% c('erva')) {
bdata <- list()
bdata$bucketsize <- resol
bdata$noise_fac <- 1
bdata$dppm <- specMat$dppm
bdata$ppm_min <- specMat$ppm_min
bdata$BUCMIN <- 0.001
}
# For each PPM range
buckets_zones <- NULL
N <- dim(zones)[1]
i<-0
buckets_zones <- foreach::foreach(i=1:N, .combine=rbind) %dopar% {
i2<-which(specMat$ppm<=min(zones[i,]))[1]
i1<-length(which(specMat$ppm>max(zones[i,])))
if (Algo=='aibin') {
Mbuc <- matrix(, nrow = MAXBUCKETS, ncol = 2)
Mbuc[] <- 0
buckets_m <- C_aibin_buckets(specMat$int, Mbuc, Vref, bdata, i1, i2)
}
if (Algo=='erva') {
Mbuc <- matrix(, nrow = MAXBUCKETS, ncol = 2)
Mbuc[] <- 0
buckets_m <- C_erva_buckets(specMat$int, Mbuc, Vref, bdata, i1, i2)
}
if (Algo=='unif') {
seq_buc <- seq(i1, i2, round(resol/specMat$dppm))
n_bucs <- length(seq_buc) - 1
buckets_m <- cbind ( seq_buc[1:n_bucs], seq_buc[2:(n_bucs+1)])
}
if (Algo=='vsb') {
buckets_m <- matrix( c( i1, i2 ), nrow=1, ncol=2, byrow=T )
}
LOGMSG <- paste("Rnmr1D: Zone",i,"= (",min(zones[i,]),",",max(zones[i,]),"), Nb Buckets =",dim(buckets_m)[1],"\n")
if (dim(buckets_m)[1]>1) {
# Keep only the buckets for which the SNR average is greater than 'snr'
MaxVals <- C_maxval_buckets (specMat$int, buckets_m)
buckets_m <- buckets_m[ which( apply(t(MaxVals/(2*Vnoise)),1,stats::quantile)[4,]>snr), ]
}
cbind( specMat$ppm[buckets_m[,1]], specMat$ppm[buckets_m[,2]], LOGMSG )
}
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste(unique(buckets_zones[,3]), collapse=""))
buckets_zones <- cbind( .N(buckets_zones[,1]), .N(buckets_zones[,2]) )
if( DEBUG ) LOGMSG <- paste0(LOGMSG, paste("Rnmr1D: Total Buckets =",dim(buckets_zones)[1],"\n"))
if( buckets_zones[1,1]>buckets_zones[1,2] ) { colnames(buckets_zones) <- c('max','min') }
else { colnames(buckets_zones) <- c('min','max') }
if (appendBuc==1) {
specMat$buckets_zones <- rbind(specMat$buckets_zones, buckets_zones )
} else {
specMat$buckets_zones <- buckets_zones
}
specMat$LOGMSG <- LOGMSG
return(specMat)
}
#' checkMacroCmdFile
#'
#' \code{checkMacroCmdFile} Check if the macro-commands included in the input file (commandfile)
#' are compliant with the allowed commands.
#'
#' @param commandfile The macro-commands file - the allowed commands are : 'align', 'warp',
#' 'clupa', 'gbaseline', 'baseline', 'qnmrbline', 'airpls', 'binning', 'calibration',
#' 'normalisation', 'denoising', 'bucket', 'zero'.
#'
#' @return return 1 if the macro-commands included in the input file are compliant, 0 if not.
#'
#' @examples
#' data_dir <- system.file("extra", package = "Rnmr1D")
#' CMDFILE <- file.path(data_dir, "NP_macro_cmd.txt")
#' ret <- checkMacroCmdFile(CMDFILE)
#'
#' @seealso the NMRProcFlow online documentation \url{https://nmrprocflow.org/} and especially
#' the Macro-command Reference Guide (\url{https://nmrprocflow.org/themes/pdf/Macrocommand.pdf})
checkMacroCmdFile <- function(commandfile) {
ret <- 1
allowKW <- c( 'align', 'warp', 'clupa', 'shift', 'gbaseline', 'baseline', 'qnmrbline', 'airpls', 'binning', 'calibration', 'normalisation', 'denoising', 'bucket', 'zero', 'EOL' )
tryCatch({
# Read the macrocommand file
CMDTEXT <- gsub("\t", "", readLines(commandfile))
CMDTEXT <- CMDTEXT[ grep( "^[^ ]", CMDTEXT ) ]
CMD <- CMDTEXT[ grep( "^[^#]", CMDTEXT ) ]
CMD <- gsub("^ ", "", gsub(" $", "", gsub(" +", ";", CMD)))
L <- unique(sort(gsub(";.*$","", CMD)))
L <- L[ grep( "^[^0-9-]", L)]
ret <- ifelse( sum(L %in% allowKW)==length(L), 1, 0 )
}, error=function(e) {
ret <- 0
})
return(ret)
}
#' RWrapperCMD1D
#'
#' \code{RWrapperCMD1D} belongs to the low-level functions group - it serves as a wrapper to
#' call internale functions for processing.
#'
#' @param cmdName the name of internal function
#' @param specMat a 'specMat' object
#' @param ... specific parameters of the requested function
#' @return
#' \code{specMat} : a 'specMat' object
RWrapperCMD1D <- function(cmdName, specMat, ...)
{
repeat {
if (cmdName == lbCALIB) {
specMat <- RCalib1D(specMat, ...)
break
}
if (cmdName == lbNORM) {
specMat <- RNorm1D(specMat, ...)
break
}
if (cmdName == lbGBASELINE) {
specMat <- RGbaseline1D(specMat, ...)
break
}
if (cmdName == lbBASELINE) {
specMat <- RBaseline1D(specMat, ...)
break
}
if (cmdName == lbQNMRBL) {
specMat <- Rqnmrbc1D(specMat, ...)
break
}
if (cmdName == lbAIRPLS) {
specMat <- RairPLSbc1D(specMat, ...)
break
}
if (cmdName == lbFILTER) {
specMat <- RFilter1D(specMat, ...)
break
}
if (cmdName == lbZERO) {
specMat <- RZero1D(specMat, ...)
break
}
if (cmdName == lbALIGN) {
specMat <- RAlign1D(specMat, ...)
break
}
if (cmdName == lbCLUPA) {
specMat <- RCluPA1D(specMat, ...)
break
}
if (cmdName == lbWARP) {
specMat <- RWarp1D(specMat, ...)
break
}
if (cmdName == lbSHIFT) {
specMat <- RShift1D(specMat, ...)
break
}
if (cmdName == lbBUCKET) {
specMat <- RBucket1D(specMat, ...)
break
}
break
}
return(specMat)
}
#' doProcCmd
#'
#' \code{doProcCmd} it process the Macro-commands string array specified at input.
#'
#' @param specObj a complex list return by \code{doProcessing} function. See the manual
#' page of the \code{\link{doProcessing}} function for more details on its structure.
#' @param cmdstr the Macro-commands string array; See the Macro-command Reference Guide
#' (\url{https://nmrprocflow.org/themes/pdf/Macrocommand.pdf}) to have more details about
#' macro-commands.
#' @param ncpu The number of cores [default: 1]
#' @param debug a boolean to specify if we want the function to be more verbose.
#' @return
#' \code{specMat} : a 'specMat' object - See the manual page of the \code{\link{doProcessing}}
#' function for more details on its structure
#' @examples
#' \donttest{
#' data_dir <- system.file("extra", package = "Rnmr1D")
#' CMDFILE <- file.path(data_dir, "NP_macro_cmd.txt")
#' SAMPLEFILE <- file.path(data_dir, "Samples.txt")
#' out <- Rnmr1D::doProcessing(data_dir, cmdfile=CMDFILE,
#' samplefile=SAMPLEFILE, ncpu=2)
#' # Apply an intelligent bucketing (AIBIN)
#' specMat.new <- Rnmr1D::doProcCmd(out,
#' c("bucket aibin 10.2 10.5 0.3 3 0",
#' "9.5 4.9",
#' "4.8 0.5",
#' "EOL"
#' ),ncpu=2, debug=TRUE)
#' out$specMat <- specMat.new
#' }
doProcCmd <- function(specObj, cmdstr, ncpu=1, debug=FALSE)
{
# specMat
specMat <- specObj$specMat
# specParams
specParamsDF <- as.data.frame(specObj$infos, stringsAsFactors=FALSE)
SI <- as.numeric(specParamsDF$SI[1])
# Samples
samples <- specObj$samples
# Macro-commands
CMDTEXT <- cmdstr[ grep( "^[^ ]", cmdstr ) ]
CMD <- CMDTEXT[ grep( "^[^#]", CMDTEXT ) ]
CMD <- gsub("^ ", "", gsub(" $", "", gsub(" +", ";", CMD)))
specMat$nuc <- specObj$nuc
specMat$fWriteSpec <- FALSE
LOGFILE <- globvars$LOGFILE
cl <- parallel::makeCluster(ncpu)
doParallel::registerDoParallel(cl)
Sys.sleep(1)
while ( length(CMD)>0 && CMD[1] != EOL ) {
cmdLine <- CMD[1]
cmdPars <- unlist(strsplit(cmdLine[1],";"))
cmdName <- cmdPars[1]
repeat {
if (cmdName == lbCALIB) {
params <- as.numeric(cmdPars[-1])
if (length(params)>=3) {
PPMRANGE <- c( min(params[1:2]), max(params[1:2]) )
PPMREF <- params[3]
PPM_NOISE <- ifelse( length(params)==5, c( min(params[4:5]), max(params[4:5]) ), c( 10.2, 10.5 ) )
Write.LOG(LOGFILE, paste0("Rnmr1D: Calibration: PPM REF =",PPMREF,", Zone Ref = (",PPMRANGE[1],",",PPMRANGE[2],")\n"));
specMat <- RWrapperCMD1D(cmdName,specMat, PPM_NOISE, PPMRANGE, PPMREF)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbNORM) {
params <- cmdPars[-1]
if (length(params)==2) {
params <- as.numeric(params)
PPMRANGE <- c( min(params[1:2]), max(params[1:2]) )
Write.LOG(LOGFILE,paste0("Rnmr1D: Normalisation: Zone Ref = (",PPMRANGE[1],",",PPMRANGE[2],")\n"));
specMat <- RWrapperCMD1D(cmdName,specMat, normmeth='CSN', zones=matrix(PPMRANGE,nrow=1, ncol=2))
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
if (length(params)==1) {
NORM_METH <- params[1]
CMD <- CMD[-1]
zones <- NULL
while(CMD[1] != EOL) {
zones <- rbind(zones, as.numeric(unlist(strsplit(CMD[1],";"))))
CMD <- CMD[-1]
}
Write.LOG(LOGFILE,"Rnmr1D: Normalisation of the Intensities based on the selected PPM ranges...\n")
Write.LOG(LOGFILE,paste0("Rnmr1D: Method =",NORM_METH,"\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, normmeth=NORM_METH, zones=zones)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbGBASELINE || cmdName == lbBASELINE) {
params <- as.numeric(cmdPars[-1])
if (length(params)==6) {
PPM_NOISE <- c( min(params[1:2]), max(params[1:2]) )
PPMRANGE <- c( min(params[3:4]), max(params[3:4]) )
Write.LOG(LOGFILE,paste0("Rnmr1D: Baseline Correction: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"));
if (cmdName == lbBASELINE) {
if (params[5]>0) { # to be compliant with version<1.1.4
BCMETH <- params[5]
WSFAC <- (7-params[6])/4
WINDOWSIZE <- round(WSFAC*SI/(BCMETH*64))
} else {
WINDOWSIZE <- round(( 1/2^(params[6]-2) )*(SI/64))
}
Write.LOG(LOGFILE,paste0("Rnmr1D: Type=Local - Window Size = ",WINDOWSIZE,"\n"));
specMat <- RWrapperCMD1D(cmdName,specMat,PPM_NOISE, PPMRANGE, WINDOWSIZE)
} else {
WS <- params[5]
NEIGH <- params[6]
Write.LOG(LOGFILE,paste0("Rnmr1D: Type=Global - Smoothing Parameter=",WS," - Window Size=",NEIGH,"\n"));
specMat <- RWrapperCMD1D(cmdName,specMat,PPM_NOISE, PPMRANGE, WS, NEIGH)
}
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbQNMRBL) {
params <- as.numeric(cmdPars[-1])
if (length(params)==4) {
PPM_NOISE <- c( min(params[1:2]), max(params[1:2]) )
PPMRANGE <- c( min(params[3:4]), max(params[3:4]) )
Write.LOG(LOGFILE,paste0("Rnmr1D: Baseline Correction: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"))
Write.LOG(LOGFILE,paste0("Rnmr1D: Type=q-NMR\n"))
specMat <- RWrapperCMD1D(cmdName,specMat,PPM_NOISE, PPMRANGE)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbAIRPLS) {
params <- as.numeric(cmdPars[-1])
if (length(params)>=3) {
porder <- 1
PPMRANGE <- c( min(params[1:2]), max(params[1:2]) )
LAMBDA <- params[3]
if (length(params)==4) porder <- params[4]
Write.LOG(LOGFILE,paste0("Rnmr1D: Baseline Correction: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"))
Write.LOG(LOGFILE,paste("Rnmr1D: Type=airPLS, lambda=",LAMBDA, ", order=",porder, "\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, PPMRANGE, LAMBDA, porder=porder)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbFILTER) {
params <- as.numeric(cmdPars[-1])
if (length(params)==4) {
PPMRANGE <- c( min(params[1:2]), max(params[1:2]) )
FORDER <- params[3]
FLENGTH <- params[4]
Write.LOG(LOGFILE,paste0("Rnmr1D: Denoising: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"));
Write.LOG(LOGFILE,paste0("Rnmr1D: Filter Order=",FORDER," - Filter Length=",FLENGTH,"\n"));
specMat <- RWrapperCMD1D(cmdName,specMat,PPMRANGE, FORDER, FLENGTH)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbALIGN) {
params <- as.numeric(cmdPars[-1])
Selected <- NULL
if (length(params)==6 && (params[5]<2 || params[6])) {
level <- unique(samples[ order(samples[, params[5]+1]), params[5]+1 ])[params[6]]
Selected <- .N(rownames(samples[ samples[, params[5]+1]==level, ]))
}
if (length(params)>=4) {
PPMRANGE <- c( min(params[1:2]), max(params[1:2]) )
RELDECAL= params[3]
idxSref=params[4]
Write.LOG(LOGFILE,paste0("Rnmr1D: Alignment: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"))
Write.LOG(LOGFILE,paste0("Rnmr1D: Rel. Shift Max.=",RELDECAL," - Reference=",idxSref,"\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, PPMRANGE, RELDECAL, idxSref, Selected=Selected, fapodize=FALSE)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbWARP) {
params <- cmdPars[-1]
Selected <- NULL
if (length(params)==6 && (.N(params[5])<2 || .N(params[6]))) {
level <- unique(samples[ order(samples[, .N(params)[5]+1]), .N(params)[5]+1 ])[.N(params[6])]
Selected <- .N(rownames(samples[ samples[, .N(params[5])+1]==level, ]))
}
if (length(params)>=4) {
PPMRANGE <- c( min(.N(params[1:2])), max(.N(params[1:2])) )
idxSref=.N(params[3])
warpcrit=.C(params[4])
Write.LOG(LOGFILE,paste0("Rnmr1D: Alignment: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"))
Write.LOG(LOGFILE,paste0("Rnmr1D: Parametric Time Warping Method - Reference=",idxSref," - Optim. Crit=",warpcrit,"\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, PPMRANGE, idxSref, warpcrit, Selected=Selected)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbSHIFT) {
params <- as.numeric(cmdPars[-1])
Selected <- NULL
if (length(params)==5 && (params[4]<2 || params[5])) {
level <- unique(samples[ order(as.character(samples[, params[4]+1])), params[4]+1 ])[params[5]]
Selected <- .N(rownames(samples[ samples[, params[4]+1]==level, ]))
}
if (length(params)>=3) {
PPMRANGE <- c( min(params[1:2]), max(params[1:2]) )
RELDECAL= params[3]
Write.LOG(LOGFILE,paste0("Rnmr1D: Shift: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"))
Write.LOG(LOGFILE,paste0("Rnmr1D: Shift value =",RELDECAL,"\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, PPMRANGE, RELDECAL, Selected=Selected)
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbCLUPA) {
params <- as.numeric(cmdPars[-1])
Selected <- NULL
if (length(params)==9 && (params[8]<2 || params[9])) {
level <- unique(samples[ order(samples[, params[8]+1]), params[8]+1 ])[params[9]]
Selected <- .N(rownames(samples[ samples[, params[8]+1]==level, ]))
}
if (length(params)>=7) {
PPM_NOISE <- c( min(params[1:2]), max(params[1:2]) )
PPMRANGE <- c( min(params[3:4]), max(params[3:4]) )
RESOL= params[5]
SNR= params[6]
idxSref=params[7]
Write.LOG(LOGFILE,paste0("Rnmr1D: Alignment: PPM Range = ( ",min(PPMRANGE)," , ",max(PPMRANGE)," )\n"))
Write.LOG(LOGFILE,paste0("Rnmr1D: CluPA - Resolution =",RESOL," - SNR threshold=",SNR, " - Reference=",idxSref,"\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, PPM_NOISE, PPMRANGE, RESOL, SNR, idxSref, Selected=Selected, DEBUG=debug)
if (debug) Write.LOG(LOGFILE, specMat$LOGMSG )
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
}
break
}
if (cmdName == lbZERO) {
CMD <- CMD[-1]
zones2 <- NULL
while(CMD[1] != EOL) {
zones2 <- rbind(zones2, as.numeric(unlist(strsplit(CMD[1],";"))))
CMD <- CMD[-1]
}
Write.LOG(LOGFILE,"Rnmr1D: Zeroing the selected PPM ranges ...\n")
specMat <- RWrapperCMD1D(cmdName,specMat, zones2, DEBUG=debug)
if (debug) Write.LOG(LOGFILE, specMat$LOGMSG )
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
break
}
if (cmdName == lbBUCKET) {
if ( !( length(cmdPars) >= 6 && cmdPars[2] %in% c('aibin','erva','unif') ) &&
!( length(cmdPars) <= 3 && cmdPars[2] %in% c('vsb') ) ) {
CMD <- CMD[-1]
break;
}
CMD <- CMD[-1]
zones <- NULL
while(CMD[1] != EOL) {
zones <- rbind(zones, as.numeric(unlist(strsplit(CMD[1],";"))))
CMD <- CMD[-1]
}
fappend <- 0
if ( cmdPars[2] %in% c('aibin','erva','unif') ) {
params <- as.numeric(cmdPars[-c(1:2)])
PPM_NOISE <- c( min(params[1:2]), max(params[1:2]) )
resol <- params[3]; snr <- params[4];
if (length(params)>4) fappend <- params[5]
Write.LOG(LOGFILE,paste0("Rnmr1D: ",toupper(cmdPars[2])," - Resolution =",resol," - SNR threshold=",snr,"\n"))
} else {
PPM_NOISE <- NULL
resol <- 0; snr <- 0;
if (length(params)>2) fappend <- params[3]
Write.LOG(LOGFILE,paste0("Rnmr1D: ",toupper(cmdPars[2]),"\n"))
}
Write.LOG(LOGFILE,"Rnmr1D: Bucketing the selected PPM ranges ...\n")
Write.LOG(LOGFILE,paste0("Rnmr1D: ",toupper(cmdPars[2])," - Resolution =",resol," - SNR threshold=",snr, " - Append=",fappend,"\n"))
specMat <- RWrapperCMD1D(cmdName,specMat, cmdPars[2], resol, snr, zones, PPM_NOISE, fappend, DEBUG=debug)
if (dim(specMat$buckets_zones)[1]>2) {
specMat$buckets_zones <- specMat$buckets_zones[order(specMat$buckets_zones[,1]), ]
}
if (debug) Write.LOG(LOGFILE, specMat$LOGMSG )
specMat$fWriteSpec <- TRUE
CMD <- CMD[-1]
break
}
CMD <- CMD[-1]
break
}
gc()
}
parallel::stopCluster(cl)
return(specMat)
}
#' plotSpecMat Overlaid/Stacked Plot
#'
#' \code{plotSpecMat} Plot spectra set, overlaid or stacked; if stacked, plot with or
#' without a perspective effect.
#'
#' @param specMat a 'specMat' object - Spectra matrix in specMat$int (rows = samples,
#' columns = buckets)
#' @param ppm_lim ppm range of the plot
#' @param K Graphical height of the stack (0 .. 1),(default=0.67)
#' @param pY Intensity limit factor (default 1)
#' @param dppm_max Max ppm shift to have a perspective effect
#' @param asym Correction of vertical parallax effect (-1 .. 1)
#' -1 : parallelogram
#' 0 : trapeze with maximum asymmetric
#' 1 : symmetric trapeze
#' @param beta Correction of horizontal parallax effect (0 .. 0.2) (defaut 0)
#' @param cols Vector of colors (same size that the number of spectra, i.e dim(specmat)[1])
#'
#' @examples
#' \donttest{
#' data_dir <- system.file("extra", package = "Rnmr1D")
#' cmdfile <- file.path(data_dir, "NP_macro_cmd.txt")
#' samplefile <- file.path(data_dir, "Samples.txt")
#' out <- Rnmr1D::doProcessing(data_dir, cmdfile=cmdfile,
#' samplefile=samplefile, ncpu=2)
#' # Overlaid plot
#' plotSpecMat(out$specMat, ppm_lim=c(0.5,9), K=0, pY=0.1)
#' # Stacked plot with perspective effect
#' plotSpecMat(out$specMat, ppm_lim=c(-0.1,9),K=0.33)
#' # Stacked plot with perspective effect with maximum asymmetric
#' plotSpecMat(out$specMat, ppm_lim=c(0.5,5), K=0.33, asym=0)
#' cols <- c(rep("red",3), rep("blue",3))
#' # Stacked plot with colors accordings to group levels
#' plotSpecMat(out$specMat, ppm_lim=c(0.5,5), K=0.67, dppm_max=0, cols=cols)
#' }
plotSpecMat <- function(specMat, ppm_lim=c(min(specMat$ppm),max(specMat$ppm)), K=0.67, pY=1, dppm_max=0.2*(max(ppm_lim) - min(ppm_lim)), asym=1, beta=0, cols=NULL)
{
specmat <- specMat$int
ppm <- specMat$ppm
i2<-which(ppm<=min(ppm_lim))[1]
i1<-length(which(ppm>max(ppm_lim)))
ppm_sub <- ppm[i1:i2]
specmat_sub <- specmat[ , i1:i2 ]
if (K==0) dppm_max <- 0
Ymax <- pY*max(specmat_sub) # intensity limit
diffppm <- max(ppm_sub) - min(ppm_sub)
Cy <- 1-beta*(max(ppm_sub)-asym*dppm_max - ppm_sub)/(max(ppm_sub)-asym*dppm_max - min(ppm_sub))
if (is.null(cols)) cols <- grDevices::rainbow(dim(specmat_sub)[1], s=0.8, v=0.75)
graphics::plot( cbind(ppm_sub, specmat_sub[1,]), xlim=rev(ppm_lim), ylim=c(0,Ymax), type="h", col="white", xlab="ppm", ylab="Intensity (u.a)")
graphics::segments( max(ppm_sub) - asym*dppm_max, K*Ymax, min(ppm_sub) + dppm_max, (1-beta)*K*Ymax , col="lightgrey" )
graphics::segments( max(ppm_sub), 0, min(ppm_sub), 0, col="lightgrey" )
graphics::segments( max(ppm_sub), 0, max(ppm_sub) - asym*dppm_max, K*Ymax , col="lightgrey" )
graphics::segments( min(ppm_sub) + dppm_max, (1-beta)*K*Ymax, min(ppm_sub), 0, col="lightgrey" )
for( i in 1:dim(specmat_sub)[1] ) {
dppm <- dppm_max*(i-1)/(dim(specmat_sub)[1]-1)
ppmi <- ppm_sub*(1-(1+asym)*dppm/diffppm) + dppm*(1+(1+asym)*min(ppm_sub)/diffppm)
y_offset <- K*Ymax*Cy*(i-1)/(dim(specmat_sub)[1]-1)
graphics::lines(cbind(ppmi, specmat_sub[i,] + y_offset), col=cols[i])
}
}
#' getBucketsTable
#'
#' Generates the buckets table
#'
#' @param specObj a complex list return by \code{doProcessing} function. See the manual page of the \code{\link{doProcessing}} function for more details on its structure.
#' @return
#' the buckets table
getBucketsTable <- function(specObj)
{
outtable <- NULL
specMat <- specObj$specMat
buckets <- specMat$buckets_zones
if ( ! is.null(buckets) ) {
colnames(buckets) <- c("max","min")
buckets_m <- t(simplify2array(lapply( c( 1:(dim(buckets)[1]) ),
function(x) { c( length(which(specMat$ppm>buckets[x,1])), length(which(specMat$ppm>buckets[x,2])) ) }
)))
buckets <- as.data.frame(buckets, stringsAsFactors=FALSE)
buckets$center <- 0.5*(buckets[,1]+buckets[,2])
buckets$width <- 0.5*abs(buckets[,1]-buckets[,2])
buckets$intMax <- specMat$ppm[ C_ppmIntMax_buckets(specMat$int, buckets_m) ]
if ( is.null(specMat$namesASintMax) || ! specMat$namesASintMax ) {
buccenter <- buckets$center
} else {
buccenter <- buckets$intMax
}
buckets$name <- gsub("^(\\d+)","B\\1", gsub("\\.", "_", gsub(" ", "", sprintf("%7.4f",buccenter))) )
outtable <- buckets[, c("name", "center", "min", "max", "width", "intMax") ]
}
return(outtable)
}
#' getBucketsDataset
#'
#' Generates the matrix including the integrations of the areas defined by the buckets (columns)
#' on each spectrum (rows)
#'
#' Before bucket data export in order to make all spectra comparable with each other, the variations of the overall concentrations of samples have to be taken into account. We propose two normalization methods. In NMR metabolomics, the total intensity normalization (called the Constant Sum Normalization) is often used so that all spectra correspond to the same overall concentration. It simply consists in normalizing the total intensity of each individual spectrum to a same value. An other method called Probabilistic Quotient Normalization (Dieterle et al. 2006) assumes that biologically interesting concentration changes influence only parts of the NMR spectrum, while dilution effects will affect all metabolites signals. Probabilistic Quotient Normalization (PQN) starts by the calculation of a reference spectrum based on the median spectrum. Next, for each variable of interest the quotient of a given test spectrum and reference spectrum is calculated and the median of all quotients is estimated. Finally, all variables of the test spectrum are divided by the median quotient.
#' An internal reference can be used to normalize the data. For example, an electronic reference (ERETIC, see Akoka et al. 1999, or ERETIC2 generated with TopSpin software) can be used for this purpose. The integral value of each bucket will be divided by the integral value of the ppm range given as reference.
#'
#' @param specObj a complex list return by \code{doProcessing} function. See the manual page of the \code{\link{doProcessing}} function for more details on its structure.
#' @param norm_meth Normalization method. The possible values are : 'none', 'CSN' or 'PDN'. See below.
#' @param zoneref Specify the ppm zone of the internal reference (i.e. ERETIC) if applicable. default is NA.
#'
#' @return the data matrix
#'
#' @references{
#' Akoka S1, Barantin L, Trierweiler M. (1999) Concentration Measurement by Proton NMR
#' Using the ERETIC Method, Anal. Chem 71(13):2554-7. doi: 10.1021/ac981422i.
#'
#' Dieterle F., Ross A., Schlotterbeck G. and Senn H. (2006). Probabilistic Quotient
#' Normalization as Robust Method to Account for Dilution of Complex Biological Mixtures.
#' Application in 1H NMR Metabonomics. Analytical Chemistry, 78:4281-4290.doi: 10.1021/ac051632c
#' }
#'
#' @examples
#' \donttest{
#' data_dir <- system.file("extra", package = "Rnmr1D")
#' cmdfile <- file.path(data_dir, "NP_macro_cmd.txt")
#' samplefile <- file.path(data_dir, "Samples.txt")
#' out <- Rnmr1D::doProcessing(data_dir, cmdfile=cmdfile,
#' samplefile=samplefile, ncpu=2)
#' outMat <- getBucketsDataset(out, norm_meth='CSN')
#' }
getBucketsDataset <- function(specObj, norm_meth='none', zoneref=NA)
{
outdata <- NULL
specMat <- specObj$specMat
buckets <- specMat$buckets_zones
if ( ! is.null(buckets) ) {
# get index of buckets' ranges
colnames(buckets) <- c("max","min")
buckets_m <- t(simplify2array(lapply( c( 1:(dim(buckets)[1]) ),
function(x) { c( length(which(specMat$ppm>buckets[x,1])), length(which(specMat$ppm>buckets[x,2])) ) }
)))
# Integration
buckets_IntVal <- C_all_buckets_integrate (specMat$int, buckets_m, 0)
if (norm_meth == 'CSN') {
buckets_IntVal <- C_buckets_CSN_normalize( buckets_IntVal )
}
if (norm_meth == 'PQN') {
buckets_IntVal_CSN <- C_buckets_CSN_normalize( buckets_IntVal )
bucVref_IntVal <- C_MedianSpec(buckets_IntVal_CSN)
bucRatio <- buckets_IntVal_CSN / bucVref_IntVal
Coeff <- apply(bucRatio,1, stats::median)
buckets_IntVal <- buckets_IntVal_CSN / Coeff
}
# if supplied, integrate of all spectra within the PPM range of the reference signal
Vref <- 0*c(1:specMat$nspec)
if (! is.na(zoneref)) {
istart <- length(which(specMat$ppm>max(zoneref)))
iend <- length(which(specMat$ppm>min(zoneref)))
Vref <- C_spectra_integrate (specMat$int, istart, iend)
buckets_IntVal <- buckets_IntVal/Vref
}
# read samples
samples <- specObj$samples
# write the data table
if ( is.null(specMat$namesASintMax) || ! specMat$namesASintMax ) {
buccenter <- 0.5*(buckets[,1]+buckets[,2])
} else {
buccenter <- specMat$ppm[ C_ppmIntMax_buckets(specMat$int, buckets_m) ]
}
bucnames <- gsub("^(\\d+)","B\\1", gsub("\\.", "_", gsub(" ", "", sprintf("%7.4f",buccenter))) )
outdata <- buckets_IntVal
colnames(outdata) <- bucnames
rownames(outdata) <- samples[,2]
}
return(outdata)
}
#' getSnrDataset
#'
#' Generates the Signal-Noise-Ratio dataset
#'
#' whatever the bucketing approach used, the Signal-to-Noise ratio is a good quality indicator.
#' Thus, it is possible to check buckets based on their Signal-to-Noise ratio.
#'
#' @param specObj a complex list return by \code{doProcessing} function.
#' See the manual page of the \code{\link{doProcessing}} function for more details on its structure.
#' @param zone_noise Specify a ppm range of noisy zone default is c(10.2,10.5)
#' @param ratio boolean; TRUE for output Signal-Noise Ratio, or FALSE to output maximum value of
#' each bucket and in addition, the estimate noise as a separate column
#' @return
#' the Signal-Noise-Ratio matrix
getSnrDataset <- function(specObj, zone_noise=c(10.2,10.5), ratio=TRUE)
{
outdata <- NULL
specMat <- specObj$specMat
buckets <- specMat$buckets_zones
if ( ! is.null(buckets) ) {
# get index of buckets' ranges
colnames(buckets) <- c("max","min")
buckets_m <- t(simplify2array(lapply( c( 1:(dim(buckets)[1]) ),
function(x) { c( length(which(specMat$ppm>buckets[x,1])), length(which(specMat$ppm>buckets[x,2])) ) }
)))
# Compute Vnoise vector & Maxvals maxtrix
i1 <- ifelse( max(zone_noise)>=specMat$ppm_max, 1, length(which(specMat$ppm>max(zone_noise))) )
i2 <- ifelse( min(zone_noise)<=specMat$ppm_min, specMat$size - 1, which(specMat$ppm<=min(zone_noise))[1] )
flg <- 1
Vnoise <- abs( C_noise_estimate(specMat$int, i1, i2, flg) )
MaxVals <- C_maxval_buckets (specMat$int, buckets_m)
# read samples
samples <- specObj$samples
# write the data table
if ( is.null(specMat$namesASintMax) || ! specMat$namesASintMax ) {
buccenter <- 0.5*(buckets[,1]+buckets[,2])
} else {
buccenter <- specMat$ppm[ C_ppmIntMax_buckets(specMat$int, buckets_m) ]
}
bucnames <- gsub("^(\\d+)","B\\1", gsub("\\.", "_", gsub(" ", "", sprintf("%7.4f",buccenter))) )
if (ratio) {
outdata <- cbind( MaxVals/(2*Vnoise))
colnames(outdata) <- bucnames
} else {
outdata <- cbind( Vnoise, MaxVals )
colnames(outdata) <- c( 'Noise', bucnames )
}
rownames(outdata) <- samples[,2]
}
return(outdata)
}
#' getSnrDataset
#'
#' Generates the spectral data matrix. The first column indicates the value of ppm, then the following columns correspond to spectral data, one column per spectrum.
#'
#' @param specObj a complex list return by \code{doProcessing} function.
#' @return
#' the spectral data matrix
getSpectraData <- function(specObj)
{
# read samples
specMat <- specObj$specMat
samples <- specObj$samples
outdata <- cbind( specMat$ppm, t(specMat$int) )
colnames(outdata) <- c( "ppm", samples[,1] )
return(outdata)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.