#-------------------------------------------------------------------------------
# Program: pointCleaning.R
# Objective: detection of outlier points for time courses using a
# locfit smoothing and an interval of prediction
# Author: I.Sanchez
# Creation: 13/04/2018
# Update : 04/09/2020
#-------------------------------------------------------------------------------
#' flagPointLocfit
#'
#' detection of outlier points in time courses
#' @param datain input dataframe of parameters
#' @param trait character, parameter of interest (ex: plantHeight)
#' @param xvar character, time variable (ex: thermalTime)
#' @param loopID character, ID on which to make the loop
#' @param locfit.h numeric, the constant component of the smoothing parameter
#' @param threshold numeric, threshold to detect on the prediction interval
#'
#' @return a list
#' \describe{
#' \item{1}{prediction and detection of outlier on observed data}
#' \item{2}{prediction on regular abscissa data}
#' \item{3}{time courses with not enough point to be evaluated}
#' }
#' @export
#'
#' @importFrom dplyr select bind_rows
#'
#' @examples
#' \donttest{
#' # Example
#' }
flagPointLocfit<-function(datain,trait,xvar,loopID,locfit.h,threshold){
.Deprecated("FuncDetectPointOutlierLocFit")
# Initialisation
#myfm<-list()
tabpred<-NULL # dataframe on observed data
tabpredreg<-NULL # dataframe on regular abscissa data
tabNotEnoughPoint<-NULL # dataframe of observed data with not enough point by loopId
for (i in levels(factor(datain[,loopID]))){
### select the data
tmp<-datain[datain[,loopID]==i,]
tmp<-na.omit(select(tmp,.data[[trait]],.data[[xvar]]))
y<-tmp[,trait]
x<-tmp[,xvar]
# abscisse reguliere
xreg<-seq(0,max(x,na.rm=TRUE),1)
### Choix du pas suivant le nombre de donnees, car certaines courbes peuvent avoir peu de points...
if (length(x) <=4){
next
} else if (length(x) <= 7) {
if (is.null(locfit.h)) mylocfit<-30
else mylocfit<-locfit.h
} else {
if (is.null(locfit.h)) mylocfit<-15
else mylocfit<-locfit.h
}
### lissage et intervalles de prediction
if (length(na.omit(y)) <=4){
tabNotEnoughPoint<-bind_rows(tabNotEnoughPoint,
select(datain[datain[,loopID]==i,],
.data[[loopID]],.data[[xvar]],
.data[[trait]]))
names(tabNotEnoughPoint)<-c("Ref","x","y")
next
} else {
# modele locfit
myfm<-locfit(y ~ lp(x, h = mylocfit, deg = 2))
#-- Recuperation des predictions pour l'intervalle sur vecteur regulier
ypred<-predict(myfm,newdata=list(x=xreg),se.fit=TRUE)
lwr<-ypred[[1]]-threshold*ypred[[2]]
upr<-ypred[[1]]+threshold*ypred[[2]]
#-- Recuperation des predictions pour l'intervalle sur x observe
ypred2<-predict(myfm,newdata=list(x=x),se.fit=TRUE)
lwr2<-ypred2[[1]]-threshold*ypred2[[2]]
upr2<-ypred2[[1]]+threshold*ypred2[[2]]
#-- concatenation des resus en dataframe
tppred<-cbind.data.frame(rep(i,length(x)),x,y,ypred2[[1]],ypred2[[2]],lwr2,upr2)
tppred<-mutate(tppred,outlier=if_else(y < upr2 & y > lwr2,0,1))
tabpred<-rbind.data.frame(tabpred,tppred)
tppredreg<-cbind.data.frame(rep(i,length(xreg)),xreg,ypred[[1]],ypred[[2]],lwr,upr)
tabpredreg<-rbind.data.frame(tabpredreg,tppredreg)
}
}
names(tabpred)<-c("Ref","x","y","ypred","ypred_se","lwr","upr","outlier")
names(tabpredreg)<-c("Ref","xreg","ypred","ypred_se","lwr","upr")
return(list(tabpred,tabpredreg,tabNotEnoughPoint))
}
#' plotFlagPoint
#'
#' plot detection of outlier points in time courses
#' @param smoothin the 1st element of a flagPointLocfit() list result. smoothin contains the prediction on observed data and the detected outlier
#' @param loopID character, name of the column of the time courses' ID
#' @param myselect character, a vector of ID
#'
#' @return a graph
#' @export
#'
#' @examples
#' \donttest{
#' # Example
#' }
plotFlagPoint<-function(smoothin,loopID,myselect){
# filtering smoothin dataframe for some time courses
tmp<-smoothin[smoothin[,loopID] %in% myselect,]
g<-ggplot(data=tmp,aes(x=x,y=y)) +
geom_point() +
geom_line(aes(y=ypred,colour="red")) +
geom_line(aes(y=upr,colour="green")) +
geom_line(aes(y=lwr,colour="green")) +
geom_point(data=subset(tmp, outlier == 1),colour="blue",shape=2,size=2) +
facet_wrap(loopID)
print(g)
}
#,nbCurveByPage,xlabel
#plot(x,y,main=i,xlab=xlabel,ylab=trait,cex.main=0.7)
# vecteur pour disposition des graphs dans page
# if (nbCurveByPage ==1) mypage<-c(1,1)
# else if (nbCurveByPage ==2) mypage<-c(2,1)
# else if (nbCurveByPage %in% c(3,4)) mypage<-c(2,2)
# else if (nbCurveByPage %in% c(5,6)) mypage<-c(2,2)
# else if (nbCurveByPage %in% c(7,8)) mypage<-c(4,2)
# else if (nbCurveByPage ==9) mypage<-c(3,3)
# par(mfrow=mypage)
# Graph locfit modele + IC
#plot(x,y,main=i,xlab=xlabel,pch=19,cex.main=0.7,cex=0.8)
#lines(xreg,ypred[[1]],col="red")
#lines(xreg,upr,col="green")
#lines(xreg,lwr,col="green")
#points(filter(tppred,outlier==1)[,"x"],filter(tppred,outlier==1)[,"y"],col="blue",pch=1,cex=1.5)
#------------- Tutorial version
#' FuncDetectPointOutlierLocFit
#' @description function to model each curve of a dataset using a local regression
#' @param datain input dataframe. This dataframe contains a set of time courses
#' @param myparam character, name of the variable to model in datain
#' (for example, Biomass, PH or LA and so on)
#' @param mytime character, name of the time variable in datain which must be numeric
#' @param myid character, name of the id variable in datain
#' @param mylevel numeric, factor to calculate the confidence interval
#' @param mylocfit numeric, The constant component of the smoothing parameter. (see the locfit())
#'
#' @details see locfit() help function from the locfit R library
#' The user can act on:
#' \describe{
#' \item{mylocfit}{the constant of the smoothing parameter. Increase mylocfit to have a
#' very smooth curve}
#' \item{mylevel}{the level to calculate the confidence interval. Increase mylevel to
#' exclude less outliers}
#' }
#' @details to produce the grahics of the prediction and detected outliers, please use
#' plotDetectPointOutlierLocFit() function.
#' @return a data.frame with outlier containing the following columns:
#' \describe{
#' \item{Ref}{the id variable}
#' \item{mytime}{name of the time variable in datain}
#' \item{myparam}{name of the modeled variable in datain}
#' \item{ypred}{the locfit prediction}
#' \item{sd_ypred}{standard deviation of the prediction}
#' \item{lwr}{lower bound of the confidence interval}
#' \item{upr}{upper bound of the confidence interval}
#' \item{outlier}{flag of detected outlier (0 is outlier, 1 is not)}
#' }
#' @export
#'
#' @importFrom dplyr select_ bind_rows mutate if_else
#'
#' @examples
#' \donttest{
#' data(plant1)
#' library(locfit)
#' selec<-c("manip1_1_1_WW","manip1_1_2_WW","manip1_1_3_WW",
#' "manip1_1_4_WW","manip1_1_5_WW")
#' mydata<-plant1[plant1[,"Ref"] %in% selec,]
#' resu<-FuncDetectPointOutlierLocFit(datain=mydata,
#' myparam="biovolume",mytime="thermalTime",
#' myid="Ref",mylevel=5,mylocfit=70)
#' }
#-------------------------------------------------------------------
FuncDetectPointOutlierLocFit <-function(datain,
myparam,
mytime,
myid,
mylevel,
mylocfit) {
myfm<-list() # initialize a list
tabpred<-NULL # initialize a dataframe
for (i in levels(factor(datain[,myid]))) {
tmp <- datain[datain[,myid]==i,]
tmp <- na.omit(tmp)
y <- tmp[,myparam]
x <- tmp[,mytime]
xpred <- seq(0,max(x,na.rm=TRUE),1)
# Choice of step according to the number of data
if (length(x) <=6) {
print("Not enough data (<=6 points): ",i)
next
} else {
myfm <- locfit(y ~ lp(x, h = mylocfit, deg = 2))
# Retrieving predictions for the interval for a fine graphic
ypred <- predict(myfm,newdata=list(x=xpred),se.fit=TRUE)
lwr <- ypred[[1]]-mylevel*ypred[[2]]
upr <- ypred[[1]]+mylevel*ypred[[2]]
# Retrieving predictions for the x input interval
ypred2 <- predict(myfm,newdata=list(x=x),se.fit=TRUE)
lwr2 <- ypred2[[1]] - mylevel*ypred2[[2]]
upr2 <- ypred2[[1]] + mylevel*ypred2[[2]]
# joining results into dataframe
tppred <- cbind.data.frame(rep(i,length(x)),x,y,ypred2[[1]],ypred2[[2]],lwr2,upr2)
tppred <- mutate(tppred,outlier=if_else(y < upr2 & y > lwr2,0,1))
tabpred <- rbind.data.frame(tabpred,tppred)
}
}
names(tabpred)<-c(myid,mytime,myparam,"ypred","sd_ypred","lwr","upr","outlier")
return(tabpred)
}
#---------------------------- End of file --------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.