# R/fregre.np.r In fda.usc: Functional Data Analysis and Utilities for Statistical Computing

#### Documented in fregre.np

#' @title Functional regression with scalar response using non-parametric kernel
#' estimation
#'
#' @description Computes functional regression between functional explanatory variables and
#' scalar response using kernel estimation.
#'
#' @details The non-parametric functional regression model can be written as follows \deqn{y_i =r(X_i)+\epsilon_i}{ y
#' = r(X) + \epsilon} where the unknown smooth real function \eqn{r} is
#' estimated using kernel estimation by means of
#' \deqn{\hat{r}(X)=\frac{\sum_{i=1}^{n}{K(h^{-1}d(X,X_{i}))y_{i}}}{\sum_{i=1}^{n}{K(h^{-1}d(X,X_{i}))}}}{\hat{r}(X)=(\sum_i
#' K(d(X,X_i))y_i/h) / (\sum_i K(d(X,X_i)/h)) i=1,...,n} where \eqn{K} is an
#' kernel function (see \code{Ker} argument), \code{h} is the smoothing
#' parameter and \eqn{d} is a metric or a semi-metric (see \code{metric}
#' argument).
#'
#' The distance between curves is calculated using the \code{\link{metric.lp}}
#' although any other semimetric could be used (see
#' The kernel is applied to a metric or semi-metrics that provides non-negative
#' values, so it is common to use asymmetric kernels. Different asymmetric
#' kernels can be used, see \code{\link{Kernel.asymmetric}}.\cr
#'
#' @param fdataobj \code{\link{fdata}} class object.
#' @param y Scalar response with length \code{n}.
#' @param h Bandwidth, \code{h>0}. Default argument values are provided as the
#' 5\%--quantile of the distance between \code{fdataobj} curves, see
#' @param Ker Type of asymmetric kernel used, by default asymmetric normal
#' kernel.
#' @param metric Metric function, by default \code{\link{metric.lp}}.
#' @param type.S Type of smothing matrix \code{S}. By default \code{S} is
#' calculated by Nadaraya-Watson kernel estimator (\code{S.NW}).
#' @param par.S List of parameters for \code{type.S}: \code{w}, the weights.
#' @param \dots Arguments to be passed for \code{\link{metric.lp}} o other
#' metric function.
#' @return Return:
#' \itemize{
#' \item {call}{ The matched call.}
#' \item {fitted.values}{ Estimated scalar response.}
#' \item {H}{ Hat matrix.}
#' \item {residuals}{ \code{y} minus \code{fitted values}.}
#' \item {df.residual}{ The residual degrees of freedom.}
#' \item {r2}{ Coefficient of determination.}
#' \item {sr2}{ Residual variance.}
#' \item {y}{ Response.}
#' \item {fdataobj}{ Functional explanatory data.}
#' \item {mdist}{ Distance matrix between \code{x} and \code{newx}.}
#' \item {Ker}{ Asymmetric kernel used.}
#' \item {h.opt}{ smoothing parameter or' bandwidth.}
#' }
#' @author Manuel Febrero-Bande, Manuel Oviedo de la Fuente
#' \email{manuel.oviedo@@udc.es}
#' @references Ferraty, F. and Vieu, P. (2006). \emph{Nonparametric functional
#' data analysis.} Springer Series in Statistics, New York. \cr
#'
#' Febrero-Bande, M., Oviedo de la Fuente, M. (2012).  \emph{Statistical
#' Computing in Functional Data Analysis: The R Package fda.usc.} Journal of
#' Statistical Software, 51(4), 1-28. \url{https://www.jstatsoft.org/v51/i04/}
#'
#' Hardle, W. \emph{Applied Nonparametric Regression}. Cambridge University
#' Press, 1994.
#' @keywords regression
#' @examples
#' \dontrun{
#' data(tecator)
#' absorp=tecator$absorp.fdata #' ind=1:129 #' x=absorp[ind,] #' y=tecator$y$Fat[ind] #' #' res.np=fregre.np(x,y,Ker=AKer.epa) #' summary(res.np) #' res.np2=fregre.np(x,y,Ker=AKer.tri) #' summary(res.np2) #' #' # with other semimetrics. #' res.pca1=fregre.np(x,y,Ker=AKer.tri,metri=semimetric.pca,q=1) #' summary(res.pca1) #' res.deriv=fregre.np(x,y,metri=semimetric.deriv) #' summary(res.deriv) #' x.d2=fdata.deriv(x,nderiv=1,method="fmm",class.out='fdata') #' res.deriv2=fregre.np(x.d2,y) #' summary(res.deriv2) #' x.d3=fdata.deriv(x,nderiv=1,method="bspline",class.out='fdata') #' res.deriv3=fregre.np(x.d3,y) #' summary(res.deriv3) #' } #' #' @export fregre.np<-function(fdataobj,y,h=NULL,Ker=AKer.norm, metric=metric.lp,type.S=S.NW,par.S=list(w=1),...){ if (!is.fdata(fdataobj)) fdataobj=fdata(fdataobj) isfdata<-is.fdata(y) nas<-is.na.fdata(fdataobj) nas.g<-is.na(y) if (is.null(names(y))) names(y)<-seq_len(length(y)) if (any(nas) & !any(nas.g)) { bb<-!nas cat("Warning: ",sum(nas)," curves with NA are omited\n") fdataobj$data<-fdataobj$data[bb,] y<-y[bb] } else { if (!any(nas) & any(nas.g)) { cat("Warning: ",sum(nas.g)," values of group with NA are omited \n") bb<-!nas.g fdataobj$data<-fdataobj$data[bb,] y<-y[bb] } else { if (any(nas) & any(nas.g)) { bb<-!nas & !nas.g cat("Warning: ",sum(!bb)," curves and values of group with NA are omited \n") fdataobj$data<-fdataobj$data[bb,] y<-y[bb] } }} x<-fdataobj[["data"]] #tt<-fdataobj[["argvals"]] #rtt<-fdataobj[["rangeval"]] C<-match.call() mf <- match.call(expand.dots = FALSE) m<-match(c("fdataobj", "y","h","Ker","metric","type.S","par.S"),names(mf),0L) # if (is.vector(x)) x <- t(data.matrix(x)) n = nrow(x) np <- ncol(x) if (!isfdata) { if (n != (length(y))) stop("ERROR IN THE DATA DIMENSIONS") if (is.null(rownames(x))) rownames(x) <- 1:n if (is.null(colnames(x))) colnames(x) <- 1:np if (is.vector(y)) y.mat<-matrix(y,ncol=1) else y.mat<-data.matrix(y) ny = nrow(y.mat) npy <- ncol(y.mat) } else { tty<-y$argvals
rtty<-y$rangeval y.mat<-y$data
ny = nrow(y.mat)
npy <- ncol(y.mat)
if (n != ny | npy!=np)         stop("ERROR IN THE DATA DIMENSIONS")
}

if (is.matrix(metric)) mdist<-metric
else mdist=metric(fdataobj,fdataobj,...)
#ke<-deparse(substitute(Ker))
#if (!is.function(Ker)) Ker<-get(Ker)

# if (is.character(Ker)){  nker <- function(u,mik=Ker){get(mik)(u)}
# } else {  nker <- function(u,mik=Ker){mik(u)} }

ty<-deparse(substitute(type.S))
attr(par.S, "call") <- ty
#print(h)
if (is.null(h)) {
#      nker=get(paste0("Ker.",unlist(strsplit(deparse(substitute(Ker)),"[.]"))[2]))
h=h.default(fdataobj,prob=0.1,len=1,metric = mdist, type.S = ty, Ker=Ker,...)
}
par.S$tt<-mdist if (is.null(par.S$Ker))  par.S$Ker<-Ker if (is.null(par.S$h))  par.S$h<-h #if (type.S=="S.KNN") par.S$cv<-TRUE
H=do.call(type.S,par.S)
par.S$cv<-TRUE H.cv=do.call(type.S,par.S) # for (j in 1:npy) { # y.est[,j]=H%*%y.mat[,j] # y.est.cv[,j]=H.cv%*%y.mat[,j] # } df=trace.matrix(H) yp=H%*%y.mat yp2<-H.cv%*%y.mat^2-(H.cv%*%y.mat)^2 if (isfdata) { yp<-fdata(yp,tty,rtty,names=y$names)
#      yp.cv<-fdata(y.est.cv,tty,rtty,names=y$names) rownames(yp$data)=rownames(y$data) # rownames(yp.cv$data)=rownames(y$data) ydif<-y-yp # ydif.cv<-y-yp.cv e<-y-yp # ecv<-y-yp.cv # sr2=sum(e^2)/(n-df) ycen=fdata.cen(y)$Xcen
#  	  r2=1-sum(e^2)/sum(ycen^2)
norm.e<-norm.fdata(e,metric=metric,...)^2
sr2=sum(norm.e)/(n-df)
ycen=fdata.cen(y)\$Xcen
r2=1-sum(norm.e)/sum(ycen^2)
out<-list("call"=C,"fitted.values"=yp,"H"=H,"residuals"=e,"df.residual"=df,"r2"=r2,
"sr2"=sr2,"y"=y,"fdataobj"=fdataobj,"mdist"=mdist,"Ker"=Ker,
"metric"=metric,"type.S"=type.S,"par.S"=par.S,"h.opt"=h,"m"=m)
}
else {
yp<-drop(yp)
y<-drop(y)
e<-y-yp
names(e)<-rownames(x)
sr2=sum(e^2)/(n-df)
ycen=y-mean(y)
r2=1-sum(e^2)/sum(ycen^2)
out<-list("call"=C,"fitted.values"=yp,"H"=H,"residuals"=e,"df.residual"=df,"r2"=r2,
"sr2"=sr2,"y"=drop(y),"fdataobj"=fdataobj,"mdist"=mdist,"Ker"=Ker,
"metric"=metric,"type.S"=type.S,"par.S"=par.S,"h.opt"=h,"m"=m,var.y=yp2)
}
class(out) <- "fregre.fd"
return(out)
}


## Try the fda.usc package in your browser

Any scripts or data that you put into this service are public.

fda.usc documentation built on Oct. 17, 2022, 9:06 a.m.