#' List ids of individuals to be dropped from height modeling
#'
#' This function returns a vector containing ids of individuals with incomplete
#' stature measurements.
#'
#' @return Vector of ids
#' @export
growthfd.bgs.dropoutsIds.Height <- function() {
return(c(
0,2,3,4,
11,12,14,18,
25,28,
44,47,49,
58,
70,71,78,
83,85,86,87,88,
91,93,96,
100,104,105,
116,
120,123,124,127,
130,133,
144,148,149,
152,155,156,157,
166,169,
174,
182,183,184,186,
191,194,
202,207,
213,214,218,219,
222,228,
231,234,236,
240,243,244,249,
251,253,
261,265,
275,278,
280,283,285,
290,292,294,297,
310,311,318,319,
322,324,
332,
344,347,
355,356,
365,
372,
388,
394,396,397,
401,402,
415,418,
420,425,
432,
440,443,
457,
461,462,464,
471,
487,
516,
521,522,523,
531,538
)
)
}
#' List ages of measurements
#'
#' This function returns a vector of ages when the measurements were performed.
#'
#' @return Vector of ages
#' @export
growthfd.bgs.measurementsAge <- function() {
return(c(0, 0.25, 0.5, 0.75, seq(1, 18, 0.5)))
}
#' Gather selected columns
#'
#' Selects columns with given prefix for supplied ages and gathers them into
#' a matrix together with id and sex data
#'
#' @param data BGS data
#' @param prefix Columns prefix
#' @param age Vector containing ages (optional)
#' @return Gathered data
#' @importFrom dplyr %>%
#' @export
growthfd.bgs.gather <- function(data, prefix='vysk', age=NULL) {
if(is.null(age)) {
age <- growthfd.bgs.measurementsAge()
}
n <- sprintf('%s%.2d%.2d', rep(prefix, length(age)), floor(age), (age%%1)*1e2)
s <- subset(data, select=c('id', 'sex', n)) %>% tidyr::gather(agecat, value, n)
s <- s[order(s$id),]
s$age <- rep(age, dim(data)[1])
return(s)
}
#' Estimate NA values
#'
#' Interpolates missing values using spline method. Interpolates data
#' from the 'value' column and join them as the 'valuei' column.
#'
#' @param gatheredData Data in gathered form
#' @return Interpolated data
#' @export
growthfd.bgs.interpolateNAs <- function(gatheredData) {
ids <- unique(gatheredData$id)
gatheredData$valuei <- rep(NA, length(gatheredData$value))
for(id in ids) {
rows <- gatheredData$id == id
value <- gatheredData$value[rows]
gatheredData$valuei[rows] <- imputeTS::na_interpolation(value, option = "stine")
}
return(gatheredData)
}
#' Resample the data
#'
#' Resamples the data without NA values to fine grid.
#'
#' @param interpolatedData Data to be resampled.
#' @return Resampled data
#' @export
growthfd.bgs.resample <- function(interpolatedData) {
result <- list()
ids <- unique(interpolatedData$id)
for(id in ids) {
rows <- interpolatedData$id == id
br <- zoo::zoo(interpolatedData$valuei[rows], interpolatedData$age[rows])
t <- time(br)
t.4iy <- seq(from = min(t), to=max(t),by=0.05)
dummy <- zoo::zoo(,t.4iy)
br.interpolated <- merge(br,dummy,all=TRUE)
br.spl <- zoo::na.spline(br.interpolated, method = "natural")
w.spl <- time(br.spl)
Tset.spl <- t.4iy
brs.spl <- br.spl[w.spl %in% Tset.spl]
age <- as.vector(time(brs.spl))
result[[id]] <- cbind('id'=rep(id,length(age)),'age'=age,'value'=as.vector(brs.spl))
}
return(do.call(rbind, result))
}
#' Fit the monotone spline
#'
#' This function fit the monotone splines to the data.
#'
#' @param resampledData Data to be interpolated by monotone fda splines
#' @return Object with fitted splines
#' @example man/examples/train.R
#' @export
growthfd.bgs.smooth <- function(resampledData, monotone=T, norder=6, Lfdobj=3, lambda=5e-2) {
age <- unique(resampled[,'age'])
values <- resampledData[,'value']
ncases <- length(unique(resampledData[,'id']))
dim(values) <- c(length(age), ncases)
rng <- c(min(age),max(age))
nage <- length(age)
nbasis <- nage + norder - 2
wbasis <- fda::create.bspline.basis(rng, nbasis, norder, age)
cvec0 <- matrix(0,nbasis,ncases)
Wfd0 <- fda::fd(cvec0, wbasis)
growfdPar <- fda::fdPar(Wfd0, Lfdobj, lambda)
wgt <- rep(1, nage)
return(if(monotone) {
fda::smooth.monotone(age, values, growfdPar, wgt, conv=0.1)
} else {
fda::smooth.basis(age, values, growfdPar, wgt)
})
}
#' Evaluate monotone fda splines
#'
#' This function evaluates the set of monotone splines.
#'
#' @param fda Fda object
#' @param age Vector of ages to be evaluated
#' @param deriv Derivative of the curve
#' @return Matrix of evaluated points
#' @export
growthfd.bgs.evalMonotone <- function(fda, age, deriv=0) {
n <- length(age)
m <- dim(fda$beta)[2]
result <- matrix(NA, n, m)
for(i in seq(m)) {
if(deriv == 0) {
result[,i] <- fda$beta[1,i] + fda$beta[2,i]*fda::eval.monfd(age, fda$Wfdobj[i])
} else {
result[,i] <- fda$beta[2,i]*fda::eval.monfd(age, fda$Wfdobj[i], deriv)
}
}
return(result)
}
#' Evaluate general fda splines
#'
#' This function evaluates non monotone fda splines.
#'
#' @param fda Fda object
#' @param age Vector of ages to be evaluated
#' @param deriv Derivative of the curve
#' @return Matrix of evaluated points
#' @export
growthfd.bgs.eval <- function(fda, age, deriv=0) {
return(fda::eval.fd(age, fda, deriv))
}
#' Find apvs on growth curves
#'
#' This function finds ages of maximal growth velocity on the velocity curves.
#'
#' @param age Vector of ages
#' @param velocity Matrix of velocity curves
#' @param ids Vector of individuals' ids
#' @param limits List of limits
#' @return Vector of apv values
#' @export
growthfd.bgs.apvs <- function(age, velocity) {
n <- dim(velocity)[2]
result <- rep(NA, n)
for(col in seq(n)) {
xyi <- data.frame(x=age, y=velocity[,col])
result[col] <- sitar::getPeak(xyi)[1]
}
return(result)
}
#' Plot all individual curves to pdf
#'
#' Plots value, velocity and acceleration curves together with apvs
#' and measured data points into separate figure for each individual. All plots
#' are stored into a single pdf file, one figure per page.
#'
#' @param age Vector of ages
#' @param ids Vector containing ids
#' @param apvs Vector containing apv for each individual
#' @param values Matrix with value curves
#' @param vel Matrix with velocity curves
#' @param values Matrix with acceleration curves
#' @param data Matrix with original data points
#' @param filename File name of the output pdf
#' @export
growthfd.bgs.plotIndividuals <- function(age, ids, apvs, values, vel, acc, data, filename="plots.pdf") {
n <- length(ids)
pdf(filename)
for(i in seq(n)) {
df <- data.frame('age'=age, 'v'=values[,i], 'vel'=vel[,i], 'acc'=acc[,i])
rows <- data$id == ids[i]
dfPts <- data.frame('age' = data$age[rows], 'v' = data$value[rows])
p <- ggplot2::ggplot(data = df) +
ggplot2::geom_line(ggplot2::aes(x=age,y=v)) +
ggplot2::geom_line(ggplot2::aes(x=age,y=acc)) +
ggplot2::geom_line(ggplot2::aes(x=age,y=vel)) +
ggplot2::geom_vline(xintercept = apvs[i]) +
ggplot2::geom_point(data=dfPts, ggplot2::aes(x=age, y=v)) +
ggplot2::ggtitle(ids[i])
print(p)
}
dev.off()
}
#' Plot individual points to pdf
#'
#' Plots measured data points into separate figure for each individual. All plots
#' are stored into a single pdf file, one figure per page.
#'
#' @param age Vector of ages
#' @param ids Vector containing ids
#' @param apvs Vector containing apv for each individual
#' @param values Matrix with value curves
#' @param vel Matrix with velocity curves
#' @param values Matrix with acceleration curves
#' @param data Matrix with original data points
#' @param filename File name of the output pdf
#' @export
growthfd.bgs.plotIndividualsPoints <- function(ids, data, filename="plots_points.pdf") {
n <- length(ids)
pdf(filename)
for(i in seq(n)) {
rows <- data$id == ids[i]
dfPts <- data.frame('age' = data$age[rows], 'v' = data$value[rows])
p <- ggplot2::ggplot(data = dfPts) +
ggplot2::geom_point(ggplot2::aes(x=age, y=v)) +
ggplot2::ggtitle(ids[i])
print(p)
}
dev.off()
}
#' Plot curves in one figure
#'
#' Plots all curves from given matrix into a single figure.
#'
#' @param age Vector of ages
#' @param values Matrix containing curves as columns
#' @param xlimit Limits for the x axis
#' @param ylimit Limits for the y axis
#' @return GGPlot2 plot
#' @export
growthfd.bgs.plotAll <- function(age, values, xlimit=NULL, ylimit=NULL) {
d <- dim(values)
dim(values) <- c(prod(dim(values)), 1)
df <- data.frame(age=rep(age, d[2]), values=values, i=factor(rep(seq(d[2]), each=d[1])))
result <- ggplot2::ggplot(data = df) +
ggplot2::geom_line(ggplot2::aes(x=age,y=values,group=i,colour=i),show.legend = FALSE)
if(!is.null(xlimit)) {
result <- result + ggplot2::xlim(xlimit)
}
if(!is.null(ylimit)) {
result <- result + ggplot2::ylim(ylimit)
}
return(result)
}
#' Register curves to the apvs
#'
#' Calculates the time warping functions with respect to the supplied apvs
#' and the growth curves for the final refinement.
#'
#' @param fdaObject Fda object contaning the curves
#' @param apvs Vector containing the respective apvs
#' @return FDA object containing the time warping functions
#' @export
growthfd.bgs.registerCurvesToApvs <- function(fdaObject, apvs) {
hgtfhatfd = fdaObject$yhatfd;
accelfdUN = fda::deriv.fd(hgtfhatfd, 2)
accelmeanfdUN = fda::mean.fd(accelfdUN)
PGSctr=apvs
PGSctrmean = mean(PGSctr)
ncases <- length(apvs)
# wbasisLM = fda::create.bspline.basis(c(0,18), 4, 3, c(0,PGSctrmean,18))
wbasisLM = fda::create.bspline.basis(c(min(fdaObject$argvals),max(fdaObject$argvals)), 4, 3, c(min(fdaObject$argvals),PGSctrmean,max(fdaObject$argvals)))
WfdLM = fda::fd(matrix(0,4,1),wbasisLM)
WfdParLM = fda::fdPar(WfdLM,1,1e-12)
regListLM = fda::landmarkreg(accelfdUN, PGSctr, PGSctrmean, WfdParLM, TRUE)
accelfdLM = regListLM$regfd
accelmeanfdLM = fda::mean.fd(accelfdLM)
warpfdLM = regListLM$warpfd
WfdLM = regListLM$Wfd
# wbasisCR = fda::create.bspline.basis(c(0,18), 15, 5)
wbasisCR = fda::create.bspline.basis(c(min(fdaObject$argvals),max(fdaObject$argvals)), 15, 5)
Wfd0CR = fda::fd(matrix(0,15,ncases),wbasisCR)
WfdParCR = fda::fdPar(Wfd0CR, 1, 1)
regList = fda::register.fd(accelmeanfdLM,accelfdLM, WfdParCR)
warpfdCR = regList$warpfd
return(fda::register.newfd(warpfdLM, warpfdCR))
}
growthfd.bgs.registerCurvesToApvs.nonmonotone <- function(fdaObject, apvs) {
hgtfhatfd = fdaObject$fd;
accelfdUN = fda::deriv.fd(hgtfhatfd, 2)
accelmeanfdUN = fda::mean.fd(accelfdUN)
PGSctr=apvs
PGSctrmean = mean(PGSctr)
ncases <- length(apvs)
# wbasisLM = fda::create.bspline.basis(c(0,18), 4, 3, c(0,PGSctrmean,18))
wbasisLM = fda::create.bspline.basis(c(min(fdaObject$argvals),max(fdaObject$argvals)), 4, 3, c(min(fdaObject$argvals),PGSctrmean,max(fdaObject$argvals)))
WfdLM = fda::fd(matrix(0,4,1),wbasisLM)
WfdParLM = fda::fdPar(WfdLM,1,1e-12)
regListLM = fda::landmarkreg(accelfdUN, PGSctr, PGSctrmean, WfdParLM, TRUE)
accelfdLM = regListLM$regfd
accelmeanfdLM = fda::mean.fd(accelfdLM)
warpfdLM = regListLM$warpfd
WfdLM = regListLM$Wfd
# wbasisCR = fda::create.bspline.basis(c(0,18), 15, 5)
wbasisCR = fda::create.bspline.basis(c(min(fdaObject$argvals),max(fdaObject$argvals)), 15, 5)
Wfd0CR = fda::fd(matrix(0,15,ncases),wbasisCR)
WfdParCR = fda::fdPar(Wfd0CR, 1, 1)
regList = fda::register.fd(accelmeanfdLM,accelfdLM, WfdParCR)
warpfdCR = regList$warpfd
return(fda::register.newfd(warpfdLM, warpfdCR))
}
#' Register curves on apvs and atfs
#'
#' Computes the time-warping functions based on landmark-based registration
#' apv and atf points to the population means
#'
#' @param fdaObject Fda object contaning the curves
#' @param apvs Vector containing the respective apvs
#' @param atfs Vector containing the atfs
#' @return FDA object containing the time warping functions
#' @export
growthfd.bgs.registerCurvesToApvsAtfs <- function(fdaObject, apvs, atfs) {
hgtfhatfd = fdaObject$yhatfd;
accelfdUN = fda::deriv.fd(hgtfhatfd, 2)
accelmeanfdUN = fda::mean.fd(accelfdUN)
PGSctr=apvs
PGSctrmean = mean(PGSctr)
ncases <- length(apvs)
wbasisLM = fda::create.bspline.basis(c(0,18), 4, 3, c(0,PGSctrmean,18))
WfdLM = fda::fd(matrix(0,4,1),wbasisLM)
WfdParLM = fda::fdPar(WfdLM,1,1e-12)
regListLM = fda::landmarkreg(accelfdUN, PGSctr, PGSctrmean, WfdParLM, TRUE)
}
#' Compute inverse time-warping functions
#'
#' Computes inverse for given functions.
#'
#' @param age Vector of ages
#' @param tw Fda object containing the time-warping functions
#' @return Fda object containing the inverse functions
#' @export
growthfd.bgs.invertTw <- function(age, tw) {
values=fda::eval.fd(age, tw)
values[1,] = min(age)
values[length(age),] = max(age)
rng <-c(min(age),max(age))
nage <- dim(values)[1]
ncases <- dim(values)[2]
norder <- 6
nbasis <- nage + norder - 2
Lfdobj <- 3
lambda <- 5e-2
d <- matrix(nrow = nage, ncol = ncases)
for(i in 1:ncases) {
wbasis <- fda::create.bspline.basis(rangeval=rng, nbasis=nbasis, norder=norder, breaks=values[,i])
cvec0 <- matrix(0,nbasis,1)
Wfd0 <- fda::fd(cvec0, wbasis)
growfdPar <- fda::fdPar(Wfd0, Lfdobj, lambda)
d[,i] <- fda::eval.fd(age, smooth.basis(values[,i], age, growfdPar)$fd)
}
wbasis <- fda::create.bspline.basis(rangeval=rng, nbasis=nbasis, norder=norder, breaks=age)
Wfd0 <- fda::fd(cvec0, wbasis)
growfdPar <- fda::fdPar(Wfd0, Lfdobj, lambda)
return(fda::smooth.basis(age, d, growfdPar))
}
#' Create FPCA growth model
#'
#' Creates FPCA growth model from fda objects of growth functions registered
#' on apv and inverse time warping functions.
#'
#' @param ampitude Fda object of registered growth functions
#' @param itw Fda object of inverse time warping functions
#' @param nharm Number of harmonic functions for each fpca
#' @return FPCA growth model
#' @export
growthfd.bgs.model <- function(amplitude, itw, nharm=6) {
model<-list();
model$growthfpca <- fda::pca.fd(amplitude,nharm=nharm);
model$warpfpca <- fda::pca.fd(itw, nharm=nharm);
model$scores.elements <- c(
dim(model$warpfpca$scores)[2],
dim(model$growthfpca$scores)[2]
)
return(model)
}
#' Plot eigenwarps and eigenamplitudes of the model
#'
#' Plots the effects of individual parameters on the curve shape.
#'
#' @param model FPCA growth model
#' @param deriv Derivation of the growth curve
#' @param ylim Limits the scale of y axis
#' @export
growthfd.plotwarps <-function(model, deriv=0, ylim=NULL, age = seq(0,18,.05)) {
windows(width=7,height=12,rescale=c("fixed"))
nwarpscores = model$scores.elements[1];
nparams = sum(model$scores.elements) + 2;
par(mfrow=c(ceiling(.25*nparams),3))
for(i in 1:sum(model$scores.elements)) {
params <- rep(0, nparams);
plot(x=age, y=growthfd.evaluate(age, params, model, deriv), ylim=ylim, cex=0.3,pch=19, col="black");
params[i] = 3;
points(x=age, y=growthfd.evaluate(age, params, model, deriv), ylim=ylim, cex=0.3,pch=19, col="red");
params[i] = -3;
points(x=age, y=growthfd.evaluate(age, params, model, deriv), ylim=ylim, cex=0.3,pch=19, col="blue");
}
}
#' Preprocess a digit data
#'
#' Selects data with minimal count of measurements and convert them into a data frame.
#'
#' @param data A csv table containing the measurements
#' @param colName Name of the particular column with the length data
#' @param minCount Minimal count of measurements
#' @param ncores Number of cores used for parallelisation
#' @return Data frame with uniformly distributed data
#' @export
#' @example man/examples/digits.R
growthfd.digits <- function(data, colName, minCount = 9, ncores = 4) {
# select data column
col <- which(names(data) == colName)
data <- data[, c(2,6,col)]
# omit incomplete observations
data <- data[stats::complete.cases(data), ]
# remove duplicated data and convert to data frame
dup <- duplicated(data[,c(1,2)])
message('Duplicated data')
data_dup <- data[dup,]
print(data_dup)
data <- data[!dup,]
# select ids with minimal count of measurements equal or higher than minCount
cnt <- aggregate(data$ind, by=list(data$ind), FUN=length)
ids <- cnt[cnt[, 2] >= minCount, 1]
# select chosen ids
data <- data[data$ind %in% ids,]
df <- as.data.frame(data)
names(df)[3] <- "value"
res <- santaR::santaR_auto_fit(inputData = df, ind=df$ind, time=df$age, df=8, ncores=ncores)
x <- res$value$groups[[1]]$groupMeanCurve$x
s <- seq(min(x), max(x), (max(x) - min(x)) / 100)
y <- list()
acceptedIds <- c()
for(i in seq(length(ids))) {
if(!is.null(res$value$groups[[1]]$curveInd[[i]])) {
p <- stats::predict(res$value$groups[[1]]$curveInd[[i]], s)
y <- append(y, p$y)
acceptedIds <- c(acceptedIds, ids[i])
}
else {
message(sprintf('Rejected id: %d', ids[i]))
}
}
return(list(data.frame(id = rep(acceptedIds, each=length(s)),
age = rep(s, length(acceptedIds)),
valuei = unlist(y)),
setdiff(ids, acceptedIds),
data_dup)
)
}
growthfd.digits.spline <- function(data, colName, minCount = 9) {
# select data column
col <- which(names(data) == colName)
data <- data[, c(2,6,col)]
# omit incomplete observations
data <- data[stats::complete.cases(data), ]
# remove duplicated data and convert to data frame
dup <- duplicated(data[,c(1,2)])
message('Duplicated data')
data_dup <- data[dup,]
print(data_dup)
data <- data[!dup,]
# select ids with minimal count of measurements equal or higher than minCount
cnt <- aggregate(data$ind, by=list(data$ind), FUN=length)
ids <- cnt[cnt[, 2] >= minCount, 1]
# select chosen ids
msk <- data$ind %in% ids
data <- data[msk,]
min_x <- 7.5 # min(data$age)
max_x <- 20 # max(data$age)
s <- seq(min_x, max_x, (max_x - min_x) / 100)
y <- list()
acceptedIds <- c()
for(i in seq(length(ids))) {
msk <- data$ind == ids[i]
i_x <- data[msk, 2]
i_y <- data[msk, 3]
sp <- stats::smooth.spline(i_x,i_y,df=length(i_x)-1)
p <- stats::predict(sp, s)
y <- append(y, p$y)
acceptedIds <- c(acceptedIds, ids[i])
}
return(list(data.frame(id = rep(acceptedIds, each=length(s)),
age = rep(s, length(acceptedIds)),
valuei = unlist(y)),
setdiff(ids, acceptedIds),
data_dup)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.