Nothing
#' Estimates in an admixture using Patra and Sen approach
#'
#' Estimation of both the weight and the distribution of the unknown component in an admixture model, by Patra and Sen approach.
#' Remind that the admixture probability density function (pdf) l is given by
#' l = p*f + (1-p)*g,
#' where g is the known component of the two-component mixture, p is the unknown proportion of the unknown component distribution f.
#' More information in 'Details' below concerning the estimation method.
#'
#' @param samples Sample to be studied.
#' @param admixMod An object of class \link[admix]{admix_model}, containing information about the known component distribution and its parameter(s).
#' @param method One of 'lwr.bnd', fixed' or 'cv': depending on whether compute some lower bound of the mixing proportion, the estimate
#' based on the value of 'c.n' or use cross-validation for choosing 'c.n' (tuning parameter).
#' @param c.n (default to NULL) A positive number for the penalization, see reference below. If NULL, equals to 0.1*log(log(n)).
#' @param folds (optional, default to 10) Number of folds used for cross-validation.
#' @param reps (optional, default to 1) Number of replications for cross-validation.
#' @param cn.s (optional) A sequence of 'c.n' to be used for cross-validation (vector of values). Default is equally
#' spaced grid of 100 values between .001 x log(log(n)) and 0.2 x log(log(n)).
#' @param cn.length (optional, default to 100) Number of equally spaced tuning parameter (between .001 x log(log(n)) and 0.2 x log(log(n))).
#' Values to search from.
#' @param gridsize (default to 600) Number of equally spaced points (between 0 and 1) to evaluate the distance function.
#' Larger values are more computationally intensive but also lead to more accurate estimates.
#'
#' @references
#' \insertRef{PatraSen2016}{admix}
#'
#' @return An object of class \link[admix]{estim_PS}, containing 10 attributes: 1) the number of samples studied (1 in this case); 2) the sample
#' size; 3) the information about component distributions of the admixture model; 4) the estimation method 5patra and Sen here);
#' 5) the estimated mixing weight (estimate of the unknown component proportion); 6) the estimated decontaminated CDF;
#' 7) an object of the class 'dist.fun' (that gives the distance); 8) the tuning parameter 'c.n'; 9) the lower bound of the
#' estimated mixing proportion (if such an option has been chosen); 10) the number of observations.
#'
#' @seealso [print.estim_PS()] for printing a short version of the results from this estimation method,
#' and [summary.estim_PS()] for more comprehensive results.
#'
#' @examples
#' \dontrun{
#' ## Simulate mixture data:
#' mixt1 <- twoComp_mixt(n = 800, weight = 0.33,
#' comp.dist = list("gamma", "exp"),
#' comp.param = list(list("shape" = 2, "scale" = 0.5),
#' list("rate" = 0.25)))
#' data1 <- getmixtData(mixt1)
#' ## Define the admixture model:
#' admixMod1 <- admix_model(knownComp_dist = mixt1$comp.dist[[2]],
#' knownComp_param = mixt1$comp.param[[2]])
#' ## Estimation step:
#' estim_PS(samples = data1, admixMod = admixMod1, method = 'fixed')
#' }
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @keywords internal
estim_PS <- function(samples, admixMod, method = c("fixed", "lwr.bnd", "cv"),
c.n = 0.1*log(log(length(samples))), folds = 10, reps = 1,
cn.s = NULL, cn.length = 100, gridsize = 1200)
{
if (!inherits(x = admixMod, what = "admix_model"))
stop("Argument 'admixMod' is not correctly specified. See ?admix_model.")
if (!is.vector(samples)) stop("'samples' has to be a numerical vector.")
if (is.null(method)) stop("'method' cannot be NULL")
n <- length(samples)
## Transform the known component distribution into a Uniform distribution:
samples <- knownComp_to_uniform(samples, admixMod)
method <- match.arg(method)
if (method == 'lwr.bnd') {
dist.out <- PS_dist_calc(samples, gridsize = gridsize)
q <- 0.6792
alp.Lwr <- sum(dist.out$distance > q/ sqrt(n)) / gridsize
alp.hat <- NULL
Fs.hat.fun <- NULL
c.n <- NULL
} else {
alp.Lwr <- NULL
}
if (method == "fixed"){
if (is.null(c.n)) {
warning("\n'c.n' is not given. Fixing it to be '0.1*log(log(n))\n")
c.n <- 0.1 *log(log(n))
}
dist.out <- PS_dist_calc(samples, gridsize = gridsize)
alp.hat <- sum(dist.out$distance > c.n/ sqrt(n)) / gridsize
if (alp.hat > 0) {
F.hat <- (dist.out$F.n - (1-alp.hat)*dist.out$F.b) / alp.hat # computes the naive estimator of F_s
Fs.hat <- Iso::pava(F.hat, dist.out$Freq, decreasing = FALSE) # computes the Isotonic Estimator of F_s
Fs.hat[which(Fs.hat <= 0)] <- 0
Fs.hat[which(Fs.hat >= 1)] <- 1
} else {
Fs.hat <- dist.out$F.b
}
Fs.hat.fun <- NULL
Fs.hat.fun$y <- Fs.hat
Fs.hat.fun$x <- dist.out$F.n.x
} else if (method == "cv") {
out.cv <- estimCV_PS(samples, admixMod, folds = folds, reps = reps,
cn.s = cn.s, cn.length = cn.length, gridsize = gridsize)
alp.hat <- out.cv$alp.hat
Fs.hat.fun <- out.cv$Fs.hat
dist.out <- out.cv$dist.out
c.n <- out.cv$cn.cv
}
ret <- list(
n_populations = 1,
population_sizes = length(samples),
admixture_models = admixMod,
estimation_method = "Patra and Sen",
estimated_mixing_weights = alp.hat,
Fs.hat = Fs.hat.fun,
dist.out = dist.out,
c.n = c.n,
alp.Lwr = alp.Lwr,
n = n)
if (method == "cv"){ ret$cv.out <- out.cv
} else { ret$cv.out <- NULL }
ret$method <- method
ret$call <- match.call()
class(ret) <- c("estim_PS", "admix_estim")
return(ret)
}
#' Print method for objects of class 'estim_PS'
#'
#' Print all the attributes of objects of class 'estim_PS'. Results of estimated quantities in an admixture
#' using Patra and Sen approach
#'
#' @param x An object of class 'estim_PS'.
#' @param ... further arguments passed to or from other methods.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @keywords internal
print.estim_PS <- function(x, ...){
#cat("\n")
#cat("Call:")
#print(x$call)
cat("\n")
if(x$method != "lwr.bnd"){
cat(paste("Estimated mixing weight (of the unknown component):" , round(x$estimated_mixing_weights,3)))
#cat("\n", paste("The chosen value c_n is", round(x$c.n, 3)), "\n")
# if( !is.null(x$cv.out)){
# old_par <- graphics::par()$mfrow
# on.exit(graphics::par(old_par))
# graphics::par(mfrow=c(1,2))
# plot(x$cv.out)
# }
# plot(x$dist.out)
} else if(x$method == 'lwr.bnd'){
plot(x$dist.out)
print (paste("The '95%' lower confidence for alp_0 is ", x$alp.Lwr))
}
cat("\n\n")
}
#' Summary method for objects 'estim_PS'
#'
#' Summarizes the results stored in an object of class 'estim_PS'.
#'
#' @param object An object of class 'estim_PS'.
#' @param ... A list of additional parameters belonging to the default method.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @keywords internal
summary.estim_PS <- function(object, ...)
{
cat("Call:")
print(object$call)
cat("\n")
cat("------- Sample -------\n")
cat("Sample size: ", object$population_sizes, "\n")
cat("-> Distribution of the known component:", object$admixture_models$comp.dist$known, "\n", sep="")
cat("-> Parameter(s) of the known component:", paste(names(object$admixture_models$comp.param$known), object$admixture_models$comp.param$known, collapse="\t", sep="="), sep="")
cat("\n")
cat("\n------- Estimation results -------\n")
cat(paste("Estimate of the mixing weight (proportion of the unknown component distribution) is" , round(object$estimated_mixing_weights,3)))
cat("\n", paste(" The chosen value c_n is", round(object$c.n,3)))
cat("\n")
}
#plot.estim_PS <- function(x, ...){
# if(x$method != "lwr.bnd"){
# plot(x$dist.out$grid.pts)
# graphics::abline(h= {x$c.n /sqrt(x$n)}, col="red", lwd= 1.3)
# graphics::abline(v= x$alp.hat, lty=2, lwd =1, col="black")
# } else {
# plot(x$dist.out$grid.pts)
# graphics::abline(h = 0.6792/sqrt(x$n), col="red", lwd= 1.3)
# graphics::abline(v= x$alp.Lwr, lty=2, lwd =1, col="black")
# }
#}
#' Cross-validation estimates in an admixture using Patra and Sen approach
#'
#' Estimation of both the unknown component weight and the unknown component distribution in an admixture model,
#' by Patra and Sen (2016) method. Remind that an admixture probability density function l is given by
#' l = p*f + (1-p)*g,
#' where l is observed, g is the known component, and p is the unknown proportion of the unknown component distribution f.
#' The estimated unknown component weight p is selected using a cross-validation technique that helps to choose the right
#' penalization, see 'Details' below for further information.
#'
#' @param data Sample where the known component density of the admixture model has been transformed into a Uniform(0,1) distribution.
#' @param admixMod An object of class 'admix_model', containing the information about the known component distribution and its parameter(s).
#' @param folds (default to 10) Number of folds used for cross-validation.
#' @param reps (default to 1) Number of replications for cross-validation.
#' @param cn.s (default to NULL) A sequence of 'c.n' to be used for cross-validation (vector of values).
#' @param cn.length (default to NULL) Number of equally spaced tuning parameter (between .001 x log(log(n)) and 0.2 x log(log(n))).
#' Values to search from.
#' @param gridsize (default to 200) Number of equally spaced points (between 0 and 1) to evaluate the distance function.
#' Larger values are more computationally intensive but also lead to more accurate estimates.
#'
#' @references
#' \insertRef{PatraSen2016}{admix}
#'
#' @return A list containing 'alp.hat' (estimate of the unknown component weight), 'Fs.hat' (list with elements 'x' and 'y' values for the function estimate
#' of the unknown cumultaive distribution function), 'dist.out' which is an object of the class 'dist.fun'
#' using the complete data.gen, 'c.n' the value of the tuning parameter used to compute the final estimate,
#' and finally 'cv.out' which is an object of class 'cv.mixmodel'. The object is NULL if method is "fixed".
#'
#' @examples
#' ## Simulate mixture data:
#' mixt1 <- twoComp_mixt(n = 1000, weight = 0.2,
#' comp.dist = list("norm", "norm"),
#' comp.param = list(list("mean" = 3, "sd" = 0.5),
#' list("mean" = 0, "sd" = 1)))
#' data1 <- getmixtData(mixt1)
#'
#' ## Define the admixture model:
#' admixMod1 <- admix_model(knownComp_dist = mixt1$comp.dist[[2]],
#' knownComp_param = mixt1$comp.param[[2]])
#' ## Estimate the proportion of the unknown component of the admixture model:
#' estimCV_PS(data = data1, admixMod = admixMod1, folds = 10,
#' reps = 1, cn.s = NULL, cn.length = 3, gridsize = 100)$alp.hat
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
estimCV_PS <- function(data, admixMod, folds = 10, reps = 1, cn.s = NULL, cn.length = NULL, gridsize = 200)
{
if (!is.vector(data)) stop("'data' has to be a vector.")
n <- length(data)
# cvFolds(length(data), K = folds, R = reps, type = "random")
## Transform the known component distribution into a Uniform distribution:
data <- knownComp_to_uniform(data, admixMod)
if (is.null(cn.s)) {
if (is.null(cn.length)) stop("Both 'cn.s' and 'cn.length' can not be null")
cn.s <- seq(.001 * log(log(n)), .2* log(log(n)), length.out = cn.length)
} else if (length(cn.s) != cn.length) {
stop("Length of 'cn.s' is different from cn.length")
}
score <- rep(0, length(cn.s))
for (rr in 1:reps) {
cv.ind <- rep(1:folds, times = ceiling(n/folds) )
cv.ind <- cv.ind[sample.int(length(cv.ind))]
cv.ind <- cv.ind[1:n]
for (kk in 1:folds) {
t.1 <- Sys.time()
t.data <- PS_dist_calc(data[cv.ind != kk], gridsize = gridsize)
for (cc in 1:length(cn.s)) {
score[cc] <- score[cc] + cv.score(t.data, test.data = data[cv.ind == kk], c.n = cn.s[cc] )
}
t.2 <- Sys.time()
# if (kk == 1) {
# print("Expected time for completion")
# print( folds*reps*(t.2-t.1))
# }
}
}
cn.cv <- cn.s[which.min(score)]
tot.out <- PS_dist_calc(data, gridsize = gridsize)
alp.hat.cv <- sum(tot.out$distance > cn.cv / sqrt(tot.out$n)) / tot.out$gridsize
if (alp.hat.cv > 0) {
F.hat <- (tot.out$F.n - (1-alp.hat.cv) * tot.out$F.b) / alp.hat.cv # computes the naive estimator of F_s
Fs.hat <- Iso::pava(F.hat,tot.out$Freq, decreasing=FALSE) # computes the Isotonic Estimator of F_s
Fs.hat[which(Fs.hat <= 0)] <- 0
Fs.hat[which(Fs.hat >= 1)] <- 1
} else {
Fs.hat <- tot.out$F.b
}
Fs.hat.fun <- NULL
Fs.hat.fun$y <- Fs.hat
Fs.hat.fun$x <- tot.out$F.n.x
ret <- list(cn.cv = cn.cv,
score = score,
cn.s = cn.s,
Fs.hat = Fs.hat.fun,
alp.hat = alp.hat.cv,
folds = folds,
rep = rep,
data = data,
dist.out = tot.out)
ret$call <- match.call()
class(ret) <- "estimCV_PS"
return(ret)
}
#' Print object of class 'estimCV_PS'
#'
#' @param x An object of class 'estimCV_PS'.
#' @param ... Arguments to be passed to print default method.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
print.estimCV_PS <- function(x,...)
{
cat("Call:")
print(x$call)
print(paste("Cross validated estimate of the mixing weight (proportion of the unknown component distribution) is" , x$alp.hat))
print(paste(" The cross-validated choice of c_n is", x$cn.cv))
cat("\n")
}
#' Plot object of class 'estimCV_PS'
#'
#' @param x An object of class 'estimCV_PS'.
#' @param ... Arguments to be passed to plot default method.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
plot.PS_estimCV <- function(x,...)
{
plot(x$cn.s, x$score, ylab= "cross validation score", xlab= "c_n" )
}
#' Function that computes the cross-validation score
#'
#' @param tr.data An object of class 'PS_dist_fun'.
#' @param test.data The data used in the test sample.
#' @param c.n The coefficient in the penalization.
#'
#' @references
#' \insertRef{PatraSen2016}{admix}
#'
#' @return The loss corresponding to the cross-validation error.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
cv.score <- function(tr.data, test.data, c.n)
{
if(!inherits(tr.data, "PS_dist_fun")) stop("'tr.data' is of the wrong type.")
alp.hat <- sum(tr.data$distance>c.n/sqrt(tr.data$n))/tr.data$gridsize
if (alp.hat > 0) {
F.hat <- (tr.data$F.n - (1-alp.hat)*tr.data$F.b) / alp.hat # computes the naive estimator of F_s
F.is <- Iso::pava(F.hat, tr.data$Freq, decreasing=FALSE) # computes the Isotonic Estimator of F_s
F.is[which(F.is <= 0)] <- 0
F.is[which(F.is >= 1)] <- 1
# Fhat.train = alp.hat * F.is + (1-alp.hat) * tr.data$F.b
} else {
F.is <- tr.data$F.b
}
test.data <- sort(test.data) ## Sorts the data set
test.data.1 <- unique(test.data) ## Finds the unique data points
test.Fn <- stats::ecdf(test.data) ## Computes the empirical DF of the data
test.Fn.1 <- test.Fn(test.data.1)
test.Fb <- test.data.1
test.Freq <- diff(c(0,test.Fn.1))
F.is.interpolate <- stats::approx(x = tr.data$F.b, y = F.is, xout = test.Fb, yleft = 0, yright = 1)
loss <- sum( {alp.hat* F.is.interpolate$y + (1- alp.hat) *test.Fb - test.Fn.1}^2* test.Freq)
return(loss)
}
#' Distance to minimize for estimation in an admixture, using Patra and Sen approach
#'
#' Computes the distance to be minimized within the estimation process using Patra and Sen estimation technique by integrating along some given grid
#' the appropriate distance. For further developments, see 'Details' below.
#'
#' @param data Sample where the known component density of the admixture model has been transformed into a Uniform(0,1) distribution.
#' @param gridsize Gridsize to make the computations.
#'
#' @references
#' \insertRef{PatraSen2016}{admix}
#'
#' @return a list containing the evaluated distance and some additional information.
#'
#' @examples
#' ## Simulate mixture data:
#' mixt1 <- twoComp_mixt(n = 1000, weight = 0.6,
#' comp.dist = list("norm", "norm"),
#' comp.param = list(list("mean" = 3, "sd" = 0.5),
#' list("mean" = 0, "sd" = 1)))
#' data1 <- getmixtData(mixt1)
#' ## Define the admixture model:
#' admixMod1 <- admix_model(knownComp_dist = mixt1$comp.dist[[2]],
#' knownComp_param = mixt1$comp.param[[2]])
#' ## Transform the known component distribution into a Uniform distribution:
#' data <- knownComp_to_uniform(data1, admixMod1)
#' PS_dist_calc(data = data, gridsize = 200)
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
PS_dist_calc <- function(data, gridsize = 200)
{
## Transform the known component distribution into a Uniform distribution:
#data <- knownComp_to_uniform(data, admixMod)
q <- 0.6792
n <- length(data) # length of the data set
data <- sort(data) # sorts the data set
data.1 <- unique(data) # finds the unique data points
Fn <- stats::ecdf(data) # computes the empirical DF of the data
Fn.1 <- Fn(data.1) # empirical DF of the data at the data points
## Calculate the known F_b at the data points
## Note: for Uniform(0,1) F_b(x) = x
## Usually would need to CHANGE this
Fb <- data.1
## Compute the weights (= frequency/n) of the unique data values, i.e., dF_n
Freq <- diff(c(0, Fn.1))
distance <- rep(0, floor(gridsize * .12))
distance[0] <- sqrt( t((Fn.1-Fb)^2) %*% Freq )
grid.pts <- {1:floor(gridsize)} / gridsize
for (ii in 1:length(grid.pts)) {
aa <- grid.pts[ii]
F.hat <- (Fn.1 - (1-aa)*Fb) / aa # computes the naive estimator of F_s
F.is <- Iso::pava(F.hat, Freq, decreasing=FALSE) # computes the Isotonic Estimator of F_s
F.is[which(F.is <= 0)] <- 0
F.is[which(F.is >= 1)] <- 1
distance[ii] <- aa * sqrt( t((F.hat-F.is)^2) %*% Freq )
}
# Lower.Cfd.Bound <- sum(distance>q/sqrt(n))/gridsize
ret <- list(distance = distance,
gridsize = gridsize,
grid.pts = grid.pts,
F.n = Fn.1,
F.n.x = data.1,
Freq = Freq,
F.b = Fb,
n = n)
class(ret) <- "PS_dist_fun"
ret$call <- match.call()
return(ret)
}
#' Results of the computation of the contrast using Patra and Sen approach
#'
#' Plots all the attributes of objects of class 'PS_dist_fun'.
#'
#' @param x An object of class 'PS_dist_fun'.
#' @param ... further arguments passed to or from other methods.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
plot.PS_dist_fun <- function(x,...){
plot(x$grid.pts, x$distance, ylab = "distance", xlab = "gamma" , type = "l")
}
#' Results of the computation of the contrast using Patra and Sen approach
#'
#' Print all the attributes of objects of class 'PS_dist_fun'.
#'
#' @param x An object of class 'PS_dist_fun'.
#' @param ... further arguments passed to or from other methods.
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
print.PS_dist_fun <- function(x,...){
cat("Call:\n")
print(x$call)
t.mat <- cbind(x$grid.pts , x$distance)
colnames(t.mat) <- c("gamma", "distance")
print(t(t.mat))
cat("\n")
}
#' Estimates the density of the unknown component, using Patra and Sen approach
#'
#' Computes by Patra and Sen technique the estimate of the density function when the latter is known to be either decreasing or increasing, still in the framework of an admixture model.
#' Remind that an admixture probability density function (pdf) l is given by
#' l = p*f + (1-p)*g,
#' where l is observed, g is the known component, and p is the unknown proportion of the unknown component distribution f.
#'
#' @param input an R object of class 'PS_estim' or 'PS_estimCV'.
#' @param dec.density a boolean indicating whether the density is increasing or decreasing.
#'
#' @references
#' \insertRef{PatraSen2016}{admix}
#'
#' @return an estimator of the unknown component density.
#'
#' @examples
#' ## Simulate mixture data:
#' mixt1 <- twoComp_mixt(n = 1000, weight = 0.6,
#' comp.dist = list("norm", "norm"),
#' comp.param = list(list("mean" = 3, "sd" = 0.5),
#' list("mean" = 0, "sd" = 1)))
#' data1 <- getmixtData(mixt1)
#' ## Define the admixture model:
#' admixMod1 <- admix_model(knownComp_dist = mixt1$comp.dist[[2]],
#' knownComp_param = mixt1$comp.param[[2]])
#' ## Estimate the proportion of the unknown component of the admixture model:
#' res <- estim_PS(data = data1, admixMod = admixMod1, method = 'fixed',
#' c.n = 0.1*log(log(length(data1))), gridsize = 1000)
#' PS_unknownDensity_estim(res, dec.density = TRUE)
#'
#' @author Xavier Milhaud <xavier.milhaud.research@gmail.com>
#' @noRd
PS_unknownDensity_estim <- function(input, dec.density = TRUE)
{
if (inherits(input, "estim_PS") || inherits(input, "estimCV_PS")) {
## CDF:
Fs.hat <- input$Fs.hat
} else {
stop("This function only works on objects of class 'estim_PS' or 'estimCV_PS'. See functions 'estim_PS' or 'estimCV_PS'.")
}
if (dec.density == TRUE){
ll <- fdrtool::gcmlcm(Fs.hat$x, Fs.hat$y, type = "lcm")
} else if (dec.density == FALSE){
ll <- fdrtool::gcmlcm(Fs.hat$x, Fs.hat$y, type = "gcm")
}
## Density function:
fs.hat <- NULL
fs.hat$x <- rep(ll$x.knots, each = 2) # data points for density
fs.hat$y <- c(0, rep(ll$slope.knots, each = 2), 0) # value of density
return(fs.hat)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.