R/GSE.R
In GSE: Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data

Documented in GSE

###########################################################
## Generalized S-Estimator
###########################################################
GSE <- function(x, tol=1e-4, maxiter=150, method=c("bisquare","rocke"), init=c("emve","qc","huber","imputed","emve_c"), mu0, S0, ...)
{
    xcall <- match.call()

    ## argument checks
    method <- match.arg(method)
    init <- match.arg(init)

    ## check dat
    if(is.data.frame(x) | is.matrix(x))
        x <- data.matrix(x)
    else stop("Data matrix must be of class matrix or data.frame")
    
    p <- ncol(x)
    if( p >200 | p < 2 ) stop("Column dimension of 'x' must be in between 2 and 200.")
    
    ## drop all rows with missing values (!!) :
    x_nonmiss <- is.na(x)*-1 + 1
    pp <- rowSums(x_nonmiss)
    pp_col <- colSums(x_nonmiss)
    
    ## Cannot contain all obs with completely missing rows!!
    if( all(pp == 0) ) stop("All observations have missing values!")
    if( any(pp_col == 0) )stop("Data matrix cannot contain column(s) with completely missing data!")	
    
    ok <- which(pp > 0)
    x_orig <- x
    x <- x[ ok,]
    x_nonmiss <- x_nonmiss[ ok,]

    ## reorder the data based on missingness
    x_sort <- .sort.missing(x, x_nonmiss)

    ## get initial estimate for all the EM calculation in EMVE subsampling
    EM.mu0 <- colMeans(x_sort$x, na.rm=T)
    EM.S0 <- diag(apply(x_sort$x, 2, var, na.rm=T))
    x_sort <- c(x_sort, .CovEM.setparam(p, EM.mu0, EM.S0))
    
    ## dimension
    n <- nrow(x_sort$x); p <- ncol(x_sort$x)
    if(n <= p + 1)
        stop(if (n <= p) "n <= p -- you can't be serious!" else "n == p+1  is too small sample size")
    if(n < 2 * p)
        ## p+1 < n < 2p
        warning("n < 2 * p, i.e., possibly too small sample size")

    if( xor(missing(mu0), missing(S0)) ) warning("Both 'mu0' and 'S0' must be provided. Default 'init' is used...")
    if( missing(mu0) || missing(S0) ){
        init.res <- switch( init,
            emve= {emve(x, sampling="uniform", ...)},
            qc ={HuberPairwise(x, psi="sign", computePmd = FALSE)},
            huber = {HuberPairwise(x, psi="huber", computePmd = FALSE, ...)},
            imputed = {ImpS(x, ...)},
            emve_c= {emve(x, sampling="cluster", ...)}
            )
        S0 <- init.res@S
        mu0 <- init.res@mu
    } 
    S0.chol <- tryCatch( chol(S0), error=function(e) NA)
    if( !is.matrix(S0.chol) )  stop("Estimated initial covariance matrix 'S0' is not positive definite.")

    ## initiate GSE computation
    bdp <- 0.5
    print.step <- 0
    tol.scale <- 1e-9
    miter.scale <- 300
    res <- with(x_sort, .GSE.init(x, x_nonmiss, bdp, pu, n, p, miss.group.unique, miss.group.counts, mu0, S0, tol, 
        maxiter, tol.scale, miter.scale, print.step=print.step, method))
    
    ## compute pmd
    pmd <- pmd.adj <- rep(NA, nrow(x_orig))
    pmd[ok] <- res$pmd[x_sort$id.ro]
    pmd.adj[ok] <- res$pmd.adj[x_sort$id.ro]
    
    ## new 2014-07-28
    wgts <- rep(NA, nrow(x_orig))
    wgts[ok] <- res$weights[x_sort$id.ro]
    wgtsp <- rep(NA, nrow(x_orig))
    wgtsp[ok] <- res$weightsprm[x_sort$id.ro]
    ximp <- matrix(NA, nrow(x_orig), ncol(x_orig))
    ximp[ok,] <- res$ximp[x_sort$id.ro,]
    
    res <- new("GSE",
        call = xcall,
        S = res$S,
        mu = res$mu,
        sc = res$stilde0,
        mu0 = mu0,
        S0 = S0, 
        iter = res$iter,
        eps = res$ep,
        estimator = "Generalized S-Estimator", 
        x = x_orig,
        ximp = ximp,
        weights = wgts,
        weightsp = wgtsp,
        pmd = pmd,
        pmd.adj = pmd.adj,
        p = p,
        pu = pp)
    res
}

## Assume the input data matrix is sorted using .sort.missing
.GSE.init <- function(x, x_nonmiss, bdp, pu, n, p, miss.group.unique, miss.group.counts,  mu0, S0, tol, maxiter, tol.scale, miter.scale, print.step, method)
{
    ##########################################################################################
    ## computing
    p.const.group <- rowSums(miss.group.unique)
    if( method == "bisquare"){
        tuning.const.group <- .rho.bisquare.tune(p.const.group, bdp)
        res <- .GSE.Rcpp(x, matrix(mu0,1,p), S0, tol, maxiter, tol.scale, miter.scale, 
            miss.group.unique, miss.group.counts, tuning.const.group, print.step, bdp)
    }
    if( method == "rocke"){
        tuning.const.group <- .rho.rocke.tune(p.const.group)
        gamma.const.group <- pmin(1, qchisq(0.95, df=p.const.group)/p.const.group - 1)
        res <- .GSE.rocke.Rcpp(x, matrix(mu0,1,p), S0, tol, maxiter, tol.scale, miter.scale, 
            miss.group.unique, miss.group.counts, tuning.const.group, gamma.const.group, print.step, bdp)
    }
    ##########################################################################################
    ## Include additional output other than mu and S output by .GSE.fixpt.init:
    ## input data 'x', nonmissing variables 'pu', partial MD 'pmd', 
    ## note: need to reorder x, pmd, pu
    
    ## Compute pmd
    x.tmp <- sweep(x, 2, c(res$mu), "-")
    pmd <- .partial.mahalanobis.Rcpp(x.tmp, res$S, miss.group.unique, miss.group.counts)
    pmd.adj <- qchisq( pchisq( pmd, df=pu, log.p=T, lower.tail=F), df=p, log.p=T, lower.tail=F) 
    pmd.adj[ which( pu == p)] <- pmd[ which(pu==p) ]
    res$pmd <- pmd
    res$pmd.adj <- pmd.adj

    ## new 2016-08
    pmd.adj.med <- median(pmd.adj)
    qchis5 <- qchisq(.5,p)
    res$S <- (pmd.adj.med/qchis5)*res$S
    pmd <- (qchis5/pmd.adj.med)*pmd
    pmd.adj <- (qchis5/pmd.adj.med)*pmd.adj
    
    ##########################################################################################
    ## Output
    res
}

Any scripts or data that you put into this service are public.

GSE documentation built on Dec. 28, 2022, 1:31 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GSE
Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data

R/GSE.R
In GSE: Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data

Defines functions .GSE.init GSE

Documented in GSE

Try the GSE package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GSE Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data

R/GSE.R In GSE: Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data

Defines functions .GSE.init GSE

Documented in GSE

Try the GSE package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GSE
Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data

R/GSE.R
In GSE: Robust Estimation in the Presence of Cellwise and Casewise Contamination and Missing Data