#' @rdname susie
#'
#' @param XtX A p by p matrix \eqn{X'X} in which the columns of X
#' are centered to have mean zero.
#'
#' @param Xty A p-vector \eqn{X'y} in which y and the columns of X are
#' centered to have mean zero.
#'
#' @param yty A scalar \eqn{y'y} in which y is centered to have mean
#' zero.
#'
#' @param n The sample size.
#'
#' @param X_colmeans A p-vector of column means of \code{X}. If both
#' \code{X_colmeans} and \code{y_mean} are provided, the intercept
#' is estimated; otherwise, the intercept is NA.
#'
#' @param y_mean A scalar containing the mean of \code{y}. If both
#' \code{X_colmeans} and \code{y_mean} are provided, the intercept
#' is estimated; otherwise, the intercept is NA.
#'
#' @param maf Minor allele frequency; to be used along with
#' \code{maf_thresh} to filter input summary statistics.
#'
#' @param maf_thresh Variants having a minor allele frequency smaller
#' than this threshold are not used.
#'
#' @param r_tol Tolerance level for eigenvalue check of positive
#' semidefinite matrix of R.
#'
#' @param check_input If \code{check_input = TRUE},
#' \code{susie_suff_stat} performs additional checks on \code{XtX} and
#' \code{Xty}. The checks are: (1) check that \code{XtX} is positive
#' semidefinite; (2) check that \code{Xty} is in the space spanned by
#' the non-zero eigenvectors of \code{XtX}.
#'
#' @param check_prior If \code{check_prior = TRUE}, it checks if the
#' estimated prior variance becomes unreasonably large (comparing with
#' 10 * max(abs(z))^2).
#'
#' @param n_purity Passed as argument \code{n_purity} to
#' \code{\link{susie_get_cs}}.
#'
#' @param ... Additional arguments to provide backward compatibility
#' with earlier versions of \code{susie_suff_stat}.
#'
#' @export
#'
susie_suff_stat = function (XtX, Xty, yty, n,
X_colmeans = NA, y_mean = NA,
maf = NULL, maf_thresh = 0, L = 10,
scaled_prior_variance = 0.2,
residual_variance = NULL,
estimate_residual_variance = TRUE,
estimate_prior_variance = TRUE,
estimate_prior_method = c("optim","EM","simple"),
check_null_threshold = 0, prior_tol = 1e-9,
r_tol = 1e-08, prior_weights = NULL,
null_weight = 0, standardize = TRUE,
max_iter = 100, s_init = NULL, coverage = 0.95,
min_abs_corr = 0.5, tol = 1e-3,
verbose = FALSE, track_fit = FALSE,
check_input = FALSE, refine = FALSE,
check_prior = FALSE, n_purity = 100, ...) {
# Check for use of arguments that are now deprecated.
args <- list(...)
if (any(is.element(names(args),c("bhat","shat","R","var_y"))))
stop("susie_suff_stat no longer accepts inputs bhat, shat, R or var_y; ",
"these inputs are now accepted by susie_rss instead")
# Process input estimate_prior_method.
estimate_prior_method = match.arg(estimate_prior_method)
if (missing(n))
stop("n must be provided")
if (n <= 1)
stop("n must be greater than 1")
# Check sufficient statistics.
missing_XtX = c(missing(XtX), missing(Xty), missing(yty))
if (all(missing_XtX))
stop("Please provide all of XtX, Xty, yty, n")
if (ncol(XtX) > 1000 & !requireNamespace("Rfast",quietly = TRUE))
warning_message("For large R or large XtX, consider installing the ",
"Rfast package for better performance.", style="hint")
# Check input XtX.
if (ncol(XtX) != length(Xty))
stop(paste0("The dimension of XtX (",nrow(XtX)," by ",ncol(XtX),
") does not agree with expected (",length(Xty)," by ",
length(Xty),")"))
if (!is_symmetric_matrix(XtX)) {
warning_message("XtX is not symmetric; forcing XtX to be symmetric by ",
"replacing XtX with (XtX + t(XtX))/2")
XtX = XtX + t(XtX)
XtX = XtX/2
}
# MAF filter.
if (!is.null(maf)) {
if (length(maf) != length(Xty))
stop(paste("The length of maf does not agree with expected",length(Xty)))
id = which(maf > maf_thresh)
XtX = XtX[id,id]
Xty = Xty[id]
}
if (any(is.infinite(Xty)))
stop("Input Xty contains infinite values")
if (!(is.double(XtX) & is.matrix(XtX)) & !inherits(XtX,"CsparseMatrix"))
stop("Input XtX must be a double-precision matrix, or a sparse matrix")
if (anyNA(XtX))
stop("Input XtX matrix contains NAs")
# Replace NAs in Xty with zeros.
if (anyNA(Xty)) {
warning_message("NA values in Xty are replaced with 0")
Xty[is.na(Xty)] = 0
}
if (check_input) {
# Check whether XtX is positive semidefinite.
semi_pd = check_semi_pd(XtX,r_tol)
if (!semi_pd$status)
stop("XtX is not a positive semidefinite matrix")
# Check whether Xty in space spanned by the non-zero eigenvectors of XtX
proj = check_projection(semi_pd$matrix,Xty)
if (!proj$status)
warning_message("Xty does not lie in the space of the non-zero eigenvectors ",
"of XtX")
}
if (is.numeric(null_weight) && null_weight == 0)
null_weight = NULL
if (!is.null(null_weight)) {
if (!is.numeric(null_weight))
stop("Null weight must be numeric")
if (null_weight < 0 || null_weight >= 1)
stop("Null weight must be between 0 and 1")
if (is.null(prior_weights))
prior_weights = c(rep(1/ncol(XtX)*(1-null_weight),ncol(XtX)),null_weight)
else
prior_weights = c(prior_weights*(1 - null_weight),null_weight)
XtX = cbind(rbind(XtX,0),0)
Xty = c(Xty,0)
}
p = ncol(XtX)
if (standardize) {
dXtX = diag(XtX)
csd = sqrt(dXtX/(n-1))
csd[csd == 0] = 1
XtX = t((1/csd) * XtX) / csd
Xty = Xty / csd
} else
csd = rep(1,length = p)
attr(XtX,"d") = diag(XtX)
attr(XtX,"scaled:scale") = csd
# Check that X_colmeans has length 1 or p.
if (length(X_colmeans) == 1)
X_colmeans = rep(X_colmeans,p)
if (length(X_colmeans) != p)
stop("The length of X_colmeans does not agree with number of variables")
# Initialize susie fit.
s = init_setup(0,p,L,scaled_prior_variance,residual_variance,prior_weights,
null_weight,yty/(n-1),standardize)
s$Xr = NULL
s$XtXr = rep(0,p)
if (!missing(s_init)&& !is.null(s_init)) {
if (!inherits(s_init,"susie"))
stop("s_init should be a susie object")
if (max(s_init$alpha) > 1 || min(s_init$alpha) < 0)
stop("s_init$alpha has invalid values outside range [0,1]; please ",
"check your input")
# First, remove effects with s_init$V = 0
s_init = susie_prune_single_effects(s_init)
num_effects = nrow(s_init$alpha)
if(missing(L)){ # if L is not specified, we set it as in s_init.
L = num_effects
}else if(min(p, L) < num_effects){
warning_message(paste("Specified number of effects L =",min(p, L),
"is smaller than the number of effects",num_effects,
"in input SuSiE model. The initialized SuSiE model will have",
num_effects,"effects."))
L = num_effects
}
# expand s_init if L > num_effects.
s_init = susie_prune_single_effects(s_init, min(p, L), s$V)
s = modifyList(s,s_init)
s = init_finalize(s,X = XtX)
s$XtXr = s$Xr
s$Xr = NULL
} else
s = init_finalize(s)
# Initialize elbo to NA.
elbo = rep(as.numeric(NA),max_iter + 1)
elbo[1] = -Inf;
tracking = list()
bhat = (1/attr(XtX,"d")) * Xty
shat = sqrt(s$sigma2/attr(XtX,"d"))
z = bhat/shat
zm = max(abs(z[!is.nan(z)]))
for (i in 1:max_iter) {
if (track_fit)
tracking[[i]] = susie_slim(s)
s = update_each_effect_ss(XtX,Xty,s,estimate_prior_variance,
estimate_prior_method,check_null_threshold)
if(check_prior){
if(any(s$V > 100*(zm^2))){
stop('The estimated prior variance is unreasonably large.
This is usually caused by mismatch between the summary statistics and the LD matrix.
Please check the input.')
}
}
if (verbose)
print(paste0("objective:",get_objective_ss(XtX,Xty,s,yty,n)))
# Compute objective before updating residual variance because part
# of the objective s$kl has already been computed under the
# residual variance before the update.
elbo[i+1] = get_objective_ss(XtX,Xty,s,yty,n)
if(is.infinite(elbo[i+1])){
stop('The objective becomes infinite. Please check the input.')
}
if ((elbo[i+1] - elbo[i]) < tol) {
s$converged = TRUE
break
}
if (estimate_residual_variance) {
est_sigma2 = estimate_residual_variance_ss(XtX,Xty,s,yty,n)
if (est_sigma2 < 0)
stop("Estimating residual variance failed: the estimated value ",
"is negative")
s$sigma2 = est_sigma2
if (verbose)
print(paste0("objective:",get_objective_ss(XtX,Xty,s,yty,n)))
}
}
elbo = elbo[2:(i+1)] # Remove first (infinite) entry, and trailing NAs.
s$elbo = elbo
s$niter = i
if (is.null(s$converged)) {
warning(paste("IBSS algorithm did not converge in",max_iter,"iterations!
Please check consistency between summary statistics and LD matrix.
See https://stephenslab.github.io/susieR/articles/susierss_diagnostic.html"))
s$converged = FALSE
}
s$X_column_scale_factors = attr(XtX,"scaled:scale")
# Compute intercept.
s$intercept = y_mean -
sum(X_colmeans * (colSums(s$alpha * s$mu)/s$X_column_scale_factors))
if (track_fit)
s$trace = tracking
# SuSiE CS and PIP.
if (!is.null(coverage) && !is.null(min_abs_corr)) {
if(any(!(diag(XtX) %in% c(0,1)))){
s$sets = susie_get_cs(s,coverage = coverage,Xcorr = muffled_cov2cor(XtX),
min_abs_corr = min_abs_corr,
check_symmetric = FALSE,
n_purity = n_purity)
}else
s$sets = susie_get_cs(s,coverage = coverage,Xcorr = XtX,
min_abs_corr = min_abs_corr,
check_symmetric = FALSE,
n_purity = n_purity)
s$pip = susie_get_pip(s,prune_by_cs = FALSE,prior_tol = prior_tol)
}
if (!is.null(colnames(XtX))) {
variable_names = colnames(XtX)
if (!is.null(null_weight)) {
variable_names[length(variable_names)] = "null"
names(s$pip) = variable_names[-p]
} else
names(s$pip) = variable_names
colnames(s$alpha) = variable_names
colnames(s$mu) = variable_names
colnames(s$mu2) = variable_names
colnames(s$lbf_variable) = variable_names
}
if (refine) {
if (!missing(s_init) && !is.null(s_init))
warning("The given s_init is not used in refinement")
if (!is.null(null_weight) && null_weight != 0) {
## if null_weight is specified
## we remove the extra 0 column
XtX = XtX[1:(p-1), 1:(p-1)]
Xty = Xty[1:(p-1)]
pw_s = s$pi[-s$null_index]/(1 - null_weight)
} else
pw_s = s$pi
conti = TRUE
while (conti & length(s$sets$cs)>0) {
m = list()
for(cs in 1:length(s$sets$cs)) {
pw_cs = pw_s
pw_cs[s$sets$cs[[cs]]] = 0
if(all(pw_cs == 0)){
break
}
s2 = susie_suff_stat(XtX = XtX, Xty = Xty, yty = yty, n = n, L = L,
X_colmeans = X_colmeans, y_mean = y_mean,
prior_weights = pw_cs, s_init = NULL,
scaled_prior_variance = scaled_prior_variance,
residual_variance = residual_variance,
estimate_residual_variance = estimate_residual_variance,
estimate_prior_variance = estimate_prior_variance,
estimate_prior_method = estimate_prior_method,
check_null_threshold = check_null_threshold, prior_tol = prior_tol,
r_tol = r_tol, max_iter = max_iter,
null_weight = null_weight, standardize = standardize,
coverage = coverage, min_abs_corr = min_abs_corr, tol = tol,
verbose = FALSE, track_fit = FALSE, check_input = FALSE,
refine = FALSE)
sinit2 = s2[c("alpha","mu","mu2")]
class(sinit2) = "susie"
s3 = susie_suff_stat(XtX = XtX, Xty = Xty, yty = yty, n = n, L = L,
X_colmeans = X_colmeans, y_mean = y_mean,
prior_weights = pw_s, s_init = sinit2,
scaled_prior_variance = scaled_prior_variance,
residual_variance = residual_variance,
estimate_residual_variance = estimate_residual_variance,
estimate_prior_variance = estimate_prior_variance,
estimate_prior_method = estimate_prior_method,
check_null_threshold = check_null_threshold, prior_tol = prior_tol,
r_tol = r_tol, max_iter = max_iter, null_weight = null_weight,
standardize = standardize, coverage = coverage,
min_abs_corr = min_abs_corr, tol = tol, verbose = FALSE,
track_fit = FALSE, check_input = FALSE, refine = FALSE)
m = c(m,list(s3))
}
if(length(m) == 0){
conti = FALSE
}else{
elbo = sapply(m, function(x) susie_get_objective(x))
if ((max(elbo) - susie_get_objective(s)) <= 0)
conti = FALSE
else
s = m[[which.max(elbo)]]
}
}
}
return(s)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.