#' trcQTL with permutation in matrix form
#'
#' Perform trcQTL in matrix form with outcome permutated K times
#'
#' @param trc total read count (dimension = N x 1)
#' @param lib_size library size (dimension = N x 1)
#' @param x 'genotype' used in trcQTL (which is defined as (x1 + x2) / 2) (dimension = N x P)
#' @param cov effect of covariates on log(trc/2) (dimension = N x 1)
#' @param permutation_idx permutation of samples in each column (dimension = N x K)
#' @param trc_cutoff total read count cutoff to exclude observations with trc lower than the cutoff
#'
#' @return a list of summary statistics
#' beta_hat: estimated log aFC (dimension = K x P, with kth row corresponding to the result under
#' permutation defined in the kth column of permutation_idx)
#' beta_se: standard deviation of log aFC (dimension = K x P)
#' sample_size: number of observations used in regression
#'
#' @examples
#' matrix_ls_trc_permutation(
#' trc = rpois(100, 100),
#' lib_size = rpois(100, 10000),
#' x = matrix(sample(c(0, 0.5, 1), 200, replace = TRUE), ncol = 2),
#' cov = rnorm(100),
#' permutation_idx = matrix(c(sample(1:100), sample(1:100), sample(1:100)), ncol = 3, byrow = FALSE),
#' trc_cutoff = 20
#' )
#'
#' @export
#' @importFrom stats var
matrix_ls_trc_permutation = function(trc, lib_size, x, cov, permutation_idx, trc_cutoff = 20) {
trc_in = trc
trc = log(trc / 2 / lib_size) - cov
snp_id = 1 : ncol(x)
output = list(beta_hat = matrix(NA, ncol = length(snp_id), nrow = ncol(permutation_idx)), beta_se = matrix(NA, ncol = length(snp_id), nrow = ncol(permutation_idx)))
na_ind = is.na(trc) | trc_in < trc_cutoff
mask_ind = (!na_ind) * 1
mono_ind = apply(x, 2, var) == 0
x_filtered = x[, !mono_ind, drop = F]
sample_size = sum(mask_ind)
trc[na_ind] = 0
intercept_mat = matrix(1, nrow = nrow(x_filtered), ncol = ncol(x_filtered))
if(sample_size > 2 & ncol(x_filtered) > 0) {
trc_perm = matrix(as.vector(trc)[permutation_idx], byrow = F, ncol = ncol(permutation_idx))
mask_perm = matrix(as.vector(mask_ind)[permutation_idx], byrow = F, ncol = ncol(permutation_idx))
out = matrixQTL_with_mask_two_dim(trc_perm, x_filtered, intercept_mat, mask_perm)
output$beta_hat[, !mono_ind] = t(out$beta1_hat)
output$beta_se[, !mono_ind] = t(out$beta1_se)
}
output$sample_size = sample_size
return(output)
}
#' ascQTL with permutation in matrix form
#'
#' Perform ascQTL in matrix form with outcome permutated K times
#'
#' @param asc1 allele-specific read count for haplotype 1 (dimension = N x 1)
#' @param asc2 allele-specific read count for haplotype 2 (dimension = N x 1)
#' @param x 'genotype' used in ascQTL (which is defined as x1 - x2) (dimension = N x P)
#' @param permutation_idx permutation of samples in each column (dimension = N x K)
#' @param asc_cutoff allele-specific read count cutoff to exclude observations with asc1 or asc2 lower than asc_cutoff
#' @param weight_cap the maximum weight difference (in fold) is min(weight_cap, floor(sample_size / 10)). The ones exceeding the cutoff is capped.
#' @param asc_cap exclude observations with asc1 or asc2 higher than asc_cap
#'
#' @return a list of summary statistics
#' beta_hat: estimated log aFC (dimension = K x P, with kth row corresponding to the result under
#' permutation defined in the kth column of permutation_idx)
#' beta_se: standard deviation of log aFC (dimension = K x P)
#' sample_size: number of observations used in regression
#'
#' @examples
#' matrix_ls_asc_permutation(
#' asc1 = rpois(100, 100),
#' asc2 = rpois(100, 100),
#' x = matrix(sample(c(0, 0.5, 1), 200, replace = TRUE), ncol = 2),
#' permutation_idx = matrix(c(sample(1:100), sample(1:100), sample(1:100)), ncol = 3, byrow = FALSE),
#' asc_cutoff = 20,
#' weight_cap = 10,
#' asc_cap = 1000
#' )
#'
#' @export
#' @importFrom stats var
matrix_ls_asc_permutation = function(asc1, asc2, x, permutation_idx, asc_cutoff = 5, weight_cap = 100, asc_cap = 5000) {
snp_id = 1 : ncol(x)
output = list(beta_hat = matrix(NA, ncol = length(snp_id), nrow = ncol(permutation_idx)), beta_se = matrix(NA, ncol = length(snp_id), nrow = ncol(permutation_idx)))
passed_ind = asc1 >= asc_cutoff & asc2 >= asc_cutoff & asc1 <= asc_cap & asc2 <= asc_cap
mask_ind = passed_ind * 1
asc = log(asc1 / asc2)
mono_ind = apply(x, 2, var) == 0
x_filtered = x[, !mono_ind, drop = F]
sample_size = sum(mask_ind)
weights = harmonic_sum_(asc1, asc2)
weights[!passed_ind] = 0
asc[!passed_ind] = 0
weight_cap = min(weight_cap, floor(sample_size / 10))
weight_cutoff = min(weights) * weight_cap
weights[weights > weight_cutoff] = weight_cutoff
if(sample_size > 2 & ncol(x_filtered) > 0) {
# asc = diag(sqrt(weights)) %*% asc
# x_filtered = diag(sqrt(weights)) %*% x_filtered
# This is the work around for scaling by weight
asc_perm = matrix(as.vector(asc)[permutation_idx], byrow = F, ncol = ncol(permutation_idx))
weight_val = mask_ind * weights
weight_perm = matrix(weight_val[permutation_idx], byrow = F, ncol = ncol(permutation_idx))
mask_perm = matrix(as.vector(mask_ind)[permutation_idx], byrow = F, ncol = ncol(permutation_idx))
out = matrixQTL_with_mask_one_dim(asc_perm, x_filtered, mask_perm, Weight = weight_perm)
output$beta_hat[, !mono_ind] = t(out$beta_hat)
output$beta_se[, !mono_ind] = t(out$beta_se)
}
output$sample_size = sample_size
return(output)
}
#' Solve Y = Xb + e with mask and weight in matrix form
#'
#' For each column i in X, solve Y_k = X_i b_i + e with M_k as mask (entries with M_k = 0 are excluded)
#' and observations are weighted by W_k
#' as least squares problem and output estimated effect size and standard deviation
#'
#' @param Y response to regress against (dimension = N x K)
#' @param X P predictors to perform regression separately (dimension = N x P)
#' @param M mask for regression of Y_k (dimension = N x K)
#' @param Weight weights for regression of Y_k (dimension = N x K; default = NULL)
#'
#' @return a list of summary statistics
#' beta_hat: estimated b, b_hat (dimension = K x P)
#' beta_se: standard deviation of b_hat (dimension = K x P)
#'
#' @examples
#' matrixQTL_with_mask_one_dim(
#' Y = matrix(rnorm(300), ncol = 3),
#' X = matrix(sample(c(0, 0.5, 1), 200, replace = TRUE), ncol = 2),
#' M = matrix(sample(c(0, 1), 300, replace = TRUE), ncol = 1),
#' Weight = matrix(runif(300), ncol = 3)
#' )
#'
#' @export
#' @import tensorA
matrixQTL_with_mask_one_dim = function(Y, X, M, Weight = NULL) {
# input: n -> i, p -> j, k -> k
# Y: n by k
# X: n by p
# M: n by k --> binary
# W: n by k --> weight of each observation
# output:
# for each y_k x_p pair with mask m_k and weight w_k run y ~ -1 + x
# betahat: p by k
# betase: p by k
if(dim(Y)[1] != dim(X)[1] | dim(M)[2] != dim(Y)[2]) {
message('Wrong dimension')
return(NULL)
}
X_tensor <- to.tensor(as.vector(X), c(i = dim(X)[1], j = dim(X)[2]))
# Y_tensor <- to.tensor(as.vector(Y), c(i = dim(Y)[1], j = dim(Y)[2]))
Y_tensor <- to.tensor(as.vector(Y), c(i = dim(Y)[1], k = dim(Y)[2]))
M_tensor <- to.tensor(as.vector(M), c(i = dim(M)[1], k = dim(M)[2]))
if(is.null(Weight)) {
W_tensor = M_tensor
} else {
W_tensor = to.tensor(as.vector(Weight), c(i = dim(Weight)[1], k = dim(Weight)[2])) * M_tensor
}
n_tensor <- margin.tensor(M_tensor, by = 'k')
# mask out some Y's
YsqW_tensor = Y_tensor * sqrt(W_tensor)
Y_tensor = Y_tensor * W_tensor
# END
YtX_tensor = einstein.tensor(X_tensor, by = 'j', Y_tensor)
# YtX_tensor = mul.tensor(X_tensor, i = 'i', Y = Y_tensor)
# XtX_tensor = einstein.tensor(X_tensor, by = 'j', X_tensor)
# two step work around on XtX_tensor with mask
XtX_tensor = X_tensor * X_tensor
XtX_tensor = einstein.tensor(XtX_tensor, W_tensor)
# END
hatbeta_tensor = YtX_tensor / XtX_tensor
# R_tensor = YsqW_tensor - diagmul.tensor(X_tensor, D = hatbeta_tensor, j = 'j', i = 'j') * sqrt(W_tensor)
#
# # mask out unwanted residuals
# R_tensor = R_tensor * M_tensor
# # END
#
# Rsq_tensor = einstein.tensor(R_tensor, by = c('j', 'k'), R_tensor)
# new work around: YtY - 2 b XtY + b^2 XtX
YtY_tensor = einstein.tensor(YsqW_tensor, by = 'k', YsqW_tensor)
Rsq_tensor = YtY_tensor - 2 * hatbeta_tensor * YtX_tensor + hatbeta_tensor^2 * XtX_tensor
hatsigma_tensor = sqrt(Rsq_tensor / (n_tensor - 1))
sebeta_tensor = hatsigma_tensor / sqrt(XtX_tensor)
# if(dim(Y)[2] == 1) {
# return(data.frame(beta_hat = as.vector(hatbeta_tensor), beta_se = as.vector(sebeta_tensor)))
# } else {
return(list(beta_hat = to.matrix.tensor(hatbeta_tensor, 'j', 'k'), beta_se = to.matrix.tensor(sebeta_tensor, 'j', 'k')))
# }
}
#' Solve Y = X1 b1 + X2 b2 + e with mask in matrix form
#'
#' For each column i in X, solve Y_k = X_1i b_1i + X_2i b_2i + e with M_k as mask (entries with M_k = 0 are excluded)
#' as least squares problem and output estimated effect size and standard deviation
#'
#' @param Y response to regress against (dimension = N x K)
#' @param X1 P predictors (as the first predictor) perform regression separately (dimension = N x P)
#' @param X2 P predictors (as the second predictor) to perform regression separately (dimension = N x P)
#' @param M mask for regression of Y_k (dimension = N x K)
#'
#' @return a list of summary statistics
#' beta1_hat: estimated b1, b1_hat (dimension = K x P)
#' beta1_se: standard deviation of b1_hat (dimension = K x P)
#' beta2_hat: estimated b2, b2_hat (dimension = K x P)
#' beta2_se: standard deviation of b2_hat (dimension = K x P)
#'
#' @examples
#' matrixQTL_with_mask_two_dim(
#' Y = matrix(rnorm(300), ncol = 3),
#' X1 = matrix(sample(c(0, 0.5, 1), 200, replace = TRUE), ncol = 2),
#' X2 = matrix(sample(c(0, 0.5, 1), 200, replace = TRUE), ncol = 2),
#' M = matrix(sample(c(0, 1), 300, replace = TRUE), ncol = 3)
#' )
#'
#' @export
#' @import tensorA
matrixQTL_with_mask_two_dim = function(Y, X1, X2, M) {
# input: n -> i, p -> j, k -> k
# Y: n by k
# X1: n by p
# X2: n by p
# M: n by k --> binary
# output:
# for each y_k x_p pair with mask m_k run y ~ -1 + x1 + x2
# betahat1: p by k
# betase1: p by k
# betahat2: p by k
# betase2: p by k
if(dim(Y)[1] != dim(X1)[1] | dim(M)[2] != dim(Y)[2] | dim(Y)[1] != dim(X2)[1]) {
message('Wrong dimension')
return(NULL)
}
X1_tensor <- to.tensor(as.vector(X1), c(i = dim(X1)[1], j = dim(X1)[2]))
X2_tensor <- to.tensor(as.vector(X2), c(i = dim(X2)[1], j = dim(X2)[2]))
# Y_tensor <- to.tensor(as.vector(Y), c(i = dim(Y)[1], j = dim(Y)[2]))
Y_tensor <- to.tensor(as.vector(Y), c(i = dim(Y)[1], k = dim(Y)[2]))
M_tensor <- to.tensor(as.vector(M), c(i = dim(M)[1], k = dim(M)[2]))
n_tensor <- margin.tensor(M_tensor, by = 'k')
# mask out some Y's
Y_tensor = Y_tensor * M_tensor
# END
T1_tensor = einstein.tensor(X1_tensor, by = 'j', Y_tensor)
T2_tensor = einstein.tensor(X2_tensor, by = 'j', Y_tensor)
# T1_tensor = mul.tensor(X1_tensor, i = 'i', Y = Y_tensor)
# T2_tensor = mul.tensor(X2_tensor, i = 'i', Y = Y_tensor)
# S11_tensor = einstein.tensor(X1_tensor, by = 'j', X1_tensor)
# S12_tensor = einstein.tensor(X1_tensor, by = 'j', X2_tensor)
# S22_tensor = einstein.tensor(X2_tensor, by = 'j', X2_tensor)
# two step work around on S11_tensor, S12_tensor, and S22_tensor with mask
S11_tensor = X1_tensor * X1_tensor
S11_tensor = einstein.tensor(S11_tensor, M_tensor)
S12_tensor = X1_tensor * X2_tensor
S12_tensor = einstein.tensor(S12_tensor, M_tensor)
S22_tensor = X2_tensor * X2_tensor
S22_tensor = einstein.tensor(S22_tensor, M_tensor)
# END
Delta_tensor = abs(S11_tensor * S22_tensor - S12_tensor * S12_tensor)
hatbeta1_tensor = (S22_tensor * T1_tensor - S12_tensor * T2_tensor) / Delta_tensor
hatbeta2_tensor = (S11_tensor * T2_tensor - S12_tensor * T1_tensor) / Delta_tensor
# R_tensor = Y_tensor - diagmul.tensor(X1_tensor, D = hatbeta1_tensor, j = 'j', i = 'j') - diagmul.tensor(X2_tensor, D = hatbeta2_tensor, j = 'j', i = 'j')
#
# # mask out unwanted residuals
# R_tensor = R_tensor * M_tensor
# # END
#
# Rsq_tensor = einstein.tensor(R_tensor, by = c('j', 'k'), R_tensor)
# new work around: YtY - 2 b1 X1tY - 2 b2 X2tY - 2 b1 b2 X1tX2 + b1^2 X1tX1 + b2^2 X2tX2
YtY_tensor = einstein.tensor(Y_tensor, by = 'k', Y_tensor)
Rsq_tensor = YtY_tensor - 2 * hatbeta1_tensor * T1_tensor - 2 * hatbeta2_tensor * T2_tensor + 2 * hatbeta1_tensor * hatbeta2_tensor * S12_tensor + hatbeta1_tensor^2 * S11_tensor + hatbeta2_tensor^2 * S22_tensor
hatsigma_tensor = sqrt(Rsq_tensor / (n_tensor - 2))
sebeta1_tensor = hatsigma_tensor * sqrt(S22_tensor / Delta_tensor)
sebeta2_tensor = hatsigma_tensor * sqrt(S11_tensor / Delta_tensor)
# if(dim(Y)[2] == 1) {
# return(
# data.frame(
# beta1_hat = as.vector(hatbeta1_tensor),
# beta1_se = as.vector(sebeta1_tensor),
# beta2_hat = as.vector(hatbeta2_tensor),
# beta2_se = as.vector(sebeta2_tensor)
# )
# )
# } else {
return(
list(
beta1_hat = to.matrix.tensor(hatbeta1_tensor, 'j', 'k'),
beta1_se = to.matrix.tensor(sebeta1_tensor, 'j', 'k'),
beta2_hat = to.matrix.tensor(hatbeta2_tensor, 'j', 'k'),
beta2_se = to.matrix.tensor(sebeta2_tensor, 'j', 'k')
)
)
# }
}
#' Generate permutation matrix
#'
#' Given the number of observations and the number of permutations, generate the permutated indexes (from 1 to the total number of observations) at each column of output. The number of columns of the output equals to the number of permutation desired.
#'
#' @param n number of permutation (as K)
#' @param nindiv number of observations to permutate (as N)
#' @param seed set.seed(seed) (default = 1)
#'
#' @return N-by-K matrix where each column is permutation of 1:N
#'
#' @examples
#' get_permutations(n = 10, nindiv = 100, seed = 2019)
#'
#' @export
get_permutations = function(n, nindiv, seed = 1) {
if(!is.null(seed)) {
set.seed(seed)
}
sapply(1 : n, function(x) { sample(1:nindiv) })
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.