TGPred is an R package that provides six efficient methods for predicting target genes of a transcription factor by integrating statistics, machine learning, and optimization.
A Python version is also available; see https://github.com/tobefuture/TGPred for its guide.
You can install the released version of TGPred from GitHub with:
# install.packages("devtools") if devtools is not already installed
devtools::install_github("xueweic/TGPred")
Xuewei Cao+, Ling Zhang+, Mingxia Zhao, Cheng He, Kui Zhang, Sanzhen Liu, Qiuying Sha, Hairong Wei. TGPred: Efficient methods for predicting target genes of a transcription factor by integrating statistics, machine learning, and optimization.
+ These authors have contributed equally to this work
Any questions? xueweic_AT_mtu_DOT_edu, lingzhan_AT_mtu_DOT_edu
Step 1: Construct the network structure from either a Hierarchical Network (HN) or a Barabasi-Albert Network (BAN) for simulation studies.
library(TGPred)
N_genes <- 200
# Hierarchical network
Adj <- ConstructNetwork(N_genes, "HN")
# or a Barabasi-Albert network
Adj <- ConstructNetwork(N_genes, "BAN")
Step 2: Calculate Laplacian matrix and symmetric normalized Laplacian matrix from an adjacency matrix.
# Matrix derived from the network via a graphical model; used by SimulationData in Step 3
Sigma1 <- GraphicalModel(Adj)
# Laplacian matrix and symmetric normalized Laplacian matrix
res <- CalculateLaplacian(Adj)
L <- res$L            # Laplacian matrix
L_norm <- res$L_norm  # symmetric normalized Laplacian matrix
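For reference, these follow the standard definitions: with degree matrix $D = \mathrm{diag}(A\mathbf{1})$, the Laplacian is $L = D - A$ and the symmetric normalized Laplacian is $L_{norm} = D^{-1/2} L D^{-1/2}$. Assuming `CalculateLaplacian` uses these textbook definitions, you can verify them directly:
# Check against the textbook definitions (an assumption about CalculateLaplacian)
D <- diag(rowSums(Adj))
all.equal(unname(L), unname(D - Adj))
deg <- pmax(rowSums(Adj), 1)        # guard against isolated nodes
D_inv_sqrt <- diag(1 / sqrt(deg))
all.equal(unname(L_norm), unname(D_inv_sqrt %*% (D - Adj) %*% D_inv_sqrt))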
Step 3: Simulate y and X from a given network structure (Adjacency matrix and Laplacian matrix).
N_sample <- 300
# Simulate from a Barabasi-Albert network
res <- SimulationData(N_sample, N_genes, Adj, Sigma1, "BAN", beta0 = 1)
y <- res$y
X <- res$X
# or from a hierarchical network
res <- SimulationData(N_sample, N_genes, Adj, Sigma1, "HN", beta0 = 1)
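In either case, `SimulationData` should return a response vector of length `N_sample` and a design matrix with one column per gene, which is easy to confirm:
length(y)  # N_sample = 300
dim(X)     # N_sample x N_genes, i.e., 300 x 200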
Step 4: Calculate the estimated regression coefficients $\hat{\beta}$ using one of the six methods, solved by either APGD or CVX, for a given pair of $\alpha_0$ and $\lambda_0$.
## HuberNet
lambda0 <- 200
alpha0 <- 0.5
beta_hat_APGD <- HuberNet_Beta(X, y, Adj, lambda0, alpha0, method="APGD", if.scale=TRUE)
plot(beta_hat_APGD)
# install.packages("CVXR") if it is not already installed
library("CVXR")
beta_hat_CVX <- HuberNet_Beta(X, y, Adj, lambda0, alpha0, method="CVX", if.scale=TRUE)
plot(beta_hat_CVX)
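APGD and CVX solve the same penalized problem, so the two estimates should agree up to solver tolerance. A quick check, assuming both functions return plain numeric coefficient vectors (as the `plot()` calls above suggest):
# The difference should be near zero if both solvers converged
max(abs(as.numeric(beta_hat_APGD) - as.numeric(beta_hat_CVX)))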
## HuberENET
lambda0 <- 200
alpha0 <- 0.5
beta_hat_APGD <- HuberENET_Beta(X, y, lambda0, alpha0, method="APGD", if.scale=TRUE)
library("CVXR")
beta_hat_CVX <- HuberENET_Beta(X, y, lambda0, alpha0, method="CVX", if.scale=TRUE)
## HuberLasso
lambda0 <- 200
beta_hat_APGD <- HuberLasso_Beta(X, y, lambda0, method="APGD", if.scale=TRUE)
library("CVXR")
beta_hat_CVX <- HuberLasso_Beta(X, y, lambda0, method="CVX", if.scale=TRUE)
## MSEENET
lambda0 <- 200
alpha0 <- 0.5
beta_hat_APGD <- MSEENET_Beta(X, y, lambda0, alpha0, method="APGD", if.scale=TRUE)
library("CVXR")
beta_hat_CVX <- MSEENET_Beta(X, y, lambda0, alpha0, method="CVX", if.scale=TRUE)
## MSELasso
lambda0 <- 200
beta_hat_APGD <- MSELasso_Beta(X, y, lambda0, method="APGD", if.scale=TRUE)
library("CVXR")
beta_hat_CVX <- MSELasso_Beta(X, y, lambda0, method="CVX", if.scale=TRUE)
## MSENet
lambda0 <- 200
alpha0 <- 0.5
beta_hat_APGD <- MSENet_Beta(X, y, Adj, lambda0, alpha0, method="APGD", if.scale=TRUE)
library("CVXR")
beta_hat_CVX <- MSENet_Beta(X, y, Adj, lambda0, alpha0, method="CVX", if.scale=TRUE)
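All six penalties induce sparsity in $\hat{\beta}$, so a useful one-number summary of any fit is the count of effectively nonzero coefficients. A sketch (the 1e-6 threshold is an arbitrary numerical tolerance, not a package default):
# Number of predictors retained by the penalty
sum(abs(as.numeric(beta_hat_APGD)) > 1e-6)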
To avoid having to select a single optimal pair of tuning parameters $\lambda$ and $\alpha$, we can use a half-sample resampling method to calculate the selection probability of each predictor. The grid of $\lambda$ values for a given $\alpha$ under the proposed Huber or MSE loss functions can be calculated by:
alpha <- 0.5
n_lambda <- 10
ratio <- 0.01
# Grid under the Huber loss
lambda_set <- Lambda_grid(X, y, n_lambda, alpha, loss_func = "Huber", ratio)
# or under the MSE loss
lambda_set <- Lambda_grid(X, y, n_lambda, alpha, loss_func = "MSE", ratio)
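`Lambda_grid` is expected to return a vector of `n_lambda` candidate values; if, as the `ratio` argument suggests, the grid runs from a data-driven maximum down to `ratio` times that maximum, you can confirm this with:
length(lambda_set)                 # n_lambda candidate values
round(range(lambda_set), 4)
min(lambda_set) / max(lambda_set)  # expected to be close to ratio = 0.01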
The selection probabilities can then be calculated for each of the six methods:
## HuberNet
alphas <- seq(0.1,0.9,0.1)
n_lambda <- 10
B0 <- 100
ratio <- 0.01
SP_HuberNet <- HuberNet_SP(X, y, Adj, alphas, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
## HuberENET
SP_HuberENET = HuberENET_SP(X, y, alphas, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
## MSENet
SP_Net <- MSENet_SP(X, y, Adj, alphas, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
## MSEENET
SP_ENET = MSEENET_SP(X, y, alphas, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
## HuberLasso
n_lambda <- 50
SP_HuberLasso = HuberLasso_SP(X, y, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
## MSELasso
SP_Lasso = MSELasso_SP(X, y, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
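Each `*_SP` call returns selection probabilities over the predictors, which can then be ranked to nominate putative target genes. A minimal sketch, assuming the result is a numeric vector with one entry per column of `X` (the 0.8 cutoff is illustrative, not a package recommendation):
sp <- as.numeric(SP_HuberNet)
names(sp) <- colnames(X)               # or paste0("gene", seq_along(sp)) if X has no column names
head(sort(sp, decreasing = TRUE), 10)  # top-ranked predictors
names(sp)[sp >= 0.8]                   # putative targets above an illustrative cutoff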
Example datasets
Example code for HuberNet penalized regression.
library(TGPred)
Sample_data <- TGPred::Sample_data
X <- Sample_data$PWG
y <- Sample_data$TF
Annotation <- Sample_data$Annotation
## obtain adjacency matrix from Annotation file
Annotation <- data.matrix(Annotation)
Adj <- CalculateAdj(Annotation)
## Estimate Regression Coefficients by APGD or CVX
lambda0 = 10
alpha0 = 0.5
beta_hat_APGD <- HuberNet_Beta(X, y, Adj, lambda0, alpha0, method="APGD", gamma=1000, niter=2000, crit_beta=1e-4, crit_obj=1e-8)
library("CVXR")
beta_hat_CVX <- HuberNet_Beta(X, y, Adj, lambda0, alpha0, method="CVX")
## Calculate Selection Probabilities by APGD (90 alpha-lambda pairs and 100 resamplings for each pair)
alphas <- seq(0.1,0.9,0.1)
n_lambda <- 10
B0 <- 100
ratio <- 0.01
SP_HuberNet <- HuberNet_SP(X, y, Adj, alphas, n_lambda, ratio, B=B0, gamma=1000, niter=2000, timer=FALSE)
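Finally, the selection probabilities can be attached to the gene names and exported for downstream use; a sketch under the assumptions that `SP_HuberNet` has one entry per column of `X` and that the columns of `X` carry gene names:
result <- data.frame(gene = colnames(X), SP = as.numeric(SP_HuberNet))
result <- result[order(result$SP, decreasing = TRUE), ]
head(result, 20)                                          # top candidate target genes
# write.csv(result, "HuberNet_SP.csv", row.names = FALSE) # optional export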
Your own datasets
To apply these methods to your own data, replace `Sample_data` above with your own expression matrix `X`, response vector `y`, and `Annotation` file, then run the same steps.