Description Usage Arguments Value Author(s) See Also Examples
The model for compound-protein interactions (CPI) takes pairs of compound SMILES strings and protein amino acid sequences (one-letter amino acid code) as input. They are fed into the compound and protein encoders, respectively, and the outputs of these encoders are then concatenated. Depending on the combination of compound and protein encoders, there are many kinds of CPI models. However, graph neural networks such as the graph convolutional network (GCN) are available only for compounds. One compound type ("graph", "fingerprint" or "sequence") must be selected. For the graph and fingerprint types, the SMILES sequences are not fed to the encoders directly; instead, the graph or fingerprint information is extracted from the SMILES sequences and then fed into the encoders. For the sequence type, unigrams are used by default; n-grams are available only for proteins. Since the CPI model needs some of the encoders' arguments, the names of those arguments may have to be matched (see net_names).
fit_cpi(smiles = NULL, AAseq = NULL, outcome,
convert_canonical_smiles = TRUE,
compound_type = NULL, compound_max_atoms,
compound_length_seq, protein_length_seq,
compound_embedding_dim, protein_embedding_dim,
protein_ngram_max = 1, protein_ngram_min = 1,
smiles_val = NULL, AAseq_val = NULL, outcome_val = NULL,
net_args = list(
compound,
compound_args,
protein,
protein_args,
fc_units = c(1),
fc_activation = c("linear"), ...),
net_names = list(
name_compound_max_atoms = NULL,
name_compound_feature_dim = NULL,
name_compound_fingerprint_size = NULL,
name_compound_embedding_layer = NULL,
name_compound_length_seq = NULL,
name_compound_num_tokens = NULL,
name_compound_embedding_dim = NULL,
name_protein_length_seq = NULL,
name_protein_num_tokens = NULL,
name_protein_embedding_dim = NULL),
preprocessor_only = FALSE,
preprocessing = list(
outcome = NULL,
outcome_val = NULL,
convert_canonical_smiles = NULL,
canonical_smiles = NULL,
compound_type = NULL,
compound_max_atoms = NULL,
compound_A_pad = NULL,
compound_X_pad = NULL,
compound_A_pad_val = NULL,
compound_X_pad_val = NULL,
compound_fingerprint = NULL,
compound_fingerprint_val = NULL,
smiles_encode_pad = NULL,
smiles_val_encode_pad = NULL,
compound_lenc = NULL,
compound_length_seq = NULL,
compound_num_tokens = NULL,
compound_embedding_dim = NULL,
AAseq_encode_pad = NULL,
AAseq_val_encode_pad = NULL,
protein_lenc = NULL,
protein_length_seq = NULL,
protein_num_tokens = NULL,
protein_embedding_dim = NULL,
protein_ngram_max = NULL,
protein_ngram_min = NULL),
batch_size, use_generator = FALSE,
validation_split = 0, ...)
predict_cpi(modelRes, smiles = NULL, AAseq = NULL,
preprocessing = list(
canonical_smiles = NULL,
compound_A_pad = NULL,
compound_X_pad = NULL,
compound_fingerprint = NULL,
smiles_encode_pad = NULL,
AAseq_encode_pad = NULL),
use_generator = FALSE,
batch_size = NULL)
|
smiles |
SMILES strings, each column for the element of a pair (default: NULL) |
AAseq |
amino acid sequences, each column for the element of a pair (default: NULL) |
outcome |
a variable that indicates how strongly two molecules interact with each other, or whether there is an interaction between them |
convert_canonical_smiles |
SMILES strings are converted to canonical SMILES strings if TRUE (default: TRUE) |
compound_type |
"graph", "fingerprint" or "sequence" |
compound_max_atoms |
maximum number of atoms for compounds |
compound_length_seq |
length of compound sequence |
protein_length_seq |
length of protein sequence |
compound_embedding_dim |
dimension of the dense embedding for compounds |
protein_embedding_dim |
dimension of the dense embedding for proteins |
protein_ngram_max |
maximum size of an n-gram for protein sequences (default: 1) |
protein_ngram_min |
minimum size of an n-gram for protein sequences (default: 1) |
smiles_val |
SMILES strings for validation (default: NULL) |
AAseq_val |
amino acid sequences for validation (default: NULL) |
outcome_val |
outcome for validation (default: NULL) |
net_args |
list of arguments for compound and protein encoder networks and for fully connected layer
|
net_names |
list of names of arguments used in both the CPI model and the encoder networks; all names are set to NULL by default
|
preprocessor_only |
model is not fitted after preprocessing if TRUE (default: FALSE) |
preprocessing |
list of preprocessed results for "fit_cpi" or "predict_cpi"; all elements are set to NULL by default
|
batch_size |
batch size |
use_generator |
use data generator if TRUE (default: FALSE) |
validation_split |
proportion of the data used for validation; it is ignored when a validation set is provided (default: 0) |
modelRes |
result of the "fit_cpi" |
... |
additional parameters for the "keras::fit" or "keras::fit_generator" |
model
Dongmin Jung
keras::compile, keras::fit, keras::fit_generator, keras::layer_dense, keras::keras_model, purrr::pluck, webchem::is.smiles
compound_max_atoms <- 50
protein_embedding_dim <- 16
protein_length_seq <- 100
gcn_cnn_cpi <- fit_cpi(
smiles = example_cpi[1:100, 1],
AAseq = example_cpi[1:100, 2],
outcome = example_cpi[1:100, 3],
compound_type = "graph",
compound_max_atoms = compound_max_atoms,
protein_length_seq = protein_length_seq,
protein_embedding_dim = protein_embedding_dim,
net_args = list(
compound = "gcn_in_out",
compound_args = list(
gcn_units = c(128, 64),
gcn_activation = c("relu", "relu"),
fc_units = c(10),
fc_activation = c("relu")),
protein = "cnn_in_out",
protein_args = list(
cnn_filters = c(32),
cnn_kernel_size = c(3),
cnn_activation = c("relu"),
fc_units = c(10),
fc_activation = c("relu")),
fc_units = c(1),
fc_activation = c("sigmoid"),
loss = "binary_crossentropy",
optimizer = keras::optimizer_adam(),
metrics = "accuracy"),
epochs = 2, batch_size = 16)
pred <- predict_cpi(gcn_cnn_cpi, example_cpi[101:110, 1], example_cpi[101:110, 2])
gcn_cnn_cpi2 <- fit_cpi(
preprocessing = gcn_cnn_cpi$preprocessing,
net_args = list(
compound = "gcn_in_out",
compound_args = list(
gcn_units = c(128, 64),
gcn_activation = c("relu", "relu"),
fc_units = c(10),
fc_activation = c("relu")),
protein = "cnn_in_out",
protein_args = list(
cnn_filters = c(32),
cnn_kernel_size = c(3),
cnn_activation = c("relu"),
fc_units = c(10),
fc_activation = c("relu")),
fc_units = c(1),
fc_activation = c("sigmoid"),
loss = "binary_crossentropy",
optimizer = keras::optimizer_adam(),
metrics = "accuracy"),
epochs = 2, batch_size = 16)
pred <- predict_cpi(gcn_cnn_cpi2, preprocessing = pred$preprocessing)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.