example.R

library(hyQSAR)

#load dataset already preprocessed
data("hyQSAR_dataset")

nLambda = 100
nCores = 1
n.splits = 9

# run traditional QSAR using only MDS
md.tqsar.res = tqsar(type.transformation = "none", X_train,X_test, y_train,y_test,
                     md_list = colnames(X_train),gene_list = c(),n.splits = n.splits,
                     nLambda = nLambda, nCores = nCores)

# run traditional QSAR using only MOA features
ge.tqsar.res = tqsar(type.transformation = "none", X_train = X_ge[train.idx,],
                     X_test = X_ge[test.idx,], y_train,y_test,
                     md_list = c(), gene_list = colnames(X_ge),
                     n.splits = n.splits,nLambda = nLambda, nCores = nCores)

# create stacking regression model by combining MD and MOA models
stack_res = stacking_regression_wrapper(pred_tr = c(md.tqsar.res$pred_tr,ge.tqsar.res$pred_tr),
                                        pred_te = c(md.tqsar.res$pred_te,ge.tqsar.res$pred_te),
                                        y_test = y_test,y_train =y_train)


# Create hybrid QSAR model by combining MDs and MOA features with the ABS-POW transformation

# fit hybrid models by applying the ABS-POW transformation to both MD and MOA features. This function computes 81 models
# for each combination of the alpha and alpha1 values. The model are also validated by computing up-to-date internal and
# external validation measures according to the OECD principles.
abs.alpha.power.res = hqsar(type.transformation.md = "abs.alpha.power",
                            type.transformation.ge = "abs.alpha.power",
                            X_md_train = X_md[train.idx,],X_md_test = X_md[test.idx,],
                            X_ge_train = X_ge[train.idx,],X_ge_test = X_ge[test.idx,],
                            y_train = y[train.idx],y_test = y[test.idx],
                            alpha = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
                            alpha1 = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
                            type.solution="lci", measure="MSE",
                            n.splits = n.splits,nLambda = nLambda, nCores = nCores)

# this function filter the 81 models computed with the hqsar function according to the accepted threshold for the validation
# metrics. Furthermore it store the filtered and unfiltered metric files in the start_path folder. Eventually it store the
# williams plot, the scatter plot of experimental vs predicted values and the csv with the predicted values in the same folder.

#### NB change the start_path variable or create the results folder!
dir.create("results")
abs.alpha.power.res.filtered.metrics=build_predicted_results(res = abs.alpha.power.res,
                                                             type.transformation="abs_alpha_power_res__abs_alpha_power_res",
                                                             start_path = "results/", xlab = "Experimental HSB", ylab="Predicted HSB")

#### NB check the optimal solution and set the idx variable accordingly
View(abs.alpha.power.res.filtered.metrics)
idx = 40

# compute the stacking regression model combining the prediced results of the 81 models
abs.alpha.power.res_stacking_res_list = stacking_regression_wrapper(pred_tr = abs.alpha.power.res$pred_tr,
                                                                    pred_te = abs.alpha.power.res$pred_te,
                                                                    y_test = abs.alpha.power.res$y_test,
                                                                    y_train = abs.alpha.power.res$y_train)

# Plot the RSVA correlation plot of the 36-th model
plot(abs.alpha.power.res$finalModels[[idx]])

# investigate MDs and MOA features in the selected model
coeff = round(as.numeric(unlist(strsplit(as.character(abs.alpha.power.res$Metrics[paste(idx,sep = ""),c("coefficients")]),split=";"))),4)
MDs = unlist(strsplit(as.character(abs.alpha.power.res$Metrics[paste(idx,sep = ""),"MDlist"]),split = ";"))
GEs = unlist(strsplit(as.character(abs.alpha.power.res$Metrics[paste(idx,sep = ""),"GEList"]),split = ";"))
MOD = cbind(c("Intercept",MDs,GEs),c(round(as.numeric(abs.alpha.power.res$Metrics[paste(idx,sep = ""),"Intercept"]),4),coeff))

# williams plot of the 36-th model
plot_wp(abs.alpha.power.res$williams_plots[[idx]]$DTP)

# scatterplot of the experimental versus predicted values of the 36-th model
plot_pred_real(pred_train=abs.alpha.power.res$pred_tr[[idx]],
               pred_test=abs.alpha.power.res$pred_te[[idx]],
               train_class = abs.alpha.power.res$y_train,
               test_class = abs.alpha.power.res$y_test,xlab = "predicted",ylab = "real")

# Plot correlation graph of the MDs/MOA features of the model
plot_MD_GE_graph(model_info= abs.alpha.power.res.filtered.metrics[paste(idx,sep = ""),],
                 MDMat= rbind(X_md[train.idx,],X_md[test.idx,]),
                 GeneMat= rbind(X_ge[train.idx,],X_ge[test.idx,]),
                 neg_th = 0.9, pos_th = 0.9)
angy89/hyQSAR documentation built on Sept. 24, 2019, 7:31 a.m.