## inst/doc/COSNet_v.R

### R code from vignette source 'COSNet_v.Rnw'

###################################################
### code chunk number 1: a1
###################################################
## Load COSNet and the bionetdata package that ships the example networks
library(COSNet)
library(bionetdata)
## Yeast protein-protein functional network from the STRING database
data(Yeast.STRING.data)
dim(Yeast.STRING.data)
## First ten yeast gene identifiers
head(rownames(Yeast.STRING.data), 10)


###################################################
### code chunk number 2: a2
###################################################
## FunCat functional annotations for the yeast STRING genes
## (rows = genes, columns = FunCat classes)
data(Yeast.STRING.FunCat)
dim(Yeast.STRING.FunCat)
head(rownames(Yeast.STRING.FunCat), 10)
head(colnames(Yeast.STRING.FunCat), 10)


###################################################
### code chunk number 3: a3
###################################################
## excluding the dummy "00" root
## Excluding the dummy "00" root class from the annotation matrix
to.be.excl <- which(colnames(Yeast.STRING.FunCat) == "00")
Yeast.STRING.FunCat <- Yeast.STRING.FunCat[, -to.be.excl]
## Choosing the first 35 classes as the labeling to predict
labeling <- Yeast.STRING.FunCat[, 1:35]
## Number of positive labels per class
## (computed while labels are still 0/1 coded, so sums count positives)
colSums(labeling)
## COSNet requires negative labels coded as -1, not 0.
## BUG FIX: the original code recoded only Yeast.STRING.FunCat AFTER
## "labeling" had already been extracted, so the matrix actually passed
## to cosnet.cross.validation in the next chunks still contained 0s.
## Recode both objects.
Yeast.STRING.FunCat[Yeast.STRING.FunCat == 0] <- -1
labeling[labeling == 0] <- -1


###################################################
### code chunk number 4: a4
###################################################
out <- cosnet.cross.validation(labeling, Yeast.STRING.data, 5, cost=0)


###################################################
### code chunk number 5: a5
###################################################
out.r <- cosnet.cross.validation(labeling, Yeast.STRING.data, 5, cost=0.0001)


###################################################
### code chunk number 6: a6
###################################################
## Collect binary predictions, real-valued scores and test labels
## produced by the unregularized run ...
predictions <- out$predictions
scores <- out$scores
labels <- out$labels
## ... and by the regularized run
predictions.r <- out.r$predictions
scores.r <- out.r$scores
labels.r <- out.r$labels


###################################################
### code chunk number 7: a7
###################################################
library(PerfMeas)
## F-score of the unregularized predictions, per class
Fs <- F.measure.single.over.classes(labels, predictions)
## Average F-score
Fs$average[4]
Fs.r <- F.measure.single.over.classes(labels.r, predictions.r)
## Average F-score for the regularized version of COSNet
Fs.r$average[4]
## AUC computation needs 0/1 labels: recode negatives from -1 to 0
labels[labels <= 0] <- 0
labels.r[labels.r <= 0] <- 0
auc <- AUC.single.over.classes(labels, scores)
## AUC averaged across classes
auc$average
auc.r <- AUC.single.over.classes(labels.r, scores.r)
## AUC averaged across classes for the regularized version of COSNet
auc.r$average
## Precision at recall levels 0.1, 0.2, ..., 1
recall.levels <- seq(from = 0.1, to = 1, by = 0.1)
PXR <- precision.at.multiple.recall.level.over.classes(labels,
        scores, recall.levels)
## Average PxR
PXR$avgPXR
PXR.r <- precision.at.multiple.recall.level.over.classes(labels.r,
        scores.r, recall.levels)
## Average PxR for the regularized version of COSNet
PXR.r$avgPXR


###################################################
### code chunk number 8: a8
###################################################
## reading similarity network W
## Remote repository holding the D. melanogaster example data
base.url <- "http://frasca.di.unimi.it/cosnetdata/"
## Reading the weighted similarity network W
## (paste0 replaces the paste(sep = "", ...) anti-pattern)
W <- as.matrix(read.table(file = paste0(base.url, "u.sum.fly.txt"),
        sep = " "))
## Reading the GO annotation matrix (rows = genes, columns = GO terms);
## the stray trailing comma in the original read.table call was removed
GO.ann.sel <- as.matrix(read.table(
        file = paste0(base.url, "GO.ann.fly.15.5.13.3_300.txt"),
        sep = " "))
GO.classes <- colnames(GO.ann.sel)
## Column names encode terms as "GO.0009605": restore the standard
## "GO:0009605" form by replacing the "." at position 3.
## vapply is used instead of unlist(lapply(...)) for type safety.
GO.classes <- vapply(GO.classes, function(x) {
        substr(x, 3, 3) <- ":"
        x
}, character(1), USE.NAMES = FALSE)
colnames(GO.ann.sel) <- GO.classes


###################################################
### code chunk number 9: a9
###################################################
## Number of genes in the network
n <- nrow(W)
## GO terms selected for prediction
classes <- c("GO:0009605", "GO:0022414", "GO:0032504",
             "GO:0002376", "GO:0009888", "GO:0065003")
labels <- GO.ann.sel[, classes]
## COSNet requires negative labels coded as -1
labels[labels <= 0] <- -1
## Random partition of the GO:0009605 labels into 3 folds, stratified so
## that each fold keeps a similar proportion of positives
folds <- find.division.strat(labels[, 1], 1:n, 3)
## Hide the labels of the test fold (fold of index 1): 0 means unlabeled
labels[folds[[1]], ] <- 0
## Predict the hidden labels of every class with COSNet
res <- apply(labels, 2, function(lab, W, cost) {
        COSNet(W, lab, cost)
}, W = W, cost = 0.0001)


###################################################
### code chunk number 10: a10
###################################################
library(PerfMeas)
## Inspect the last predicted GO term
term.ind <- 6
scores <- res[[term.ind]]$scores
## The test genes are those whose labels were hidden
test.genes <- names(scores)
test.labels <- as.vector(GO.ann.sel[test.genes, term.ind])
## Number of positives among the test genes
pos.labels <- sum(test.labels > 0)
pos.labels
## Parameters learned by COSNet for this term
alpha <- res[[term.ind]]$alpha
gamma <- res[[term.ind]]$c
alpha
gamma
## AUC on the test set
AUC <- AUC.single(scores, test.labels)
AUC
## Precision at 10% recall
P10R <- precision.at.recall.level(scores, test.labels, rec.level = 0.1)
P10R


###################################################
### code chunk number 11: a11
###################################################
library(bionetdata)
## Drug-drug similarity matrix based on chemical structure
data(DD.chem.data)
## Drug labels: DrugBank therapeutic categories
data(DrugBank.Cat)


###################################################
### code chunk number 12: a12
###################################################
n <- nrow(DD.chem.data)
drugs <- rownames(DD.chem.data)
## Predict membership in the "Cephalosporins" category
drug.category <- "Cephalosporins"
labels <- as.vector(DrugBank.Cat[, drug.category])
names(labels) <- rownames(DrugBank.Cat)
## Stratified random partition in 5 folds: each fold keeps a similar
## proportion of positives
folds <- find.division.strat(labels, 1:n, 5)
## COSNet requires negatives coded as -1
labels[labels <= 0] <- -1
## Hide the test labels (fold of index 1): 0 means unlabeled
test.drugs <- folds[[1]]
training.drugs <- setdiff(1:n, test.drugs)
labels[test.drugs] <- 0


###################################################
### code chunk number 13: a13
###################################################
## Compute, for each node, the pair of coordinates (pos_vect, neg_vect)
## that COSNet's learning step uses; test.drugs marks the nodes whose
## labels were hidden above.
## NOTE(review): presumably pos_vect/neg_vect are the weighted sums of
## connections towards positive/negative neighbours (Frasca et al. 2013)
## -- confirm against the generate_points help page.
points <- generate_points(DD.chem.data, test.drugs, labels);
str(points)
## Learn the optimal model parameters using only the training nodes and
## their known labels
opt_parameters <- optimizep(points$pos_vect[training.drugs],
                points$neg_vect[training.drugs], labels[training.drugs]);


###################################################
### code chunk number 14: a14
###################################################
## alpha parameter learned by optimizep
alpha <- opt_parameters$alpha;
## gamma parameter (stored in component "c" of the optimizep output)
gamma <- opt_parameters$c;
## Optimal F-score achieved during the learning phase
## (see Frasca et al. 2013)
Fscore <- opt_parameters$Fscore;
## Run the COSNet sub-network procedure with the learned parameters to
## score the hidden (0-labeled) drugs
res <- runSubnet(DD.chem.data, labels, alpha, gamma, cost=0.035);


###################################################
### code chunk number 15: a15
###################################################
library(PerfMeas)
## Inspect the result object and the number of iterations performed
str(res)
res$iter
## Rebuild the full 0/1 label vector to evaluate the test predictions
labels <- as.vector(DrugBank.Cat[, drug.category])
names(labels) <- rownames(DrugBank.Cat)
test.names <- names(res$scores)
## AUC on the test drugs
AUC <- AUC.single(res$scores, labels[test.names])
AUC
## Precision at 10% recall
P10R <- precision.at.recall.level(res$scores,
        labels[test.names], rec.level = 0.1)
P10R
## F-score of the binary predictions (res$state)
Fs <- F.measure.single(res$state, labels[test.names])
Fs


###################################################
### code chunk number 16: a16
###################################################
library(bionetdata)
data(DD.chem.data)
data(DrugBank.Cat)
## Whole-benchmark run: 5-fold cross-validation over every DrugBank
## category with the regularized COSNet
labels <- DrugBank.Cat
## COSNet requires negatives coded as -1
labels[labels <= 0] <- -1
out <- cosnet.cross.validation(labels, DD.chem.data,
        5, cost = 0.035)
## Average F-score across categories
Fs <- F.measure.single.over.classes(labels, out$predictions)
Fs$average[4]
## AUC needs 0/1 labels
labels[labels <= 0] <- 0
auc <- AUC.single.over.classes(labels, out$scores)
auc$average
## Precision at recall levels 0.1, 0.2, ..., 1, averaged across classes
PXR <- precision.at.multiple.recall.level.over.classes(labels,
        out$scores, seq(from = 0.1, to = 1, by = 0.1))
PXR$avgPXR

## Try the COSNet package in your browser

## Any scripts or data that you put into this service are public.

## COSNet documentation built on Nov. 8, 2020, 8:12 p.m.