Plot the ROC curve of the best model

Share:

Description

Plot the ROC curve of the best model using cross-fold validation. Internal function of tuningParametersCombROC and predictionGW.

Usage

1
  plotROC(datatrain,k,y,different.weight,type,cost,output)

Arguments

datatrain

training set (data.frame)

y

a vector with the list of labels

k

number of iterations (folds) for k-fold cross-validation; the default value is 5.

output

name of output file.

different.weight

specify whether the classes are unbalanced (e.g. TRUE).

type

type of kernel to use

cost

parameter cost of SVM

Details

Some detailed description

Value

A PDF with the k-fold cross-validation plots for: 1) k-fold cross-validation, 2) ROC horizontal, 3) ROC vertical.

Author(s)

Guidantonio Malagoli Tagliazucchi guidantonio.malagolitagliazucchi@unimore.it

See Also

cisREfindbed, tuningParametersCombROC, perftuningParametersCombROC, perfomanceSVM

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
	# Example workflow: load the SVM2CRMdata example data, build a training set,
	# select features, tune the SVM parameters and run the genome-wide prediction.
	# plotROC itself is not called directly here; per the Description it is an
	# internal function of tuningParametersCombROC and predictionGW.
	library("GenomicRanges")
        library("SVM2CRMdata")

        # Load the pre-computed signal matrix shipped in the package's data directory.
        # NOTE(review): setwd() changes the session's working directory and is never
        # restored in this example.
        setwd(system.file("data",package="SVM2CRMdata"))
        load("CD4_matrixInputSVMbin100window1000.rda")
        completeTABLE<-CD4_matrixInputSVMbin100window1000

        # Shorten the column names from column 6 onward by removing the "CD4." prefix
        # and the ".norm.w100.bed" suffix.
        # NOTE(review): the patterns are regular expressions (no fixed=TRUE), so each
        # "." matches any character, not only a literal dot.
        new.strings<-gsub(x=colnames(completeTABLE[,c(6:ncol(completeTABLE))]),pattern="CD4.",replacement="")
        new.strings<-gsub(new.strings,pattern=".norm.w100.bed",replacement="")
        colnames(completeTABLE)[c(6:ncol(completeTABLE))]<-new.strings

        # The commented-out lines below show how the positive/negative training
        # signals could be generated with getSignal; the example loads saved copies
        # instead.
        #list_file<-grep(dir(),pattern=".sort.txt",value=TRUE)

        #train_positive<-getSignal(list_file,chr="chr1",reference="p300.distal.fromTSS.txt",win.size=500,bin.size=100,label1="enhancers")
        #train_negative<-getSignal(list_file,chr="chr1",reference="random.region.hg18.nop300.txt",win.size=500,bin.size=100,label1="not_enhancers")
        # Same directory as the setwd() call above (nothing changed it in between).
        setwd(system.file("data",package="SVM2CRMdata"))
        load("train_positive.rda")
        load("train_negative.rda")

        # Stack the positive and negative examples into a single training set.
        training_set<-rbind(train_positive,train_negative)
        #the colnames of the training set should be the same of data_enhancer_svm
        # Remove "sort.txt." and "CD4." from the column names from column 5 onward.
        colnames(training_set)[c(5:ncol(training_set))]<-gsub(x=gsub(x=colnames(training_set[,c(5:ncol(training_set))]),pattern="sort.txt.",replacement=""),pattern="CD4.",replacement="")


        # Read the tab-separated p300 table from the package's extdata directory and
        # keep only the rows whose first column equals "chr1".
        setwd(system.file("extdata", package = "SVM2CRMdata"))
        data_level2 <- read.table(file = "GSM393946.distal.p300fromTSS.txt",sep = "\t", stringsAsFactors = FALSE)
        data_level2<-data_level2[data_level2[,1]=="chr1",]

        # Keep the first three columns and name them as genomic coordinates.
        DB <- data_level2[, c(1:3)]
        colnames(DB)<-c("chromosome","start","end")

        # NOTE(review): `label` is assigned but not referenced again in this example.
        label <- "p300"

        # Overlap the signal matrix with the p300 regions.
        # (findFeatureOverlap is defined outside this example; presumably it returns
        # the rows of `query` that overlap `subject` - confirm against its help page.)
        table.final.overlap<-findFeatureOverlap(query=completeTABLE,subject=DB,select="all")

        # Build the labelled input table from the overlapping rows and the full
        # matrix, then clean its column names (from column 5 onward) the same way as
        # for completeTABLE.
        data_enhancer_svm<-createSVMinput(inputpos=table.final.overlap,inputfull=completeTABLE,label1="enhancers",label2="not_enhancers")
        colnames(data_enhancer_svm)[c(5:ncol(data_enhancer_svm))]<-gsub(gsub(x=colnames(data_enhancer_svm[,c(5:ncol(data_enhancer_svm))]),pattern="CD4.",replacement=""),pattern=".norm.w100.bed",replacement="")

        # Feature names passed to the smoothing step.
        listcolnames<-c("H2AK5ac","H2AK9ac","H3K23ac","H3K27ac","H3K27me3","H3K4me1","H3K4me3")

        # Smooth the positive-set signals (columns 6 onward) with k=20.
        dftotann<-smoothInputFS(train_positive[,c(6:ncol(train_positive))],listcolnames,k=20)


        # Feature selection; the second argument (5) is presumably the number of
        # k-means clusters - confirm against featSelectionWithKmeans' help page.
        results<-featSelectionWithKmeans(dftotann,5)

        # The seventh element of the result list is used as the feature-selection
        # table. NOTE(review): the meaning of its columns is not visible here.
        resultsFS<-results[[7]]

        # Keep the rows whose second column is above that column's median ...
	resultsFSfilter<-resultsFS[which(resultsFS[,2]>median(resultsFS[,2])),]

        # ... and, among those, the rows whose third column is below 0.50.
        resultsFSfilterICRR<-resultsFSfilter[which(resultsFSfilter[,3]<0.50),]

        # The first column holds the selected feature names; strip "_" followed by
        # any single character (regex "_.") and the "CD4." prefix.
        listHM<-resultsFSfilterICRR[,1]
        listHM<-gsub(gsub(listHM,pattern="_.",replacement=""),pattern="CD4.",replacement="")

        # Training-set columns (from column 6 onward) whose name matches any selected
        # feature.
        selectFeature<-grep(x=colnames(training_set[,c(6:ncol(training_set))]),pattern=paste(listHM,collapse="|"),value=TRUE)

        # Restrict the training set to the coordinate/label columns plus the selected
        # features.
        colSelect<-c("chromosome","start","end","label",selectFeature)
        training_set<-training_set[,colSelect]

        # Tuning grid. NOTE(review): 2:length(listHM) counts downward (2, 1) if
        # listHM has fewer than two elements.
        vecS <- c(2:length(listHM))
        # Pick the first SVM type (0) and the sixth cost value (100).
        typeSVM <- c(0, 6, 7)[1]
        costV <- c(0.001, 0.01, 0.1, 1, 10, 100, 1000)[6]
        # NOTE(review): `wlabel` is assigned but not referenced again in this example.
        wlabel <- c("not_enhancer", "enhancer")
        # One-column data frame with "<feature>.signal" names, cleaned of "CD4." and
        # ".sort.bed".
        infofile<-data.frame(a=c(paste(listHM,"signal",sep=".")))
        infofile[,1]<-gsub(gsub(x=infofile[,1],pattern="CD4.",replacement=""),pattern=".sort.bed",replacement="")

        # Tune the SVM using only the first value of vecS; infofile is passed
        # positionally as the last argument.
        # NOTE(review): different.weight is passed as the string "TRUE", not the
        # logical TRUE - confirm that tuningParametersCombROC expects a string.
        tuningTAB <- tuningParametersCombROC(training_set = training_set, typeSVM = typeSVM, costV = costV,different.weight="TRUE", vecS = vecS[1],pcClass=100,ncClass=400,infofile)

        # Discard rows with fscore >= 0.95, then take the row with the highest
        # remaining fscore.
        tuningTABfilter<-tuningTAB[tuningTAB$fscore<0.95,]
        #row_max_fscore<-which.max(tuningTABfilter[tuningTABfilter$nHM >2,"fscore"])
        row_max_fscore<-which.max(tuningTABfilter[,"fscore"])
        # Column 4 of that row is turned into a regex alternation by replacing "//"
        # with "|" (presumably it holds the feature combination joined by "//").
        listHM_prediction<-gsub(tuningTABfilter[row_max_fscore,4],pattern="//",replacement="|")

        # Training-set columns matching the best feature combination.
        columnPR<-grep(colnames(training_set),pattern=paste(listHM_prediction,collapse="|"),value=TRUE)

        # Run the genome-wide prediction with cost=100 and type=0; the trailing
        # unnamed string is presumably the output file name.
        # NOTE(review): the argument is spelled `ncClas` here but `ncClass` in the
        # tuningParametersCombROC call above - presumably this relies on R's partial
        # argument matching; confirm against predictionGW's signature.
        predictionGW(training_set=training_set,data_enhancer_svm=data_enhancer_svm, listHM=columnPR,pcClass.string="enhancers",nClass.string="not_enhancers",pcClass=100,ncClas=400,cost=100,type=0,"prediction_enhancers_CD4_results_cost=100_type=0")