R/nodupMAP.R

nodupMAP <- function(nbOutput){

    ## Loop over all folds
    for(i in seq_along(nbOutput)){
        ## Loop over j best feature combinations
        for(j in seq_along(nbOutput[[i]])){

            ## Initialize nodupMAP column and define constant for min-heap
            nbOutput[[i]][[j]]$nodupMAP <- FALSE
            constant <- (min(nbOutput[[i]][[j]][, -posterior]) -1)

            ## Initialize temporary column
            #nbOutput[[i]][[j]]$nodupRepeat <- FALSE

            for(k in 1:length(nbOutput[[i]][[j]][, unique(run)])){

                ## Get keys except for posterior == 0 (non-unique heap keys and false candidate assignment)
                ## Also remove any rows with non-unique/duplicated retention times (therefore same posteriors)
                tempDT <- nbOutput[[i]][[j]][run == unique(run)[k] & posterior != 0]
                tempDT <- tempDT[, if(.N == 1) .SD, .(posterior)]
                keys <- tempDT[, -posterior]
                if(any(duplicated(keys)) == T){
                    stop("We have duplicated keys!")
                }

                ## Get list of data table rows for heap values
                tempDT[, "tempIdx" := 1:.N]
                nbOutputValues <- split(tempDT, by = "tempIdx")

                ## Create min heap
                bheap <- binomial_heap("numeric")
                bheap <- insert(bheap, keys, nbOutputValues)

                ## MAP algorithm constrained by new duplicate label assignments
                while(size(bheap) > 0){
                    #if(size(bheap) == 105){
                    #    stop("")
                    #}
                    rm1 <- peek(bheap)[[1]][,Barcode]
                    rm2 <- peek(bheap)[[1]][,Training.Barcode]
                    keep1 <- peek(bheap)[[1]][, posterior]

                    ## Find all runs with same rm1/rm2 but different keep1 and set to FALSE
                    nbOutput[[i]][[j]][run == unique(run)[k] &
                                           (Barcode == rm1 | Training.Barcode == rm2) &
                                           posterior != keep1,
                                       nodupMAP := FALSE]
                    #nbOutput[[i]][[j]][run == unique(run)[k] &
                    #                       (Barcode == rm1 | Training.Barcode == rm2) &
                    #                       posterior != keep1,
                    #                   nodupRepeat := TRUE]

                    ## Find run posteriors (keys) to remove from the heap
                    #changeKey <- nbOutput[[i]][[j]][run == unique(run)[k] &
                    #                                    (Barcode == rm1 | Training.Barcode == rm2) &
                    #                                    posterior != keep1 & posterior != 0 & nodupRepeat != TRUE,
                    #                                    -posterior]
                    changeKey <- nbOutput[[i]][[j]][run == unique(run)[k] &
                                                        (Barcode == rm1 | Training.Barcode == rm2) &
                                                        posterior != keep1 & posterior != 0,
                                                    -posterior]

                    ## nodupMAP TRUE assignment based on peek(bheap)
                    nbOutput[[i]][[j]][run == unique(run)[k] &
                                           Barcode == rm1 &
                                           Training.Barcode == rm2 &
                                           posterior == keep1,
                                       nodupMAP := TRUE]

                    ## Skim top of heap
                    invisible(pop(bheap))

                    if(length(changeKey) > 0){
                        #decrease_key(bheap,
                        #             from=changeKey,
                        #             to=seq(from=constant,
                        #                    to=(constant-1),
                        #                    length.out=length(changeKey)))
                        #for(l in seq_along(changeKey)){
                        #    pop(bheap)
                        #}
                        for(l in seq_along(changeKey)){
                            tryCatch(
                                expr = {
                                    decrease_key(bheap,
                                                 from=changeKey[l],
                                                 to=constant)
                                    pop(bheap)
                                },
                                error = function(e){
                                    invisible()
                                }
                            )
                        }
                    }
                }
            }

            ## Remove temporary column
            #nbOutput[[i]][[j]]$nodupRepeat <- NULL

            ## Progress message
            message(paste0("Completed nodupMAP for feature combination ", j, "/", length(nbOutput[[i]]), " in fold ", i, "/", length(nbOutput)))
        }
    }

    return(nbOutput)
}
jchitpin/blistR documentation built on July 8, 2019, 6:29 p.m.