## R/molParser.R

## Parse molecular identifiers of the form "Class(X:A/Y:B)" into columns.
##
## "(", ":" and "/" are all treated as field delimiters and ")" is dropped,
## so "PC(O-16:0/18:1)" yields the fields "PC", "O-16", "0", "18", "1".
## Only the first five fields are used; missing fields become NA.
##
## Args:
##   x: vector of identifiers (anything as.data.table() converts to a
##      single column).
##
## Returns:
##   A data.table with one row per identifier, in input order, and columns
##     MolecularID  the identifier with delimiters normalised to ";"
##     Class        first field
##     Linkage      prefix attached to the second field:
##                    - "d" or "t" when that field starts with d/t followed
##                      by a digit (e.g. "d18"), for any class;
##                    - for classes LPC, PC and SM, the text before "-"
##                      (e.g. "O" in "O-16"), or "acyl" when the field has
##                      no such prefix;
##                    - NA otherwise.
##     X            second field with the Linkage prefix removed (numeric)
##     A, Y, B      third to fifth fields (numeric)
##   Fields that are not numbers become NA with the usual coercion warning.
molParser <- function(x){
    ## Convert vector to data table
    x <- as.data.table(x)
    colnames(x) <- "MolecularID"

    ## Substitute delimiters with a singular, consistent one R likes
    ids <- as.character(x[["MolecularID"]])
    ids <- gsub("[(:/]", ";", ids)
    ids <- gsub(")", "", ids, fixed = TRUE)

    ## k-th piece of every split string; NA where that piece is absent, so
    ## every output column exists even if no identifier has that many fields
    nthPiece <- function(pieces, k){
        vapply(pieces, function(p) p[k], character(1), USE.NAMES = FALSE)
    }

    ## Parse MolecularID into individual components (attention to linkage!)
    fields <- strsplit(ids, ";", fixed = TRUE)
    lipidClass <- nthPiece(fields, 1L)
    firstChain <- nthPiece(fields, 2L)

    linkage <- rep(NA_character_, length(ids))
    chainX <- firstChain

    ## "d18" / "t18": the leading letter is the linkage, the rest is X
    hasLetterPrefix <- grepl("^[dt][0-9]", firstChain)
    linkage[hasLetterPrefix] <- substr(firstChain[hasLetterPrefix], 1L, 1L)
    chainX[hasLetterPrefix] <- substring(firstChain[hasLetterPrefix], 2L)

    ## LPC/PC/SM: "O-16" carries its linkage before the dash; a field whose
    ## first dash-separated piece already contains a digit has no prefix and
    ## is labelled "acyl"
    dashClass <- lipidClass %in% c("LPC", "PC", "SM") & !hasLetterPrefix
    dashPieces <- strsplit(firstChain, "-", fixed = TRUE)
    beforeDash <- nthPiece(dashPieces, 1L)
    afterDash <- nthPiece(dashPieces, 2L)

    unprefixed <- dashClass & grepl("[0-9]", beforeDash)
    prefixed <- dashClass & !unprefixed

    linkage[unprefixed] <- "acyl"
    chainX[unprefixed] <- beforeDash[unprefixed]
    linkage[prefixed] <- beforeDash[prefixed]
    chainX[prefixed] <- afterDash[prefixed]

    ## Assemble all rows in input order; no class is split off or dropped
    parsed <- data.frame(
        MolecularID = ids,
        Class = lipidClass,
        Linkage = linkage,
        X = as.numeric(chainX),
        A = as.numeric(nthPiece(fields, 3L)),
        Y = as.numeric(nthPiece(fields, 4L)),
        B = as.numeric(nthPiece(fields, 5L)),
        stringsAsFactors = FALSE
    )
    return(as.data.table(parsed))
}
## jchitpin/blistR documentation built on July 8, 2019, 6:29 p.m.