## R/percentNormality.R

## Estimate how much of a dataset looks normally distributed under four
## transformations.
##
## The columns of 'DT' to the right of the first column whose name matches
## "ExactMass" are melted to long format and missing values are dropped. For
## each of four versions of the values, a one-sample Kolmogorov-Smirnov test
## is run per 'Barcode' group against a normal distribution parameterised by
## that group's own mean and standard deviation.
##
## Arguments:
##   DT    data.table with a 'Barcode' column and a column matching
##         "ExactMass" followed by one or more measurement columns.
##   p_KS  single number in (0, 1]; a group counts as normal when its K-S
##         p-value is strictly greater than this threshold.
##
## Returns a 4 x 1 numeric matrix. Each entry is the fraction of non-missing
## long-format rows that belong to a 'Barcode' group passing the test:
##   row 1  untransformed values
##   row 2  log10(abs(value))
##   row 3  z-score of the values within each melted 'variable'
##   row 4  z-score of log10(abs(value)) within each melted 'variable'
percentNormality <- function(DT, p_KS){
    ## Error-checking inputs. The type/length/NA guards come first so that a
    ## malformed threshold gets this message instead of an opaque failure
    ## inside if().
    if(!is.numeric(p_KS) || length(p_KS) != 1L || is.na(p_KS) ||
       p_KS <= 0 || p_KS > 1){
        stop("p_KS must be between [0 1]")
    }

    ## Measurement columns are everything after the first "ExactMass" match;
    ## fail early when that range would be empty.
    massCol <- grep("ExactMass", colnames(DT))
    if(length(massCol) == 0L || massCol[1L] >= ncol(DT)){
        stop("DT must contain an 'ExactMass' column followed by at least one measurement column")
    }
    measureCols <- colnames(DT)[(massCol[1L] + 1L):ncol(DT)]

    ## Melting 'DT' for transformations and K-S test
    meltDT <- melt(DT, measure.vars = measureCols)
    meltDT <- meltDT[!is.na(value)]

    ## The four versions of the measurements that get tested. scale() returns
    ## a one-column matrix, so it is flattened before being stored.
    meltDT[, rawValue := as.numeric(value)]
    meltDT[, logValue := log10(abs(rawValue))]
    meltDT[, Z.score := as.numeric(scale(rawValue)), by = "variable"]
    meltDT[, Z.score.log := as.numeric(scale(logValue)), by = "variable"]

    ## p-value of a one-sample K-S test of 'x' against a normal distribution
    ## using the sample's own mean and standard deviation
    ksPValue <- function(x){
        ks.test(x, "pnorm", mean(x), sd(x))$p.value
    }

    ## Fraction of rows whose 'Barcode' group has a p-value above 'p_KS'.
    ## Rows with a missing p-value are not counted as passing.
    fractionNormal <- function(column){
        meltDT[, normality := ksPValue(.SD[[1L]]),
               by = "Barcode", .SDcols = column]
        nrow(meltDT[normality > p_KS]) / nrow(meltDT)
    }

    ## One row per transformation, in the order documented above
    tested <- c("rawValue", "logValue", "Z.score", "Z.score.log")
    matrix(vapply(tested, fractionNormal, numeric(1), USE.NAMES = FALSE),
           nrow = 4, ncol = 1)
}
## jchitpin/blistR documentation built on July 8, 2019, 6:29 p.m.