R/getCharts.R

Defines functions createTable getORGLM readScore

readScore <- function(inData, inControl, inNumBins=4){
	## Function that reads in data and fam file and will split
	## data into 4 equal parts unless otherwise specified
	#inData <- fread(inFile, stringsAsFactors=F)
  inData <- inData
  inControl <- inControl
  #inFam <- fread(inFamFile, stringsAsFactors=F, header=F)
	## Assert that data is what we expect
	# Num of cols are 4
	# No duplicated samples and no na PRS scores
	assertthat::assert_that(ncol(inData) == 3)
	assertthat::assert_that(any(duplicated(inData$IID)) == FALSE)
	assertthat::assert_that(any(is.na(as.numeric(inData$PRS))) == FALSE)
  # If there is overlap between the base file and target file
  # use controls to create deciles else use target set
  setkey(inData, IID)
  setkey(inControl, IID)
  needCols <- c("V2", "V6")
	assertthat::assert_that(ncol(inControl) == 3)
	assertthat::assert_that(any(duplicated(inControl$IID)) == FALSE)
	assertthat::assert_that(any(is.na(as.numeric(inControl$PRS))) == FALSE)
  #inFam <- inFam[,needCols,with=FALSE]
  #inFam$V6 <- as.numeric(inFam$V6)
  #assertthat::assert_that(any(is.na(as.numeric(inFam$V6))) == FALSE)
  nrowInit <- nrow(inData)
  #inData <- inData[inFam, nomatch=0]
  inControl[,subject_type := "control"]
  inData[,subject_type := "case"]
  inTotal <- rbind(inData, inControl)
  #assert_that(nrow(inData) == nrowInit)
  ## Need to be careful with ifelse
  inTotal$PRS <- as.numeric(inTotal$PRS)
  assertthat::assert_that(any(is.na(as.numeric(inTotal$PRS))) == FALSE)
  assertthat::assert_that(length(unique(inTotal$subject_type)) == 2)
  assertthat::assert_that(all.equal(sort(unique(inTotal$subject_type)), c("case", "control")))
  inTotal <- tryCatch({
    inTotal$bin_PRS <- -1
    breaksD <- unique(as.numeric(as.character(quantile(as.numeric(inTotal[subject_type == "control",PRS]), probs = seq(0.25, 1, by = 1/inNumBins)))))
    seqC <- as.numeric(seq(0.25,1-1/inNumBins, by=1/inNumBins))
    if(length(unique(breaksD)) < 2){
      inNumBins <- length(unique(inTotal$PRS))-1
      seqC <- as.numeric(seq(1/inNumBins,1-1/inNumBins, by=1/inNumBins))
      inTotal[,bin_PRS := as.numeric(as.character(cut(as.numeric(PRS), breaks=c(sort(unique(inTotal$PRS)))[1:inNumBins], labels=seqC, include.lowest = TRUE, right=TRUE)))]
    } else {
      inTotal[,bin_PRS := as.numeric(as.character(cut(as.numeric(PRS), breaks=unique(breaksD), labels=seqC, include.lowest = TRUE, right=TRUE)))]
    }
    inTotal[,bin_PRS := ifelse(is.na(bin_PRS), 1, bin_PRS)]
    assertthat::assert_that(nrow(inTotal[bin_PRS == -1,]) == 0)
    inTotal
  }, error = function(e){
    ##Default behaviour if inNumBins is not defined
    inTotal[,bin_PRS := as.numeric(as.character(cut(inTotal$PRS, breaks=c(quantile(inTotal$PRS, probs = seq(0, 1, by = 0.1))), labels=seq(0,0.9, by=0.1), include.lowest = FALSE, right=TRUE)))]
  })
  minBin <- min(as.numeric(inTotal$bin_PRS))
  inTotal[,is_bott := ifelse(bin_PRS == minBin, TRUE,FALSE)]
  assertthat::assert_that(length(unique(inTotal[is_bott == TRUE, bin_PRS])) == 1)
  assertthat::assert_that(length(unique(inTotal[is_bott == FALSE, bin_PRS])) == (inNumBins-1))
  #assertthat::assert_that(!(c("0.25") %in% as.character(unique(inTotal[is_bott == FALSE, bin_PRS]))))
  #assertthat::assert_that(c("0.25") %in% as.character(unique(inTotal[is_bott == TRUE, bin_PRS])))
  #assert_that(nrow(unique(inTotal)) == nrow(inTotal))
  inTotal <- unique(inTotal)
  #fwrite(inTotal[bin_PRS == 0.75& subject_type=="case",],paste0("~/quantilePlot/",unique(inTotal$PGS_RECORD_ID), ".csv"))
	return(inTotal)
}

getORGLM <- function(inGLMMod){
  inGLM <- inGLMMod$logitModel
  inQuant <- inGLMMod$quant
  if(inQuant != 1){
    retFrame <- tryCatch({
      oddsR <- questionr::odds.ratio(inGLM, level=0.95)
      assertthat::assert_that(length(oddsR$OR) == 2)
      assertthat::assert_that(is.numeric(inQuant))
      retFrame <- cbind(quantile=inQuant, lowerBound=oddsR$`2.5 %`[2], oddsRatio=oddsR$OR[2], upperBound=oddsR$`97.5 %`[2])
    }, error = function(e){
      inORs <- exp(summary(inGLM)$coefficients["PRS",1] +
       qnorm(c(0.025,0.5,0.975)) * summary(inGLM)$coefficients["PRS",2])
      retFrame <- cbind(quantile=inQuant, lowerBound=inORs[1], oddsRatio=inORs[2], upperBound=inORs[3])
    })
  } else {
    retFrame <- cbind(quantile=inQuant, lowerBound=1, oddsRatio=1, upperBound=1)
  }
  # No DT because of doubles
  return(retFrame)
}

createTable <- function(inQuant, inData, inNumBins=4){
  inData$bin_data <- inData$bin_PRS*inNumBins
  botData <- inData[bin_data == 1 ,uniqueN(IID), by=.(subject_type, bin_data)]
  botData$category <- "z_bottom"
  quantData <- inData[bin_data == inQuant ,uniqueN(IID), by=.(subject_type, bin_data)]
  quantData$category <- "quantile"
  contTable <- rbind(botData, quantData)
  if(nrow(contTable[subject_type == "case",])  == 0){
    nullRow <- data.table::data.table(subject_type = c("case", "case"), bin_data
= rep(unique(contTable$bin_data),2), V1=c(0, 0),  category=c("z_bottom", "quantile"))
    contTable <- rbind(contTable, nullRow)
    contTable$V1 <- as.numeric(contTable$V1)
  }
  return(list(contTable=data.table::dcast(contTable, subject_type ~ category, value.var="V1", fun.aggregate = sum), quant=inQuant))
}

getORContT <- function(inData){
  contTable <- inData$contTable
  contTable$subject_type <- NULL
  contTable <- as.matrix(contTable)
  inQuant <- inData$quant
  if(inQuant != 1){
    oddsR <- questionr::odds.ratio(contTable, level=0.95)
    assertthat::assert_that(length(oddsR$OR) == 1)
    assertthat::assert_that(is.numeric(inQuant))
    retFrame <- cbind(quantile=inQuant, lowerBound=oddsR$`2.5 %`[1], oddsRatio=oddsR$OR[1], upperBound=oddsR$`97.5 %`[1])
  } else {
    retFrame <- cbind(quantile=inQuant, lowerBound=1, oddsRatio=1, upperBound=1)
  }
  return(retFrame)
}

getQuantilePlot <- function(inData){
  inDate <- "2020-02-17"
  inDir <- paste0("/home/114/sk3015/Analysis/makeGRS/quantilePlot/",inDate)
  #dir.create(inDir, showWarnings=FALSE)
  inFile <- paste0(inDir,"/boxplot.png")
  ggplot2::theme_set(cowplot::theme_cowplot())
  png(filename=inFile, width=1920, height=1080)
  ggplot2::ggplot(data.frame(inData, stringsAsFactors=F), ggplot2::aes(x=quantile, y=oddsRatio)) +
  ggplot2::geom_line()+
  ggplot2::geom_pointrange(ggplot2::aes(ymin=lowerBound, ymax=upperBound))
  dev.off()
}

getPlots <- function(inDis, inControl, inOutDir=NULL){
  disData <- inDis
  if(is.null(inControl)){
    print("Using 1000G Control Samples as specified or due to insufficient control samples")
    controlData <- data.table::fread(system.file("extdata", "control_samples.csv", package="PGSCatalogDownloader"), stringsAsFactors=F)
  } else {
    controlData <- inControl
  }
  lPLots <- lapply(list(unique(disData$PGS_RECORD_ID)), function(x){
    scoreData <- readScore(disData[PGS_RECORD_ID == x,], controlData[PGS_RECORD_ID == x,])
    scoreData[,risk := ifelse(as.numeric(bin_PRS) <= 0.25, 'Low Risk', ifelse(as.numeric(bin_PRS) > 0.75, "High Risk", "Medium Risk"))]
    aggScore <- scoreData 
    aggScore <- scoreData[subject_type == "case",]
    needCols <- c("IID", "PRS","risk")
    if(is.null(inControl)){
      needScore <- aggScore[,needCols, with=FALSE]
    } else {
      needCols <- c("IID", "subject_type","PRS","risk")
      needScore <- scoreData[,needCols, with=FALSE]
      colnames(needScore) <- c("Sample", "Subject Type", "PRS","Risk")
    }
    fwrite(needScore, paste(inOutDir, "sample_out.csv", sep="/"))
    if(length(unique(needScore$PRS)) <= 4){
      logger::log_info(sprintf("Not Able to create quantiles based off data"))
      return(NULL)
    }
    inContTable <- lapply(1:4, function(x)createTable(inQuant=x, inData=scoreData))
    orDF <- do.call("rbind", lapply(inContTable, getORContT))
    orDF <- setDT(data.frame(orDF, stringsAsFactors=F))
    ##ifelse not used in case of factor
    orDF[,upperBound := ifelse(is.infinite(upperBound), as.numeric(lowerBound), as.numeric(upperBound))]
    ggplot2::theme_set(cowplot::theme_cowplot())
    return(ggplot2::ggplot(data.frame(orDF, stringsAsFactors=F), ggplot2::aes(x=quantile, y=oddsRatio)) +
    ggplot2::geom_pointrange(ggplot2::aes(ymin=lowerBound, ymax=upperBound)) + ggplot2::ggtitle(x))
  })
  return(lPLots)
}

setPlots <- function(inDis, inControl=NULL, inOutDir=NULL){
  #allPlots <- getPlots(inFiles$inFile, inFiles$inRecord, inControl,inOutDir)
  allPlots <- getPlots(inDis, inControl, inOutDir)
  inTime <- Sys.time()
  inFile <- paste(inOutDir, "boxplot.png", sep="/")
  #png(filename=inFile, width=1920, height=1080)
  ggplot2::ggsave(filename=inFile, plot=cowplot::plot_grid(plotlist=allPlots))
  #dev.off()
}
VCCRI/PGSCatalogCalculator documentation built on June 25, 2021, 5:36 a.m.