R/processXMLs_Functions.R

Defines functions runParameters summariseConvStats summariseDemuxStats processConvStats processDemultiplex

processDemultiplex <- function(demuxStatsXML){

  if(file.exists(demuxStatsXML)){

  demuxStatsXMLparse <- xmlTreeParse(demuxStatsXML)
  demuxStatsXML_root <- xmlRoot(demuxStatsXMLparse)
  Projects <- demuxStatsXML_root[[1]]
  flowcellID <- xmlAttrs(Projects)
  Projects_Info <- list()
  for(p in seq_along(Projects)){
    Project <- Projects[[p]]
    Project_Name <- xmlAttrs(Project)
    Project_Sample_Info <- list()
    for(s in seq_along(Project)){
      Sample <- Project[[s]]
      Sample_Name <- xmlAttrs(Sample)
      Sample_BarcodeExpected <- Sample[[1]]
      Sample_BarcodeExpected_Name <- xmlAttrs(Sample[[1]])
      Project_Sample_BarcodeExpected_Lane_Info <- list()
      for(b in seq_along(Sample_BarcodeExpected)){
        Lane <- Sample_BarcodeExpected[[b]]
        Lane_Name <- xmlAttrs(Lane)
        Lane_BarcodeCount <- as.integer(xmlValue(Lane[["BarcodeCount"]]))
        Lane_PerfectBarcodeCount <- as.integer(xmlValue(Lane[["PerfectBarcodeCount"]]))
        Project_Sample_BarcodeExpected_Lane_Info[[b]] <- data.frame(BarcodeCount = Lane_BarcodeCount,
                                                                    PerfectBarcodeCount = Lane_PerfectBarcodeCount)
        names(Project_Sample_BarcodeExpected_Lane_Info)[b] <- paste0("Lane",xmlAttrs(Lane))

      }
      psbeliMat <- sapply(Project_Sample_BarcodeExpected_Lane_Info,function(x)x)
      psbeliMatDF <- data.frame(BarcodeStat=rownames(psbeliMat),psbeliMat)
      psbeliMatDF <- tidyr::gather(psbeliMatDF,Lane,Count,-BarcodeStat)

      Project_Sample_Info[[s]] <- data.frame(Barcode = rep(Sample_BarcodeExpected_Name,nrow(psbeliMatDF)),
                                             psbeliMatDF)
      names(Project_Sample_Info)[s] <- Sample_Name

    }
    psiMat <- do.call(rbind,Project_Sample_Info)
    psiMatDF <- data.frame(Sample=gsub("\\..*","",rownames(psiMat)),psiMat)
    Projects_Info[[p]] <- data.frame(Project = rep(Project_Name,nrow(psiMat)),
                                     psiMatDF)
    names(Projects_Info)[p] <- Project_Name

  }
  Projects_DF2 <- do.call(rbind,Projects_Info)
  rownames(Projects_DF2) <- NULL
  return(Projects_DF2)
  }
  return(NULL)
}



processConvStats <- function(ConvStatsXML){

  if(file.exists(ConvStatsXML)){
  convStatsXMLparse <- xmlTreeParse(ConvStatsXML)
  convStatsXML_root <- xmlRoot(convStatsXMLparse)
  Projects <- convStatsXML_root[[1]]
  flowcellID <- xmlAttrs(Projects)

  Projects <- Projects[names(Projects) == "Project"]
  Projects_Info <- list()
  for(p in seq_along(Projects)){
    Project <- Projects[[p]]
    Project_Name <- xmlAttrs(Project)
    Project_Sample_Info <- list()
    for(s in seq_along(Project)){
      Sample <- Project[[s]]
      Sample_Name <- xmlAttrs(Sample)
      Sample_BarcodeExpected <- Sample[[1]]
      Sample_BarcodeExpected_Name <- xmlAttrs(Sample[[1]])
      Project_Sample_BarcodeExpected_Lane_Info <- list()
      Lane_Info <- list()
      for(b in seq_along(Sample_BarcodeExpected)){
        Lane <- Sample_BarcodeExpected[[b]]
        Lane_Name <- xmlAttrs(Lane)
        Tile_Info <- list()
        for(t in seq_along(Lane)){
          Tile <- Lane[[t]]
          Tile_Name <- xmlAttrs(Tile)
          FilterState_Info <- list()
          for(f in seq_along(Tile)){
            FilterState <- Tile[[f]]
            FilterState_Name <- xmlName(FilterState)
            FilterState_ClusterCount <- as.integer(xmlValue(FilterState[["ClusterCount"]]))
            ReadNumber_Info <- list()
            for(r in seq_along(FilterState)[-1]){

              Read <-  FilterState[[r]]
              ReadNumber <-  xmlAttrs(Read)
              ReadNumber_Yield <- as.integer(xmlValue(Read[["Yield"]]))
              ReadNumber_YieldQ30 <- as.integer(xmlValue(Read[["YieldQ30"]]))
              ReadNumber_QualityScoreSum <- as.numeric(xmlValue(Read[["QualityScoreSum"]]))
              ReadNumber_Info[[r-1]] <- data.frame(Yield = ReadNumber_Yield,
                                                   Yield30 = ReadNumber_YieldQ30,
                                                   QualityScoreSum = ReadNumber_QualityScoreSum,
                                                   ClusterCount = FilterState_ClusterCount
                                                   
              )

              names(ReadNumber_Info)[r-1] <- paste0("Read_",ReadNumber)

            }
            rniMat <- do.call(rbind,ReadNumber_Info)
            rniMatDF <- data.frame(ReadNumber=rownames(rniMat),rniMat)
            FilterState_Info[[f]] <- data.frame(Filter = rep(FilterState_Name,nrow(rniMatDF)),
                                                rniMatDF)
            names(FilterState_Info)[f] <- FilterState_Name

          }
          fsiMat <- do.call(rbind,FilterState_Info)
          fsiMatDF <- data.frame(fsiMat)
          Tile_Info[[t]] <- data.frame(Tile = rep(Tile_Name,nrow(fsiMatDF)),
                                       fsiMatDF)
          names(Tile_Info)[t] <- paste0("Tile_",Tile_Name)

        }
        tiMat <- do.call(rbind,Tile_Info)
        tiMatDF <- data.frame(tiMat)
        Lane_Info[[b]] <- data.frame(Lane = rep(Lane_Name,nrow(tiMatDF)),
                                     tiMatDF)
        names(Lane_Info)[b] <- paste0("Lane_",Lane_Name)
      }
      liMat <- do.call(rbind,Lane_Info)
      liMatDF <- data.frame(liMat)
      Project_Sample_Info[[s]] <- data.frame(Sample = rep(Sample_Name,nrow(liMatDF)),
                                             liMatDF)
      names(Project_Sample_Info)[s] <- paste0(Sample_Name)

    }
    psi2Mat <- do.call(rbind,Project_Sample_Info)
    psi2MatDF <- data.frame(psi2Mat)
    Projects_Info[[p]] <- data.frame(Project = rep(Project_Name,nrow(psi2MatDF)),
                                     psi2MatDF)
    names(Projects_Info)[p] <- paste0(Project_Name)

  }
  Projects_DF <- do.call(rbind,Projects_Info)
  rownames(Projects_DF) <- NULL
  return(Projects_DF)
  }
  return(NULL)
}

summariseDemuxStats <- function(demuxProcessed){
  #Lane_Stats4 <- Test %>% filter(Sample != "all" & BarcodeStat == "BarcodeCount") %>% group_by(Project,Sample) %>% summarise(Count=sum(as.numeric(Count)))
  #Lane_Stats <- Projects_DF %>% filter(Sample == "all") %>% group_by(Lane,Filter) %>% summarise(sum(Yield))

  if(!is.null(demuxProcessed)){
  temp <- demuxProcessed %>% tbl_df %>% mutate(Count = as.numeric(Count)) %>%
            filter(Sample != "all" & BarcodeStat == "BarcodeCount") %>%
            filter(Project != "default") # Computing percent label text and position for pie chart
  # temp <- temp %>% group_by(Lane) %>% mutate(labelperc=round(Count/sum(Count),2)*100) %>% group_by(Lane) %>% mutate(pos = cumsum(labelperc)- labelperc/2)
  # p1 <- temp %>% filter(Project != "default") %>% ggplot(aes(x=Project,y=Count,fill=Project))+geom_violin(alpha=0.3,scale="width")+geom_jitter(alpha=0.6)+theme(legend.position="bottom")
  # p2 <- ggplot(temp,aes(x=Lane,y=Count,fill=Sample))+geom_bar(stat = "identity")+theme_bw()+theme(legend.position="bottom")
  # p3 <- ggplot(temp,aes(x=Sample,y=Count,fill=Lane))+geom_bar(stat = "identity")+theme_bw()+coord_flip()+theme(legend.position="bottom")
  # if(plot){
  #   print(p1)
  #   print(p2)
  #   print(p3)
  # }
  #return(list(Summary=temp,Boxplot=p1,StackedBar=p2,Bar=p3))
  # Lane_perTileStats <- demuxProcessed %>% group_by(Lane,Tile,Filter) %>% filter(Sample != "all") %>% summarise(Yield=sum(as.numeric(Yield)))
  # LaneSample_perTileStats <- demuxProcessed %>% group_by(Lane,Sample,Tile,Filter) %>% filter(Sample != "all") %>% summarise(Yield=sum(as.numeric(Yield)))
  #Sample_Stats <- demuxProcessed %>% filter(Sample != "all") %>% group_by(Sample) %>% summarise(Yield=sum(as.numeric(Yield)))
  #Lane_Stats <- demuxProcessed %>% filter(Sample != "all") %>% group_by(Lane) %>% summarise(Yield=sum(as.numeric(Yield)))

  return(list(Summary=temp))
  }
  return(NULL)
}

summariseConvStats <- function(convStatsProcessed){
  #Lane_Stats4 <- Test %>% filter(Sample != "all" & BarcodeStat == "BarcodeCount") %>% group_by(Project,Sample) %>% summarise(Count=sum(as.numeric(Count)))
  #Lane_Stats <- Projects_DF %>% filter(Sample == "all") %>% group_by(Lane,Filter) %>% summarise(sum(Yield))
  if(!is.null(convStatsProcessed)){
  Lane_perTileStats <- convStatsProcessed %>% group_by(Lane,Tile,Filter) %>% filter(Sample != "all") %>% summarise(Yield=sum(as.numeric(Yield)))
  LaneSample_perTileStats <- convStatsProcessed %>% group_by(Lane,Sample,Tile,Filter) %>% filter(Sample != "all") %>% summarise(Yield=sum(as.numeric(Yield)))
  Sample_Stats <- convStatsProcessed %>% filter(Sample != "all") %>% group_by(Sample,Filter) %>% summarise(Yield=sum(as.numeric(Yield)))
  Lane_Stats <- convStatsProcessed %>% filter(Sample != "all") %>% group_by(Lane,Filter) %>% summarise(Yield=sum(as.numeric(Yield)))
  #Lane_Stats <- Projects_DF %>% filter(Sample == "all") %>% group_by(Lane,Filter) %>% summarise(sum(Yield))
  #p3 <- ggplot(data=Lane_perTileStats,aes(x=Lane,y=Yield))+geom_violin()
  #ggplot(data=Sample_Stats,aes(x=Sample,y=Yield,fill=Filter))+geom_boxplot()
  # if(plot){
  #   print(p3)
  # }
  return(list(Lane_perTileStats=Lane_perTileStats,
              LaneSample_perTileStats=LaneSample_perTileStats,
              Sample_Stats=Sample_Stats,
              Lane_Stats=Lane_Stats))
  }
  return(NULL)
}


runParameters <- function(runParameters = NULL){

  xmlFromRunParameters <- xmlParse(runParameters)
  currentRunParameters <- xmlToDataFrame(xmlFromRunParameters)
  currentRunParameters <- currentRunParameters[!is.na(currentRunParameters$ExperimentName),,drop=FALSE]
  currentRunParameters %>% tbl_df
}
Bioconductor-mirror/basecallQC documentation built on June 10, 2017, 7:13 a.m.