knitr::opts_chunk$set(results='asis', echo=FALSE, warning=FALSE)
# needed for trouble shooting
#myConfig <- file.path(system.file(package="ContDataQC"), "extdata", "config.ORIG.R")
#source(myConfig)

DATA FILE INFORMATION

  # Report Info
  myReportDate <- format(Sys.Date(),ContData.env$myFormat.Date)
  cat(paste("**Report Date:** ",myReportDate,"\n\n",sep=""))
  myUser <- Sys.getenv("USERNAME")
  cat(paste("**Generate By:** ",myUser,"\n\n",sep=""))

  #filename
  cat("**Filename:** ",strFile,"\n\n",sep="")

  mySiteID <- data.import[1,ContData.env$myName.SiteID]
  cat(paste("**SiteID:** ",mySiteID,"\n\n",sep=""))

  if(exists("fun.myData.DateRange.Start")==TRUE){
    POR.Requested <- paste(fun.myData.DateRange.Start," to ",fun.myData.DateRange.End, sep="")
  } else {
    POR.Requested <- "NA"
  }
  cat(paste("**Period of Record, Requested:** ",POR.Requested,sep="",collapse="\n\n"))

  #myTimeDiff <- difftime(data.import[11,ContData.env$myName.DateTime],data.import[10,ContData.env$myName.DateTime],units="mins")
  #x <- data.import[,ContData.env$myName.DateTime]
  myT <- strptime(data.import[,ContData.env$myName.DateTime],format=ContData.env$myFormat.DateTime)
  myTimeDiff.all <- difftime(myT[-1],myT[-length(myT)],units="mins")
  myTimeDiff <- median(as.vector(myTimeDiff.all),na.rm=TRUE)

  cat(paste("\n\n**Period of Record, Actual:** ",min(data.import[,ContData.env$myName.Date])," to ",max(data.import[,ContData.env$myName.Date]),"\n\n",sep=""))

  cat(paste("**Recording Interval:** ",myTimeDiff[1]," minutes\n\n",sep=""))

  if(exists("strFile.DataType")==TRUE){
    myDataType <- strFile.DataType
  } else {
    myDataType <- "NA"
  }
  cat(paste("**Data Type:** ",myDataType,"\n\n",sep=""))  # need to do better

   myParameters.ALL     <- ContData.env$myNames.DataFields[ContData.env$myNames.DataFields %in% names(data.import)==TRUE] 
   myParameters.Lab.ALL <- ContData.env$myNames.DataFields.Lab[ContData.env$myNames.DataFields %in% names(data.import)==TRUE]
   # Filter out Discrete
   myParameters     <- myParameters.ALL[!grepl(ContData.env$myPrefix.Discrete,myParameters.ALL)]
   myParameters.Lab <- myParameters.Lab.ALL[!grepl(ContData.env$myPrefix.Discrete,myParameters.Lab.ALL)]


 #cat("**Parameters:** ",paste(myParameters.Lab,", ",sep=""),"\n\n",sep="")
  # above line not working, preventing pandoc conversion to WORD
  #cat("**Included Parameters:** \n\n")
  cat("**Parameters Included:** ",paste(myParameters.ALL,", ",sep=""),"\n\n",sep="")

DATA SUMMARY, OVERALL

    #
    cat("Number of records by year and month (with totals).")
    cat("\n")
    myTable <- addmargins(table(data.import[,ContData.env$myName.Yr],data.import[,ContData.env$myName.Mo]))
        print(knitr::kable(myTable, format="markdown", row.names=TRUE))
    cat("\n\n")


  # revert to longer table (20170228)
    cat("Number of records by day and month (with totals).")
    cat("\n")
    myTable <- addmargins(table(data.import[,ContData.env$myName.Day],data.import[,ContData.env$myName.Mo]))
        print(knitr::kable(myTable, format="markdown", row.names=TRUE))
    cat("\n\n") 

#     
#     pandoc.table(myTable.month.day.rec.GT15,style="rmarkdown")
#    
#     
    myTable.month.day.rec <- table(data.import[,ContData.env$myName.Mo],data.import[,ContData.env$myName.Day])
#     use apply function to count number of records not equal to the expected value
    cat("\n\n")


    #
    myTable.DateTime.N <- aggregate(data.import[,ContData.env$myName.SiteID] ~ data.import[,ContData.env$myName.DateTime]
                                    , data=data.import,FUN=length)
    # myTable <- myTable.DateTime.N[myTable.DateTime.N[,ContData.env$myName.SiteID]!=1,]
    # cat(paste0("Date and time records with more than one entry; N=",nrow(myTable),". Duplicate date.time records are shown below."))
    # cat("\n")
    #   print(knitr::kable(myTable, format="markdown", row.names=TRUE))

    # Missing Dates
     myDateRange.Data <- seq(as.Date(min(data.import[,ContData.env$myName.Date]))
                            ,as.Date(max(data.import[,ContData.env$myName.Date]))
                            ,by="day")

    if(exists("fun.myData.DateRange.Start")==TRUE){
      myDateRange.Target <- seq(as.Date(fun.myData.DateRange.Start)
                              ,as.Date(fun.myData.DateRange.End)
                              ,by="day")
    } else {
      myDateRange.Target <- myDateRange.Data
    }

    myDateRange.Target.Missing <- myDateRange.Target[!myDateRange.Target %in% myDateRange.Data]
    cat(paste0("Below are the dates with missing data between the min (",min(myDateRange.Data)
               ,") and max(",max(myDateRange.Data)," for the provided data. There are "
               ,length(myDateRange.Target.Missing)," records."))
    cat("\n\n")
    print(ifelse(length(myDateRange.Target.Missing)==0,"",myDateRange.Target.Missing))     

PLOTS, PARAMETERS

 # 3. Data Info, by Parameter
    # Individual Data Type Report
    # QC
    #i <- myParameters[1]

     for (i in myParameters) {##FOR.i.START
#       #
       i.num <- match(i,myParameters)
       myTitle.Sub <- myParameters.Lab[i.num]

       # 3.3. Plot
       # remove NA (but some could be intentional)
       #data.plot <- data.import[is.na(as.numeric(data.import[,i]))==FALSE,]
       data.plot <- data.import


      plot_format <- "ggplot" # "base" or "ggplot"
      #
      if(plot_format=="base"){##IF~plot_format~START 
        #plot, base ####
        #data.plot <- data.import
        myPlot.Y <- data.plot[,i]

        # cheat on Date/Time axis
        n.Total <- length(data.plot[,ContData.env$myName.Date])
        pct <- c(20,40,60,80,100)*.01
        myAT <- c(1,round(n.Total * pct,0))
        myLab <- data.plot[,ContData.env$myName.Date][myAT]
        #
        # X axis as time
        #myPlot.X <- strptime(data.import[,myName.DateTime],myFormat.DateTime)

        myPlot.Ylab <- myParameters.Lab[i.num]
       # plot(myPlot.Y,type="l",main=mySiteID,xlab=myLab.Date,ylab=myPlot.Ylab,col="gray", xaxt="n")
         plot(myPlot.Y,type="p",main=mySiteID,xlab=ContData.env$myLab.Date,ylab=myPlot.Ylab,col="gray", xaxt="n",cex=0.5)
        axis(1,at=myAT,labels=myLab,tick=TRUE)
        # Add discrete (only if present)
        myDiscrete <- paste(ContData.env$myPrefix.Discrete,i,sep=".")
        if (myDiscrete %in% myParameters.ALL == TRUE){##IF.Discrete.START
          data.plot[,myDiscrete] <- as.numeric(data.plot[,myDiscrete]) # coerce "NA" to NA
          data.plot.points <- na.omit(data.plot[,c(ContData.env$myName.DateTime,myDiscrete)]) # remove NA
          myPoints.X <- as.numeric(rownames(data.plot.points))
          myPoints.Y <- data.plot.points[,myDiscrete]
          points(myPoints.Y~myPoints.X,col="black",pch=19) 
        }##IF.Discrete.END
        cat("\n\n")
        cat("Discrete measurements, if any, show up as points on the plot.")
        cat("\n\n")
      } else if(plot_format=="ggplot"){
        # plot, ggplot ####
        data.plot[, ContData.env$myName.DateTime] <- as.POSIXct(data.plot[, ContData.env$myName.DateTime])
        # ggplot, main
        scale_lab   <- c("Continuous", "Discrete")
        scale_col   <- c("dark gray", "black")
        scale_shape <- c(21, 24)
        scale_fill  <- scale_col
        p_i <- ggplot2::ggplot(data=data.plot, ggplot2::aes_string(x=ContData.env$myName.DateTime, y=i)) +
                ggplot2::geom_point(ggplot2::aes(color="continuous"
                                                 , shape="continuous"
                                                 , fill="continuous"), na.rm=TRUE) + 
                ggplot2::scale_x_datetime(date_labels = "%Y-%m-%d") + 
                ggplot2::labs(title=mySiteID, x=ContData.env$myLab.Date, y=myParameters.Lab[i.num]) +
                ggplot2::theme(plot.title=ggplot2::element_text(hjust=0.5)
                               , plot.subtitle=ggplot2::element_text(hjust=0.5))
        # ggplot, discrete points
        # Add discrete (only if present)
        myDiscrete <- paste(ContData.env$myPrefix.Discrete,i,sep=".")
        if (myDiscrete %in% myParameters.ALL == TRUE){##IF.Discrete.START
          p_i <- p_i + ggplot2::geom_point(data=data.plot
                                           , ggplot2::aes_q(x=as.name(ContData.env$myName.DateTime)
                                                            , y=as.name(myDiscrete)
                                                            , color="discrete"
                                                            , shape="discrete"
                                                            , fill="discrete"), na.rm=TRUE) 
        } else {
         p_i <- p_i + ggplot2::geom_blank(ggplot2::aes(color="discrete"
                                                       , shape="discrete"
                                                       , fill="discrete"))
        }##IF.Discrete.END
        # ggplot, Legend
        p_i <- p_i + 
                ggplot2::scale_color_manual(name="Data Type", labels=scale_lab, values=scale_col) +
                ggplot2::scale_shape_manual(name="Data Type", labels=scale_lab, values=scale_shape) +
                ggplot2::scale_fill_manual(name="Data Type", labels=scale_lab, values=scale_fill) 
        # ggplot, show 
        print(p_i)
        cat("\n\n")
      }##IF~plot_format~END 

      #
    }##FOR.i.END

PLOTS, MULTIPARAMETER

Multiparameter plots if exist in the data.

 # BOTH plots

    # 3.3. Plot
    data.plot <- data.import


    plot_format <- "ggplot" # "base" or "ggplot"
    #
    if(plot_format=="base"){##IF~plot_format~START
    # plot, base ####
    #
    # cheat on Date/Time axis
    n.Total <- length(data.plot[,ContData.env$myName.Date])
    pct <- c(20,40,60,80,100)*.01
    myAT <- c(1,round(n.Total * pct,0))
    myLab <- data.plot[,ContData.env$myName.Date][myAT]
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Need to check for parameters before plot
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  

    # Temp, Air vs. Water
    if (ContData.env$myName.AirTemp %in% myParameters & ContData.env$myName.WaterTemp %in% myParameters == TRUE){##IF.Temp.START
      cat("## PLOT, Temperature (Air vs. Water) \n\n" )

      myPlot.Y <- data.plot[,ContData.env$myName.AirTemp]
      myPlot.Y2 <- data.plot[,ContData.env$myName.WaterTemp]
      myPlot.Ylab <- ContData.env$myLab.Temp.BOTH
      #plot(myPlot.Y,type="l",main=mySiteID,xlab=myLab.Date,ylab=myPlot.Ylab,col="green", xaxt="n")
      plot(myPlot.Y,type="p",main=mySiteID,xlab=ContData.env$myLab.Date,ylab=myPlot.Ylab,col="green", xaxt="n",cex=0.5)
      axis(1,at=myAT,labels=myLab,tick=TRUE)
      #lines(myPlot.Y2,type="l",col="blue")
      points(myPlot.Y2,type="p",col="blue",cex=0.5)
      legend(x="bottomright",lty=1,col=c("green","blue"),legend=c("air","water"))
      # Add discrete, AirTemp (only if present)
      myDiscrete <- paste(ContData.env$myPrefix.Discrete,ContData.env$myName.AirTemp,sep=".")
      if (myDiscrete %in% myParameters.ALL == TRUE){##IF.Discrete.START
        data.plot[,myDiscrete] <- as.numeric(data.plot[,myDiscrete]) # coerce "NA" to NA
        data.plot.points <- na.omit(data.plot[,c(ContData.env$myName.DateTime,myDiscrete)]) # remove NA
        myPoints.X <- as.numeric(rownames(data.plot.points))
        myPoints.Y <- data.plot.points[,myDiscrete]
        points(myPoints.Y~myPoints.X,col="green",pch=19) 
      }##IF.Discrete.END
      # Add discrete, Water.Temp (only if present)
      myDiscrete <- paste(ContData.env$myPrefix.Discrete,ContData.env$myName.WaterTemp,sep=".")
      if (myDiscrete %in% myParameters.ALL == TRUE){##IF.Discrete.START
        data.plot[,myDiscrete] <- as.numeric(data.plot[,myDiscrete]) # coerce "NA" to NA
        data.plot.points <- na.omit(data.plot[,c(ContData.env$myName.DateTime,myDiscrete)]) # remove NA
        myPoints.X <- as.numeric(rownames(data.plot.points))
        myPoints.Y <- data.plot.points[,myDiscrete]
        points(myPoints.Y~myPoints.X,col="blue",pch=19) 
      }##IF.Discrete.END
      # Future mod, add points to legend
      cat("\n\n")
      cat("Discrete measurements, if any, show up as points on the plot.")
      cat("\n\n")

      #
    }##IF.Temp.END
    #
    # Water, Temp vs Level
    if (ContData.env$myName.WaterTemp %in% myParameters & ContData.env$myName.SensorDepth %in% myParameters == TRUE){##IF.Temp_Level.START
      cat("## PLOT, Sensor Depth vs. Water Temperature \n\n")

      par.orig <- par(no.readonly=TRUE) # save original par settings
        par(oma=c(0,0,0,2))
        myPlot.Y <- data.plot[,ContData.env$myName.WaterTemp]
        myPlot.Ylab <- ContData.env$myLab.WaterTemp
        myPlot.Y2 <- data.plot[,ContData.env$myName.SensorDepth]
        myPlot.Y2lab <- ContData.env$myLab.SensorDepth
        #
        #plot(myPlot.Y,type="l",main=mySiteID,xlab=myLab.Date,ylab=myPlot.Ylab,col="blue", xaxt="n")
        plot(myPlot.Y,type="p",main=mySiteID,xlab=ContData.env$myLab.Date,ylab=myPlot.Ylab,col="blue", xaxt="n",cex=0.5)
        axis(1,at=myAT,labels=myLab,tick=TRUE)
        # Add discrete, Water.Temp (only if present)
        myDiscrete <- paste(ContData.env$myPrefix.Discrete,ContData.env$myName.WaterTemp,sep=".")
        if (myDiscrete %in% myParameters.ALL == TRUE){##IF.Discrete.START
          data.plot[,myDiscrete] <- as.numeric(data.plot[,myDiscrete]) # coerce "NA" to NA
          data.plot.points <- na.omit(data.plot[,c(ContData.env$myName.DateTime,myDiscrete)]) # remove NA
          myPoints.X <- as.numeric(rownames(data.plot.points))
          myPoints.Y <- data.plot.points[,myDiscrete]
          points(myPoints.Y~myPoints.X,col="blue",pch=19) 
        }##IF.Discrete.END
        #
        # Add 2nd y axis (2nd color is black)
        par(new=TRUE)
        #plot(myPlot.Y2,type="l",col="black",axes=FALSE,ann=FALSE)
        plot(myPlot.Y2,type="p",col="black",axes=FALSE,ann=FALSE,cex=0.5)
        axis(4)
        mtext(myPlot.Y2lab,side=4,line=2.5)
        # Add discrete, SensorDepth (only if present)
        myDiscrete <- paste(ContData.env$myPrefix.Discrete,ContData.env$myName.SensorDepth,sep=".")
        if (myDiscrete %in% myParameters.ALL == TRUE){##IF.Discrete.START
          data.plot[,myDiscrete] <- as.numeric(data.plot[,myDiscrete]) # coerce "NA" to NA
          data.plot.points <- na.omit(data.plot[,c(ContData.env$myName.DateTime,myDiscrete)]) # remove NA
          myPoints.X <- as.numeric(rownames(data.plot.points))
          myPoints.Y <- data.plot.points[,myDiscrete]
          points(myPoints.Y~myPoints.X,col="black",pch=19) 
        }##IF.Discrete.END
        #
        # Future mod, add points to legend
        # LEGEND
        par(mar=c(0,0,0,0))
        plot.new()
        legend(x="center",lty=1,col=c("blue","black"),legend=c("temperature","sensor depth"),bty="n")
    #
        #
      par(par.orig) # return to original par settings
      cat("\n\n")
      cat("Discrete measurements, if any, show up as points on the plot.")
      cat("\n\n")
      }##IF.Temp_Level.END
      #
    } else if(plot_format=="ggplot"){
    # plot, ggplot ####
      # date to POSIX
      data.plot[, ContData.env$myName.DateTime] <- as.POSIXct(data.plot[, ContData.env$myName.DateTime])
      #
      if (ContData.env$myName.AirTemp %in% myParameters & ContData.env$myName.WaterTemp %in% myParameters ){##IF.Temp.START
        cat("## PLOT, Temperature (Air vs. Water) \n\n" )
        #
        # ggplot, main
        scale_lab   <- c("Air", "Water")
        scale_col   <- c("green", "blue")
        scale_fill  <- scale_col   
        scale_shape <- c(21, 21)

        p2_t_aw <- ggplot2::ggplot(data=data.plot, ggplot2::aes(color="a"
                                                                , fill="b"
                                                                , shape="c")) +
                    ggplot2::geom_path(ggplot2::aes_q(x=as.name(ContData.env$myName.DateTime)
                                                    , y=as.name(ContData.env$myName.AirTemp)
                                                    , color="air"), na.rm=TRUE) + 
                    ggplot2::geom_path(ggplot2::aes_q(x=as.name(ContData.env$myName.DateTime)
                                                    ,y=as.name(ContData.env$myName.WaterTemp)
                                                    , color="water"), na.rm=TRUE) + 
                    ggplot2::scale_x_datetime(date_labels = "%Y-%m-%d") + 
                    ggplot2::labs(title=mySiteID
                                  , x=ContData.env$myLab.Date
                                  , y=ContData.env$myLab.Temp.BOTH) +
                    ggplot2::theme(plot.title=ggplot2::element_text(hjust=0.5)
                                   , plot.subtitle=ggplot2::element_text(hjust=0.5))
        # ggplot, discrete points
        ## skip

        # ggplot, Legend
        p2_t_aw <- p2_t_aw + 
                ggplot2::scale_color_manual(name="Data Type", labels=scale_lab, values=scale_col) 

        # ggplot, show 
        print(p2_t_aw)
        cat("\n\n")
        #
      }##IF.Temp.END
      #
      # Water, Temp vs Level
      if (ContData.env$myName.WaterTemp %in% myParameters & ContData.env$myName.SensorDepth %in% myParameters){##IF.Temp_Level.START
        cat("## PLOT, Sensor Depth vs. Water Temperature \n\n")
        #
        # ggplot, main
        scale_lab   <- c("Water Temp", "Sensor Depth")
        scale_col   <- c("blue", "black")
        scale_fill  <- scale_col   
        scale_shape <- c(21, 21)

        # ggplot intentionaly not designed for dual y plotting
        min_diff <- min(data.plot[, ContData.env$myName.WaterTemp], na.rm=TRUE) - min(data.plot[, ContData.env$myName.SensorDepth], na.rm=TRUE)
        rd_y1 <- diff(range(data.plot[, ContData.env$myName.WaterTemp], na.rm = TRUE))
        rd_y2 <- diff(range(data.plot[, ContData.env$myName.SensorDepth], na.rm = TRUE))
        min_y1 <- min(data.plot[, ContData.env$myName.WaterTemp]) 
        max_y1 <- max(data.plot[, ContData.env$myName.WaterTemp])
        avg_y1 <- mean(data.plot[, ContData.env$myName.WaterTemp])
        data.plot[, "depth4plot"] <- (data.plot[, ContData.env$myName.SensorDepth] * rd_y1/rd_y2) + min_diff

        p3_td <- ggplot2::ggplot(data=data.plot) +
                    ggplot2::geom_path(ggplot2::aes_q(x=as.name(ContData.env$myName.DateTime)
                                                    , y=as.name(ContData.env$myName.WaterTemp)
                                                    , color="water"), na.rm=TRUE) + 
                    ggplot2::geom_path(ggplot2::aes_q(x=as.name(ContData.env$myName.DateTime)
                                                      ,y=as.name("depth4plot")
                                                      , color="depth"), na.rm = TRUE) + 
                    ggplot2::scale_x_datetime(date_labels = "%Y-%m-%d") + 
                    ggplot2::labs(title=mySiteID
                                  , x=ContData.env$myLab.Date
                                  , y=ContData.env$myLab.WaterTemp) +
                    ggplot2::theme(plot.title=ggplot2::element_text(hjust=0.5)
                                   , plot.subtitle=ggplot2::element_text(hjust=0.5))
        # ggplot, add 2nd Y
        p3_td <- p3_td +  ggplot2::scale_y_continuous(sec.axis = ggplot2::sec_axis(
                  trans=~./(rd_y1/rd_y2)-(min_diff/(rd_y1/rd_y2))
                  , name=ContData.env$myLab.SensorDepth))

        # ggplot, discrete points
        ## skip

        # ggplot, Legend
        ## need to reverse legend items to be correct.
        p3_td <- p3_td + 
                ggplot2::scale_color_manual(name="Data Type", labels=rev(scale_lab), values=rev(scale_col))

        # ggplot, show 
        print(p3_td)
        cat("\n\n")
        #
      }##IF.Temp_Level.END  
      #
    }##IF~plot_format~END


leppott/ContDataQC documentation built on Jan. 5, 2025, 10:12 a.m.