PROJECT <- params$PROJECT
date_DQC <- params$date_DQC
report_dataframe <- params$report_dataframe

Report start at r date_DQC

r paste(PROJECT, "report")

mydf = report_dataframe[,-ncol(report_dataframe)]
orig_mydf = mydf

mydf[] <- lapply(mydf, as.character)

# colnames(mydf)  ######
# mydf[1,8] = 1   ######

for(i in 2:(ncol(mydf))){
  mydf[,i] = as.numeric(mydf[,i])
  orig_mydf[,i] = as.numeric(orig_mydf[,i])
}

sum_col = apply(mydf[,c(2:ncol(mydf))],1,sum)

# -----------------------


new_order = c("Station",
              "Offline", 
              "err_no_new_data", 
              "err_empty",
              "err_structure",
              "err_overlap", 
              "err_missing_record", 
              "err_restart_record",
              "err_date_missing",
              "err_duplicates_rows",
              "err_logger_number",
              "err_structure_change",
              "var_flagged",
              "err_range_alert",
              "err_out_of_range")

new_colnames = c("Station",
                 "Station Offline",
                 "No new data",
                 "File empty",
                 "File wrong structure",
                 "Data overlap",
                 "Data gap (record gap)",
                 "Data gap (new software)",
                 "Data gap (not recoverable)",
                 "Rows duplicated",
                 "Logger information changed", 
                 "File structure changed",
                  "Silent variables",
                 "Data overrange",
                 "Data overrange physical")

mydf = mydf[,new_order]
# mydf[1,7] = 1

j=1

for(j in 1:nrow(mydf)){
  if(any(mydf[j,] == 1,na.rm = T)){

    # --- Offline (2 column) ---

    if(mydf[j,2] == 1 & !is.na(mydf[j,2])){  # station offline == 1 => station offline
      mydf[j,3:ncol(mydf)] = NA
    }
    if(mydf[j,2] == 2 & !is.na(mydf[j,2])){  # station offline == 2 => manually excluded
      mydf[j,3:ncol(mydf)] = NA
    }

    # --- err_no_new_data (3 column) ---

    if(mydf[j,3] == 1 & !is.na(mydf[j,3])){  # err_no_new_data == 1 => no new data, but the file (empty and structure) are OK!
      mydf[j,6:ncol(mydf)] = NA
    }

    # --- err_empty (4 column) ---

    if(mydf[j,4] == 1 & !is.na(mydf[j,4])){  # err_empty == 1 => the file is empty (no new data are not evaluated)
      mydf[j,c(3,5:ncol(mydf))] = NA
    }

     # --- err_structure (5 column) ---

    if(mydf[j,5] == 1 & !is.na(mydf[j,5])){  # err_structure == 1 => the file has a wrong structure (no new data are not evaluated)
      mydf[j,c(3,6:ncol(mydf))] = NA
    }

     # --- err_overlap (6 column) ---

    if(mydf[j,6] == 1 & !is.na(mydf[j,6])){  # err_structure == 1 => the file has an overlap
      mydf[j,c(9:ncol(mydf))] = NA
    }

    # --- err_missing_record (7 column) ---

    if(mydf[j,7] == 1 & !is.na(mydf[j,7])){  # err_missing_record == 1 => the file has a date gap due to missing record (recoverable)
      mydf[j,c(9:ncol(mydf))] = NA
    }

    # --- err_restart_record (8 column) ---

    if(mydf[j,8] == 1 & !is.na(mydf[j,8])){  # err_restart_record == 1 => the file has a date gap due to restart record (recoverable)
      mydf[j,c(9:ncol(mydf))] = NA
    }


  }
}


# -----------------------

mydf[is.na(mydf)] = "grey"

mydf$Offline[mydf$Offline == 1] = "firebrick1"
mydf$Offline[mydf$Offline == 2] = "blue"
mydf$Offline[mydf$Offline == 0] = "forestgreen"
mydf$Offline[mydf$Offline == 3] = "greenyellow"

mydf$err_empty[mydf$err_empty == 0] = "forestgreen"
mydf$err_empty[mydf$err_empty == 1] = "firebrick1"

mydf$err_logger_number[mydf$err_logger_number == 0] = "forestgreen"
mydf$err_logger_number[mydf$err_logger_number == 1] = "gold"

mydf$err_structure[mydf$err_structure == 0] = "forestgreen"
mydf$err_structure[mydf$err_structure == 1] = "firebrick1"

mydf$err_structure_change[mydf$err_structure_change == 0] = "forestgreen"
mydf$err_structure_change[mydf$err_structure_change == 1] = "gold"

mydf$err_no_new_data[mydf$err_no_new_data == 0] = "forestgreen"
mydf$err_no_new_data[mydf$err_no_new_data == 1] = "firebrick1"

mydf$err_overlap[mydf$err_overlap == 0] = "forestgreen"
mydf$err_overlap[mydf$err_overlap == 1] = "firebrick1"

mydf$err_missing_record[mydf$err_missing_record == 0] = "forestgreen"
mydf$err_missing_record[mydf$err_missing_record == 1] = "firebrick1"
mydf$err_missing_record[mydf$err_missing_record == 2] = "blue"


mydf$err_restart_record[mydf$err_restart_record == 0] = "forestgreen"
mydf$err_restart_record[mydf$err_restart_record == 1] = "firebrick1"
mydf$err_restart_record[mydf$err_restart_record == 2] = "blue"


mydf$err_date_missing[mydf$err_date_missing == 0] = "forestgreen"
mydf$err_date_missing[mydf$err_date_missing == 1] = "gold"

mydf$err_range_alert[mydf$err_range_alert == 0] = "forestgreen"
mydf$err_range_alert[mydf$err_range_alert == 1] = "gold"

mydf$err_out_of_range[mydf$err_out_of_range == 0] = "forestgreen"
mydf$err_out_of_range[mydf$err_out_of_range == 1] = "gold"

mydf$err_duplicates_rows[mydf$err_duplicates_rows == 0] = "forestgreen"
mydf$err_duplicates_rows[mydf$err_duplicates_rows == 1] = "gold"

mydf$var_flagged[mydf$var_flagged == 0] = "forestgreen"
mydf$var_flagged[mydf$var_flagged == 1] = "gold"

mydf[is.na(mydf)] = "grey"

colnames(mydf) = new_colnames

mydf = mydf[,-which(colnames(mydf) == "Data overrange physical")]   # remove warning of data overrange (physical range)

aaa = cbind(mydf[,1:8], rep("white", nrow(mydf)),mydf[,9:ncol(mydf)])
colnames(aaa)[9] = ""

mycolors = c("forestgreen", "greenyellow","gold","firebrick1", "grey", "blue")
mycolors_value = c("OK!", "Currently offline! (>24h)","Check it out!","Action required!", "Not evaluated!", "Bypassed!")

melt_final = melt(aaa,id.vars = "Station")
melt_final$value = factor(melt_final$value,levels = mycolors)

station_level = unique(melt_final$Station)

ggplot(melt_final, aes(x = variable, y = Station))+
  theme(text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 0, vjust =0),
        axis.ticks.x = element_blank(),
        panel.background = element_rect(fill = "white", colour = "grey"),
        panel.border = element_rect(fill = "NA", colour = "white"))+
  geom_tile(aes(fill = value),colour = "white")+
  scale_fill_manual(name = "",values = mycolors,limits = mycolors,labels = mycolors_value, drop = FALSE)+
  scale_y_discrete(name = "Stations",limits = rev(station_level))+
  scale_x_discrete(name = "",position = "top")

##################################
# report_dataframe
ff = as.data.frame(report_dataframe,stringsAsFactors = F)
ff[] <- lapply(ff, as.character)

for(i in 2:(ncol(ff)-1)){
  ff[,i] = as.numeric(ff[,i])
}

r_link = ff$report_link
r_link_text =  r_link
# r_link_text =  gsub(pattern = "/",replacement = "\\\\",r_link)


w1 = which(is.na(r_link_text))
w2 = which(!is.na(r_link_text))


r_link_text[w1] = "---"
r_link_text[w2] ="Report available"

# d_folder_text[w3] = "No data file"
# d_folder_text[w4] ="Data available"

ff$report_link[w1]<- paste0(r_link_text[w1])
ff$report_link[w2]<- paste0("[", r_link_text[w2], "](\\", r_link[w2], ")")

# ff$report_link[w2]<- paste(r_link[w2],sep = "")

new_ff = ff[,c(1, ncol(ff))]
colnames(new_ff) = c("Station", "Report")


knitr::kable(new_ff,row.names = FALSE)

# datatable(new_ff)

Report end at r Sys.time()



bridachristian/DataQualityCheckEuracAlpEnv documentation built on Oct. 27, 2019, 5:55 p.m.