R/read_lmx.R

Defines functions read_lmx

Documented in read_lmx

#'read_lmx
#'@description  read lmx raw file to create a summarizedexperiment
#'              >HOD is replaced by Inf, <LOD is replaced by -Inf;
#'              retrieve STD3, and the lowest, the hightest STD's MFI, Conc.
#'              (correlated to LLOD and HLOD and matching MFI)
#'@import dplyr
#'@export
#'@param f a string file path
#'@param lot: optional parameter to record lot information
#'@param analyt_start_row 8 for npx, 9 for quant
#'@return a summarizedexperimet object.
#'        rowData is the feature and HOD, and LOD,
#'        colData is the unique_id and subject id given by the operator, and
#'        assay file name where the sample was assayed
#'@md

read_lmx <- function(f, lot = "default", analyt_start_row){

  if(grepl("xls$", f)){
    fdata <- readxl::read_xls(f, sheet = 1, col_names = T, .name_repair = "unique_quiet" )
  }
  if(grepl("xlsx$", f)){
    fdata <- read.xlsx(f, sheetName = "Summary", header = F, .name_repair = "unique_quiet" )
  }

  fdata <- fdata[1 : (which(fdata[ , 1] == "Notes:")-1), ]

  coldata <- data.frame(Location = unlist(fdata[-c(1 : 4), 1]),
                        Sample = unlist(fdata[-c(1 : 4), 2]))%>%
    mutate(unique_id = paste(Location, Sample, sep = "_"),
           File = f,
           Lot = lot)%>%
    magrittr::set_rownames(value = .$unique_id)

  analyt <- fdata[3, -c(1:2)]
  analyt <- analyt[!is.na(analyt)]
  rowdata <- data.frame(Analyt = make.names(analyt),
                        Unit = as.character(fdata[4, -c(1:2)][1: length(analyt)]))%>%
    magrittr::set_rownames(value = .$Analyt)

  fdata <- data.frame(fdata[-c(1:4), -c(1:2)])
  fdata <- data.frame(fdata[, 1:length(analyt)])

  # get lod
  rowdata$LOD <- sapply(fdata, function(x){
    as.numeric(gsub("(↓|↑|<|>)", "", x[grep("<", x)][1]))
  })
  # get hod
  rowdata$HOD <- sapply(fdata, function(x){
    as.numeric(gsub("(↓|↑|<|>)", "", x[grep(">", x)][1]))
  })

  # replace  lod to -Inf Inf
  fdata_na <- apply(fdata, 2, function(x){
    case_when(
      grepl("(<|↓)", x) ~ -Inf,
      grepl("(>|↑)", x) ~ Inf,
      TRUE ~ as.numeric(x)
    )
  })

  # get missing lod pct
  rowdata$oor_pct <- sapply(data.frame(is.infinite(fdata_na)), function(x){
    round(sum(x) / length(x) * 100, 2)
  })


  # replace below lod to facevalue
  fdata <- apply(fdata, 2, function(x){
    as.numeric(gsub("(<|↓|>|↑)", "", x))
  })

  colnames(fdata_na) <- colnames(fdata) <-rowdata$Analyt
  rownames(fdata_na) <- rownames(fdata) <- coldata$unique_id

  #---flour intensity raw
  #sheet.list <- make.names(readxl::excel_sheets(f))
  sheet.list <- rowdata$Analyt

  rdata <- lapply(rowdata$Analyt, function(x){
    #temp <- read.xlsx(f, sheetIndex = which(sheet.list == x), header = T, startRow = analyt_start_row)
    if(grepl("xls$", f)){
      temp <- readxl::read_xls(f, sheet = (2 + which(sheet.list == x)), col_names = T, skip = (analyt_start_row - 1),
                               .name_repair = "unique_quiet" )
      temp <- temp[1: (grep("Note", temp$Location)-2), ]
    }
    if(grepl("xlsx$", f)){
      temp <- read.xlsx(f, sheetIndex = (2 + which(sheet.list == x)), header = T, startRow = analyt_start_row,
                        .name_repair = "unique_quiet" )
      temp <- temp[1: (grep("Note", temp$Location)-1), ]
    }


    temp <- temp%>%
      dplyr::select(Location, Sample, MFI, CV)%>%
      dplyr::filter(!is.na(Sample))%>%
      magrittr::set_colnames(value = c("Location", "Sample",
                             paste0(x, "_MFI"),
                             paste0(x, "_CV")))
    temp
  })

  rstd <- lapply(rowdata$Analyt, function(x){
    #temp <- read.xlsx(f, sheetIndex = which(sheet.list == x), header = T, startRow = analyt_start_row)
    if(grepl("xls$", f)){
      temp_std <- readxl::read_xls(f, sheet = (2 + which(sheet.list == x)), col_names = T, skip = 6, .name_repair = "unique_quiet" )
      temp_std <- temp_std[1: (grep("Note", unlist(temp_std[ , 1]))[1]-2), ]
    }
    if(grepl("xlsx$", f)){
      temp_std <- read.xlsx(f, sheetIndex = (2 + which(sheet.list == x)), header = T, startRow = 7, )
      temp_std <- temp_std[1: (grep("Note", unlist(temp_std[ , 1]))[1]-1), ]
    }

    idx_l <- which(!is.na(temp_std$MFI))[2]
    idx_h <- which(!is.na(temp_std$MFI))
    idx_h <- idx_h[length(idx_h)]
    #c(temp_std[idx_l, "MFI"], temp_std[idx_l, 11], # lowest std gradient's mfi and conc.
    #  temp_std[idx_h, "MFI"], temp_std[idx_h, 11], # highest std gradient's mfi and conc.
    #  temp_std[temp_std$Location == "1A1", "MFI"], temp_std[temp_std$Location == "1A1", 11], temp_std[temp_std$Location == "1A1", 13],
    #  temp_std[temp_std$Location == "1C1", "MFI"], temp_std[temp_std$Location == "1C1", 11], temp_std[temp_std$Location == "1C1", 13],
    #  temp_std[temp_std$Location == "1E1", "MFI"], temp_std[temp_std$Location == "1E1", 11], temp_std[temp_std$Location == "1E1", 13],
    #  temp_std[temp_std$Location == "1G1", "MFI"], temp_std[temp_std$Location == "1G1", 11], temp_std[temp_std$Location == "1G1", 13],
    #  temp_std[temp_std$Location == "1I1", "MFI"], temp_std[temp_std$Location == "1I1", 11], temp_std[temp_std$Location == "1I1", 13],
    #  temp_std[temp_std$Location == "1K1", "MFI"], temp_std[temp_std$Location == "1K1", 11], temp_std[temp_std$Location == "1K1", 13],
    #  temp_std[temp_std$Location == "1M1", "MFI"], temp_std[temp_std$Location == "1M1", 11], temp_std[temp_std$Location == "1M1", 13],
    #  temp_std[temp_std$Location == "1O1", "MFI"], temp_std[temp_std$Location == "1O1", 11], temp_std[temp_std$Location == "1O1", 13]
    #  )%>%
    #  as.numeric()
    std_read <- as.numeric(c(temp_std[idx_l, "MFI"], temp_std[idx_l, 11], # lowest std gradient's mfi and conc.
                             temp_std[idx_h, "MFI"], temp_std[idx_h, 11])) # highest std gradient's mfi and conc.
    for (y in 0 : (nrow(temp_std)/2 - 1)) {
      std_read <- as.numeric(c(std_read, temp_std[y*2+1, "MFI"], temp_std[y*2+1, 11]))
    }
    std_read
  })%>%
    do.call(what = rbind)

  colnames(rstd) <- c("low_MFI", "low_conc.", "high_MFI", "high_conc.",
                      make.names(rep(c("MFI", "conc"), (ncol(rstd)-4)/2), unique = T))

  rowdata <- rowdata%>%
    cbind(data.frame(rstd))

  merge.rdata <- rdata[[1]]
  #merge.rdata$Sample[which(merge.rdata$Sample == "MM-0535-96-H_")] <- "MM-0535-96-H_2"
  if(length(rdata) > 1){
    for (i in 2 : length(rdata)) {
      merge.rdata <- merge(merge.rdata, rdata[[i]])
    }
  }
  rownames(merge.rdata) <- paste(merge.rdata$Location, merge.rdata$Sample, sep = "_")

  rdata <- data.frame(merge.rdata[match(rownames(fdata), rownames(merge.rdata)) , grep("MFI$", colnames(merge.rdata))])

  cvs <- data.frame(merge.rdata[match(rownames(fdata), rownames(merge.rdata)) , grep("CV$", colnames(merge.rdata))])
  cvs <- sapply(cvs, function(x){
    as.numeric(gsub("%", "", x))/100
  })
  colnames(rdata) <- colnames(cvs) <- colnames(fdata)
  rownames(rdata) <- rownames(cvs) <- rownames(fdata)

  re <- SummarizedExperiment(colData = coldata,
                             rowData = rowdata,
                             assays = list(data_default = t(fdata_na),
                                           data_imputed = t(fdata),
                                           mfi_default = t(rdata),
                                           cv = t(cvs)),
                             metadata = list("file_name" = toString(f)))
  return(re)
}
ismms-himc/LuminexTools documentation built on July 2, 2024, 2:08 a.m.