R/cleanMetadata.GSE34171.R

Defines functions cleanMetadata.GSE34171

Documented in cleanMetadata.GSE34171

#' @rdname cleanMetadata
#' @details
#'    GSE34171:\cr
#'    For GSE34171 (MDFCI), the cleanup adds a \code{Batch} factor with one
#'    level per expression platform: GPL570 (HG-U133 plus 2), GPL96 (HG-U133A)
#'    and GPL97 (HG-U133B). It also adds the identifier columns \code{CEL} and
#'    \code{GSM} and uses \code{CEL} as row names.
#' @export
cleanMetadata.GSE34171 <- function(meta_data) {
  message("Cleaning GSE34171 (MDFCI)!")

  # Apply the generic data.frame cleanup first, muting its messages
  meta_data <- suppressMessages(cleanMetadata.data.frame(meta_data))

  # One batch level per platform; platform ids outside this set become NA
  platforms <- c("GPL570", "GPL96", "GPL97")
  meta_data$Batch <- factor(meta_data$platform_id, levels = platforms)

  # Extract the GSM accession (GSM followed by digits) that directly follows
  # a "/" and precedes a "." in the supplementary file path; entries that do
  # not match the pattern are left unchanged
  gsm_in_path <- "^.+/(GSM[0-9]+)\\..+$"
  meta_data$CEL <- gsub(gsm_in_path, "\\1", meta_data$supplementary_file)

  # Keep the GEO accession as a plain character column
  meta_data$GSM <- as.character(meta_data$geo_accession)

  # Index the rows by the CEL identifier
  rownames(meta_data) <- meta_data$CEL

  meta_data
}


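# NOTE: Everything below is commented-out legacy code and is never executed.
# It assembled the metadata by merging separate sample, outcome and clinical
# tables read from local files; it is kept for reference only.
#
#   meta_data_Clinical <-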
#     read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                               "GSE34171_clinical_info.txt"),
#                header = TRUE, stringsAsFactors = FALSE)
#
#   meta_data_Outcome <-
#     read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                               "GSE34171_outcome_data.txt"),
#                skip = 2, stringsAsFactors = FALSE)
#   colnames(meta_data_Outcome) <- c("Title", "Class", "os", "Followup")
#
#   meta_data_Sample <-
#     read.csv(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                             "sample.csv"),
#              stringsAsFactors = FALSE)
#
#   meta_data <- merge(merge(meta_data_Sample,
#                                meta_data_Outcome, all.x=TRUE, all.y=TRUE),
#                          meta_data_Clinical, all.x=TRUE)
#
#   table(meta_data$Title, meta_data$Platform)
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL570" , c(1, 2)]
#
#   colnames(xx)[1] <- "HGU133Plus2"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL6801" , c(1, 2)]
#
#   colnames(xx)[1] <- "GenomeWideSNP6"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL96" , c(1, 2)]
#
#   colnames(xx)[1] <- "HGU133A"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL97" , c(1, 2)]
#
#   colnames(xx)[1] <- "HGU133B"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
#
#
#   meta_data <- meta_data[!duplicated(meta_data$Title),]
#
#   meta_data$IPI <- as.factor(as.numeric(meta_data$IPI))
#
#   meta_data$ipi.hl <- ifelse(is.na(meta_data$IPI), NA, "0-1")
#   meta_data$ipi.hl[meta_data$IPI %in% c(2, 3)] <- "2-3"
#   meta_data$ipi.hl[meta_data$IPI %in% c(4, 5)] <- "4-5"
#
#   table(meta_data[, c("IPI", "ipi.hl")])
#
#   Monti.Suppl5 <-
#     read.delim("../Litterature/Monti_Suppl5_NIHMS398769-supplement-06(1).txt",
#                stringsAsFactors = FALSE)
#
#   rownames(Monti.Suppl5) <- Monti.Suppl5$CaseID
#
#   colnames(Monti.Suppl5)[5] <- "WrightClass"
#
#
#
#   meta_data <- merge(meta_data, Monti.Suppl5,
#                          by.x = "Title", all.x=TRUE,
#                          by.y = "CaseID")
#
#
#   meta_data$TP53.mut[meta_data$TP53.mut == "na"] <- NA
#
#
#
#
#
#   ABCGCBclass <- read.delim(file.path(MDFCI.ext.dir,
#                                       "../ABCGCBclassification",
#                                       "ABCGCBclass.txt"))
#   meta_data$WrightClass_own <-
#     ABCGCBclass[meta_data$Title,2]
#
#   ABCGCBclass[paste(meta_data$GPL570, ".CEL", sep = ""),2]
#
#
#   meta_data$OS <- Surv(meta_data$os/365.25, meta_data$Followup)
#
#   os5  <- ifelse(meta_data$os/365.25 > 5, 5, meta_data$os/365.25)
#   ios5 <- pmin(ifelse(meta_data$os/365.25 > 5, 0, 1), meta_data$Followup)
#
#   meta_data$OS5 <- Surv(as.numeric(os5), ios5)
#
#
#   meta_data$HGU133Plus2[meta_data$HGU133Plus2 == "GSM844275"] <- NA
#
#   meta_data <- meta_data[, c(
#     "Title", "Class", "os", "Followup", "Type", "Entity", "Primary",
#     "IPI", "HGU133Plus2", "GenomeWideSNP6", "HGU133A", "HGU133B", "ipi.hl",
#     "SNP.ScanID", "GEP.SampleID", "CCC..Best.10.13.", "WrightClass",
#     "WrightClass_own", "TP53.mut")]
AEBilgrau/DLBCLdata documentation built on May 5, 2019, 11:29 a.m.