R/sscore2.R

Defines functions sscore2

sscore2 <- function(celFile1 = NULL, celFile2 = NULL, celTable = NULL, celTab.names = FALSE, typeFilter = 1, method = 1, SF1 = NULL, SF2 = NULL, fileout = FALSE, gzip = FALSE, verbose = FALSE) {
  # Insert fake-NULL fix to prevent 'no visible binding for global variable' in R CMD check:
  fs1_man_fsetid <- transcript_cluster_id <- probeset_id <- transcriptClusterID <- category <- probesetType <- transcriptID <- '.' <- geneName <- probe_type <- probesetID <- NULL
  # suppress warnings in functin due to 'closing unsed connection' warning for ReadCel()
  # as used within the sscore2() function:
  # options(warn = -1)
   if (is.null(celTable)) stopifnot(!is.null(celFile1), !is.null(celFile2))
   		else if (is.null(celFile1) & is.null(celFile2)) stopifnot(!is.null(celTable))
		  else stop("INPUT EITHER CELFILES OR CELTABLE")
   # DEFINE CHIP-TYPES THAT THE SSCORE2 CODE IS COMPATIABLE WITH
   chip430 <- c("Rhesus", "Mouse430_2")
   chipGene <- c("MoGene-2_1-st", "MoGene-1_0-st-v1", "DroGene-1_0-st")
   chipTrans <- c("MTA-1_0", "Clariom_S_Mouse")
	
   chipType <- c(chip430, chipGene, chipTrans)

   if (!is.null(celTable)) {
   		if (!is.data.table(celTable)) celTable <- as.data.table(celTable)
		  chipType1 <- chipType2 <- vector("character", length = nrow(celTable))
		  for (i in 1:nrow(celTable)) {
			  stopifnot (!is.null(celTable[[2]][i]), !is.null(celTable[[3]][i]))  
			  chipType1[i] <- readCelHeader(celTable[[2]][i])$chiptype
			  chipType2[i] <- readCelHeader(celTable[[3]][i])$chiptype
		  }
  
		  if (all.equal(chipType1, chipType2)) chip <- chipType1[1] 
		  
   } else if (!is.null(celFile1) & !is.null(celFile2)) {
   		celHead1 <- readCelHeader(celFile1)
   		celHead2 <- readCelHeader(celFile2)
   		# closeAllConnections()
   		# gc()
   		if (identical(celHead1$chiptype, celHead2$chiptype)) assign("chip", celHead1$chiptype)
   }

   if (chip %in% chipType) {
     # packageName <- paste(chipType, "data.gz", sep = "")
     # while (!require(packageName)) biocLite(packageName)
     print("READING PROBE FILE")
     if (chip == "Clariom_S_Mouse") {probeFile <- fread(gunzip(system.file("extdata","Mouse_ClariomS_probefile_affyannot.csv.gz",
                                                                           package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))}
     else if (chip == "MTA-1_0" & method == 1) {probeFile <- fread(gunzip(system.file("extdata","Mouse_ClariomD_geneCDF_probefile.csv.gz",
                                                                                      package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     print("READ IN: MTA gene-level PROBE FILE")}
     else if (chip == "MTA-1_0" & method == 2) {probeFile <- fread(gunzip(system.file("extdata","Mouse_ClariomD_exonCDF_probefile.csv.gz",
                                                                                      package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     print("READ IN: MTA exon-level PROBE FILE")}
     else if (chip == "MTA-1_0" & method == 3) {probeFile <- fread(gunzip(system.file("extdata","Mouse_ClariomD_to_S_probefile_affyannot.csv.gz",
                                                                                      package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     print("READ IN: ClariomD_to_S PROBE FILE")}
     else if (chip == "Mouse430_2") {probeFile <- fread(gunzip(system.file("extdata","Mouse_430_2_probefile_affyannot.csv.gz",
                                                                           package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     print("READ IN: Mouse430_2 PROBE FILE")}
     else if (chip == "MoGene-1_0-st-v1") {probeFile <- fread(gunzip(system.file("extdata","Mouse_Gene_1_0ST_probefile_affyannot.csv.gz",
                                                                                 package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     print("READ IN: MoGene-1_0-st PROBE FILE")}
   }
   
   while (method != 1 & method != 2 & method != 3 & method != 4) method <- as.numeric(readline("ENTER METHOD BASED ON 'chipType': "))
   
	
   if (!(chip %chin% chip430)) {
   		trim <- 0.04
   		if (chip == "MTA-1_0" & method == 1) {
   		  bgp_list <- fread(gunzip(system.file("extdata","Mouse_ClariomD_bgp_probelist.csv.gz",package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
   		  bgp <- probeFile[fs1_man_fsetid %in% bgp_list$probesets_bgp]}
   		else if (chip == "MTA-1_0" & method == 2) {
   		  bgp <- probeFile[fs1_man_fsetid %like% "AFFX-BkGr-GC"]}
   		else if (chip == "MTA-1_0" & method == 3) {
   		  bgp <- probeFile[transcriptClusterID %like% "AFFX-BkGr-GC"]}
   		else if (chip == "MoGene-1_0-st-v1" & method == 1) {
   		  bgp <- probeFile[category %chin% "control->bgp->antigenomic"]}   		
   		else bgp <- probeFile[probesetType %chin% "control->bgp->antigenomic"]
   		
   		while (typeFilter != 0 & typeFilter != 1) typeFilter <- as.numeric(readline("USE ALL PROBES (0) OR MAIN PROBES (1) FOR S-SCORE CALCULATION: "))
   	   
   		if (chip %chin% chipTrans) {
		   if (typeFilter == 0) print("USING ALL PROBES FOR S-SCORE CALCULATION")
				else if (method == 1 & chip == "MTA-1_0") {
					# print("USING ALL MAIN PSRS PROBES FOR S-SCORE CALCULATION")
				  print("USING ALL 'MTA gene-level CDF' probes FOR S-SCORE CALCULATION")
					# probeFile <- probeFile[probesetType %chin% "main->psrs"]
				} else if (method == 2 & chip == "MTA-1_0") {
				  print("USING ALL 'MTA exon-level CDF' probes FOR S-SCORE CALCULATION")
				}
   		  else if (typeFilter == 0 & chip == "Clariom_S_Mouse") {
					print("USING ALL PROBES FOR S-SCORE CALCULATION")
   		  }
   		  else if (typeFilter == 1 & chip == "Clariom_S_Mouse") {
   		      probeFile <- probeFile[category %chin% "main"]
   		      print("USING ONLY MAIN-TC PROBES FOR S-SCORE CALCULATION")
				}
       } else if (chip %chin% chipGene) {
       		if (typeFilter == 0) print("USING ALL PROBES FOR S-SCORE CALCULATION")
       			else if (typeFilter == 1) {
       				print("USING ALL MAIN PROBES FOR S-SCORE CALCULATION")
       				probeFile <- probeFile[category %chin% "main"]
       			}
       } else print("'typeFilter' NOT APPLIED")
       
        
       if (method == 1) {
		      if (chip == "MTA-1_0") {
		        methodTag <- "geneCDF"
		      		method <- "fs1_man_fsetid"
		      		} 
              else if (chip == "Clariom_S_Mouse") {
		      	  methodTag <- "TCid"
		      	  method <- c("transcriptClusterID", "probeset_list", "geneName", "locus_type", "probesetType", "chr_num", "chr_start", "chr_stop", "chr_strand")
		      	  # method <- "transcriptClusterID"
		      	  }
       	  	  else if (chip == "MoGene-1_0-st-v1") {
       	  	    methodTag <- "TCid"
       	  	    method <- "transcriptClusterID"
       	  	    }
		      info <- probeFile[,.(nProbes = .N), keyby = method]
		    # Use 'nProbes' to check against 'total_probes' from 'MTA_annot_TCid' from affy
		      } 
   		else if (method == 2 & chip == "MTA-1_0") {
   		 	methodTag <- "exonCDF"
   		 	method <- "fs1_man_fsetid"
   		 	info <- probeFile[,.(nProbes = .N), keyby = method]
   	   }
   		else if (method == 3 & chip == "MTA-1_0") {
   		 	  methodTag <- "D_to_S"
   		 	  probeFile <- probeFile[!geneName %chin% ""]
   		 	  method <- "transcriptClusterID"
   		 	  mergeKey = c(method, "probeID")
   		 	  info <- probeFile[,.N, keyby = mergeKey][,.(nProbes = .N), keyby = method]
   		 	} else stop("NO METHOD APPLIED")
       
   		if (!identical(key(info), method)) setkeyv(info, method)
   		
   	   } else if (chip %in% chip430) {
   	   	   trim <- 0.02
   	   	   # bgp <- probeFile[probeType %chin% "misMatch"]
   	   	   bgp <- probeFile[probe_type %chin% "mm"] 
   	   	   # probeFile <- probeFile[probeType %chin% "perfectMatch"]
   	   	   probeFile <- probeFile[probe_type %chin% "pm"]
   	   
   	       if (method == 1) method <- methodTag <- "pmmm"
              else if (method == 2) method <- methodTag <- "gc"
    
           # info <- probeFile[,.N, keyby = .(probesetID, probesetName)]
           info <- probeFile[,.N, keyby = .(probesetID)]
      
   	   } else stop("CHIP TYPE NOT FOUND")
   	   
   if (!identical(key(probeFile), key(info))) {
   		setkeyv(probeFile, key(info)) 
   		setkeyv(bgp, key(info))
   		# setkey(bgp, probeset_id)
   }
   
   cIdx <- length(colnames(info))
   infoKey <- key(info)
   

    if (is.character(method)) {
  		if (!is.null(celTable))  {
  		   for (i in 1:nrow(celTable)) {
  			  print("READING CEL FILES")
  			  cel1 <- readCel(celTable[[2]][i], readIntensities = TRUE, readStdvs = TRUE, readPixels = TRUE, readXY = TRUE)
  			  cel2 <- readCel(celTable[[3]][i], readIntensities = TRUE, readStdvs = TRUE, readPixels = TRUE, readXY = TRUE)
  			  Score <- computeSscore(cel1, cel2, probeFile, bgp, method, SF1 = SF1, SF2 = SF2, verbose = verbose, trim = trim)
  			  info <- info[Score, on = infoKey]
  			  if (celTab.names){
  			    print("Custom run name for BATCH job:")
  			    colnames(info)[cIdx + i] <- as.character(celTable[i,1])
  			    print(as.character(celTable[i,1]))
  			  }
  			  else{
  			    print("Standard run name for BATCH job:")
  			  #colnames(info)[cIdx + i] <- paste("RUN_", i, sep = "")
  			  # Add filenames to colname (CEL_1 vs CEL_2)
  			  celName1 <- strsplit(cel1$header$filename, "/")[[1]]; celName1 <- celName1[length(celName1)]
  			  celName2 <- strsplit(cel2$header$filename, "/")[[1]]; celName2 <- celName2[length(celName2)]
  			  colnames(info)[cIdx + i] <- paste(celName1, "vs", celName2)
  			  print(paste(celName1, "vs", celName2))
  			  }
  		   }
		} else if (!is.null(celFile1) & !is.null(celFile2)) {
			print("READING CEL FILES")
			cel1 <- readCel(celFile1, readIntensities = TRUE, readStdvs = TRUE, readPixels = TRUE, readXY = TRUE)
			cel2 <- readCel(celFile2, readIntensities = TRUE, readStdvs = TRUE, readPixels = TRUE, readXY = TRUE)
			Score <- computeSscore(cel1, cel2, probeFile, bgp, method, SF1 = SF1, SF2 = SF2, verbose = verbose, trim = trim)
			info <- info[Score, on = infoKey]
			colnames(info)[cIdx + 1] <- "Sscore"
			# Add filenames to colname (CEL_1 vs CEL_2):
			# celName1 <- strsplit(cel1$header$filename, "/")[[1]]; celName1 <- celName1[length(celName1)]
			# celName2 <- strsplit(cel2$header$filename, "/")[[1]]; celName2 <- celName2[length(celName2)]
			# colnames(info)[cIdx + 1] <- paste(celName1, "vs", celName2)
			# print("Sscore2 run output assigned to column name:")
			# print(paste(celName1, "vs", celName2))
			}
		
		# if (verbose) View (info)
    }
   # Add annotations to ClariomD/MTA arrays:
   if (methodTag == "geneCDF" & chip == "MTA-1_0"){
     print("Adding gene-level annotations to MTA/ClariomD Sscores")
     annot <- fread(gunzip(system.file("extdata","Mouse_ClariomD_TCid_geneCDF_affyannot.csv.gz",
                                       package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     setkey(annot,transcript_cluster_id)
     info <- annot[info]
     info <- info[transcript_cluster_id %like% ".mm.1"]
     info <- info[,probeset_id := NULL]
   }
   if (methodTag == "exonCDF" & chip == "MTA-1_0"){
     print("Adding exon-level annotations to MTA/ClariomD Ssscores")
     annot <- fread(gunzip(system.file("extdata","Mouse_ClariomD_PSR_exonCDF_affyannot.csv.gz",
                                       package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     setkey(annot,probeset_id)
     info <- annot[info]
     info <- info[probeset_id %like% ".mm.1"]
     # info <- annot[transcript_cluster_id %like% ".mm.1"]
   }
   if (methodTag == "D_to_S" & chip == "MTA-1_0"){
     print("Adding gene-level annotations for to ClariomD-masked-to-ClariomS Sscores")
     annot <- fread(gunzip(system.file("extdata","Mouse_ClariomS_affyyannot_na36.csv.gz",
                                           package = "Sscore2"),remove=FALSE,temporary=TRUE,overwrite=TRUE))
     setkey(annot,transcript_cluster_id)
     info <- annot[info]
   }

	
	if (fileout) {
	#ID tag: a custom formatted timestamp applied to all written files
	ID <- format(Sys.time(), "%Y%m%d_%H%M%S")
	fileName <- paste(chip, "_", methodTag, "_", "Sscore2_", ID, ".csv", sep = "")
  print("WRITING S-SCORE OUTPUT TO DIRECTORY")
  fwrite(info, fileName)
  }
  if (gzip) {
    	print("COMPRESSING SSCORE OUTPUT FILE")
    	system(paste("gzip", fileName))
  }
  		
   print("DONE")
   # options(warn = 0)
   return(info)	
   
}
mfmiles/Sscore2 documentation built on Oct. 1, 2019, 6:04 a.m.