R/HudsonAlphaXYTcgaDataFile.R
In aroma.tcga: Methods for accessing online The Cancer Genome Atlas (TCGA) data

setConstructorS3("HudsonAlphaXYTcgaDataFile", function(...) {
  this <- extend(TcgaDataFile(...), "HudsonAlphaXYTcgaDataFile");
  this;
})


setMethodS3("getExtensionPattern", "HudsonAlphaXYTcgaDataFile", function(static, ...) {
  "[.](XandYintensity[.]txt)$";
}, static=TRUE)



setMethodS3("getReadArguments", "HudsonAlphaXYTcgaDataFile", function(this, ..., colClassPatterns=c("*"="character", "(Chr|Pos)"="integer", "(X|Y)$"="double")) {
  NextMethod("getReadArguments", this, ..., colClassPatterns=colClassPatterns);
}, protected=TRUE)



setMethodS3("extractTheta", "HudsonAlphaXYTcgaDataFile", function(this, ..., drop=TRUE) {
  colClassPatterns <- c("CompositeElement REF"="character", "(X|Y)$"="double");

  throw("Not implemented");

  data <- readDataFrame(this, colClassPatterns=colClassPatterns, ...);

  data;
})



setMethodS3("extractTotalAndFracB", "HudsonAlphaXYTcgaDataFile", function(this, ..., drop=TRUE) {
  data <- extractTheta(this, ..., drop=FALSE);

  # Add data[,2,] to the total data[,1,], if non-NA.
  ok <- whichVector(!is.na(data[,2,]));
  data[ok,1,] <- data[ok,1,] + data[ok,2,];
  rm(ok);

  # Calculate allele B fraction
  data[,2,] <- data[,2,] / data[,1,];

  # Drop singleton dimensions?
  nbrOfSamples <- dim[3];
  if (drop && nbrOfSamples == 1) {
    dimnames <- dimnames[-3];
    dim <- dim[-3];
  }

  dimnames(data)[[2]] <- c("total", "fracB");

  data;
})


setMethodS3("exportTotalAndFracB", "HudsonAlphaXYTcgaDataFile", function(this, dataSet, unf, samplePatterns=NULL, ..., rootPath="totalAndFracBData", maxNbrOfUnknownUnitNames=0, force=FALSE, verbose=FALSE) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'dataSet':
  dataSet <- Arguments$getCharacter(dataSet, length=c(1,1));

  # Argument 'unf':
  unf <- Arguments$getInstanceOf(unf, "UnitNamesFile");

  # Argument 'samplePatterns':
  if (!is.null(samplePatterns)) {
    samplePatterns <- sapply(samplePatterns, FUN=function(s) {
      Arguments$getRegularExpression(s);
    });
  }

  # Argument 'rootPath':
  rootPath <- Arguments$getWritablePath(rootPath);

  # Argument 'force':
  force <- Arguments$getLogical(force);

  # Argument 'verbose':
  verbose <- Arguments$getVerbose(verbose);
  if (verbose) {
    pushState(verbose);
    on.exit(popState(verbose));
  }


  verbose && enter(verbose, "Exporting ", class(this)[1]);
  chipType <- getChipType(unf, fullname=FALSE);
  verbose && cat(verbose, "Chip type: ", chipType);
  
  path <- file.path(rootPath, dataSet, chipType);
  path <- Arguments$getWritablePath(path);
  verbose && cat(verbose, "Exporting to path: ", path);

  # Unit indices (to be inferred)
  units <- NULL;

  allColumnNames <- getColumnNames(this);

  # Sample names to be imported
  pattern <- ",(X|Y)$";
  sampleNames <- grep(pattern, allColumnNames, value=TRUE);
  sampleNames <- gsub(pattern, "", sampleNames);
  # Sanity check
  stopifnot(length(sampleNames) %% 2 == 0);
  sampleNames <- unique(sampleNames);
  verbose && cat(verbose, "Samples to be processed:");
  verbose && print(verbose, sampleNames);

  # Subset samples by patterns?
  if (!is.null(samplePatterns)) {
    idxs <- lapply(samplePatterns, FUN=function(pattern) {
      grep(pattern, sampleNames);
    });
    idxs <- unlist(idxs, use.names=FALSE);
    idxs <- sort(unique(idxs));
    sampleNames <- sampleNames[idxs];
    verbose && cat(verbose, "Filtered set of samples to be processed:");
    verbose && print(verbose, sampleNames);
  }

  nbrOfSamples <- length(sampleNames);

  typeList <- list(
    total = AromaUnitTotalCnBinarySet,
    fracB = AromaUnitFracBCnBinarySet
  );

  for (ii in seq(length=nbrOfSamples)) {
    sampleName <- sampleNames[ii];

    verbose && enter(verbose, sprintf("Exporting sample #%d ('%s') of %d", 
                                      ii, sampleName, nbrOfSamples));

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Files to be exported
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    tags <- names(typeList);
    fullnames <- paste(sampleName, tags, sep=",");
    verbose && cat(verbose, "Full names to be exported:");
    verbose && print(verbose, fullnames);

    filenames <- sprintf("%s.asb", fullnames);
    pathnames <- file.path(path, filenames);

    res <- vector("list", length(typeList));
    names(res) <- names(typeList);
    for (kk in seq(along=typeList)) {
      name <- names(typeList)[kk];
      pathname <- pathnames[kk];
      verbose && cat(verbose, "Pathname: ", pathname);
      if (!force && isFile(pathname)) {
        verbose && cat(verbose, "Loading already existing file.");
        setClazz <- typeList[[kk]];
        className <- setClazz$getFileClass();
        fileClazz <- Class$forName(className);
        res[[name]] <- newInstance(fileClazz, pathname);
      }
    } # for (type ...)

    isDone <- (!sapply(res, FUN=is.null));
    if (!force && all(isDone)) {
      verbose && cat(verbose, "Sample already exported. Skipping.");
      verbose && exit(verbose);
      next;
    }

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Write to temporary files
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    pathnamesT <- sprintf("%s.tmp", pathnames);
    # Early sanity check
    for (pathnameT in pathnamesT) {
      if (isFile(pathnameT)) {
        throw("Temporary file already exists: ", pathnameT);
      }
    }
    

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Map unit indices
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    if (is.null(units)) {
      verbose && enter(verbose, "Mapping unit names to indices");

      verbose && enter(verbose, "Reading unit names");
      colClassPatterns <- c("*"="NULL", "CompositeElement REF"="character");
      data <- readDataFrame(this, colClassPatterns=colClassPatterns);
      unitNames <- data[,1, drop=TRUE];
      verbose && cat(verbose, "Unit names:");
      verbose && str(verbose, unitNames);
      verbose && exit(verbose);

      verbose && enter(verbose, "Mapping to unit-names file");
      verbose && print(verbose, unf);
      units <- indexOf(unf, names=unitNames);
      verbose && cat(verbose, "Units:");
      verbose && str(verbose, units);
      verbose && summary(verbose, units);
      verbose && exit(verbose);

      # Sanity check
      missing <- unitNames[is.na(units)];
      n <- length(missing);
      if (n > 0) {
        if (n > 3) missing <- c(missing[1:2], "...", missing[n]);
        missing <- paste(missing, collapse=", ");
        msg <- sprintf("Detected %s unknown unit names: %s", n, missing);
        verbose && cat(verbose, msg);
        if (n > maxNbrOfUnknownUnitNames) throw(msg);
      }
      verbose && exit(verbose);
    }


    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Reading data for current sample
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    verbose && enter(verbose, "Reading data");
    verbose && cat(verbose, "Sample name: ", sampleName);
    pattern <- sprintf("%s,(X|Y)$", sampleName);
    colClassPatterns <- c("double");
    names(colClassPatterns) <- pattern;
    colClassPatterns <- c("*"="NULL", colClassPatterns);
    verbose && cat(verbose, "Column pattern:");
    verbose && print(verbose, colClassPatterns);
    data <- readDataFrame(this, colClassPatterns=colClassPatterns);
    verbose && cat(verbose, "Data read:");
    verbose && str(verbose, data);
    columnNames <- colnames(data);
    verbose && cat(verbose, "Column names:");
    verbose && str(verbose, columnNames);
    verbose && exit(verbose);


    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Generating tracability information
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    verbose && enter(verbose, "Generating 'srcFile' footer");
    srcFile <- list(
      filename = getFilename(this),
      filesize = getFileSize(this),
      checksum = getChecksum(this),
      sampleName = sampleName,
      columnNames = columnNames,
      valuesChecksum = digest(data)
    );
    verbose && str(verbose, srcFile);
    verbose && exit(verbose);


    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Transforming data
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    verbose && enter(verbose, "Transforming (X,Y) to (total,fracB)");
    data[,1] <- data[,1,drop=TRUE] + data[,2,drop=TRUE];
    data[,2] <- data[,2,drop=TRUE] / data[,1,drop=TRUE];
    colnames(data) <- names(typeList);
    verbose && str(verbose, data);
    verbose && exit(verbose);

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Dropping unknown units
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    if (anyMissing(units)) {
      keep <- which(!is.na(units));
      units <- units[keep];
      data <- data[keep,,drop=FALSE];
      rm(keep);
    }

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    # Writing data
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    for (kk in seq(along=typeList)) {
      name <- names(typeList)[kk];

      verbose && enter(verbose, sprintf("Exporting #%d ('%s') of %d", 
                                          kk, name, length(typeList)));

      df <- res[[name]];
      if (!force && !is.null(df)) {
        verbose && cat(verbose, "Already exported. Skipping.");
        verbose && exit(verbose);
        next;
      }

      pathname <- pathnames[kk];
      verbose && cat(verbose, "Pathname: ", pathname);
      pathnameT <- pathnamesT[kk];
      verbose && cat(verbose, "Temporary pathname: ", pathnameT);
      # Sanity check
      if (!force && isFile(pathnameT)) {
        throw("Temporary file already exists: ", pathnameT);
      }

      on.exit({
        if (!is.null(pathnameT) && isFile(pathnameT)) {
          file.remove(pathnameT);
        }
      }, add=TRUE);


      setClazz <- typeList[[kk]];
      className <- setClazz$getFileClass();
      fileClazz <- Class$forName(className);

      verbose && enter(verbose, "Allocating temporary file");
      df <- fileClazz$allocateFromUnitNamesFile(unf, 
                                 filename=pathnameT, path=NULL);

      footer <- readFooter(df);
      footer$srcFile <- srcFile;
      writeFooter(df, footer);
      verbose && exit(verbose);

      verbose && enter(verbose, "Writing signals");
      df[units,1] <- data[,name, drop=TRUE];
      verbose && exit(verbose);

      verbose && enter(verbose, "Renaming temporary file");
      # Rename temporary file
      file.rename(pathnameT, pathname);
      if (isFile(pathnameT) || !isFile(pathname)) {
        throw("Failed to rename temporary file: ", pathnameT, " -> ", pathname);
      }
      pathnameT <- NULL;
      verbose && exit(verbose);

      df <- newInstance(fileClazz, pathname);
      verbose && cat(verbose, "Exported data file:");
      verbose && print(verbose, df);

      res[[name]] <- df;

      rm(df);
      verbose && exit(verbose);
    } # for (kk ...)
    rm(data);

    verbose && cat(verbose, "Sample name:", sampleName);
    verbose && cat(verbose, "Data files written:");
    verbose && print(verbose, res);

    rm(res);

    verbose && exit(verbose);
  } # for (ii ...)

  verbose && enter(verbose, "Setting up exported data sets");
  dsList <- list();
  for (kk in seq(along=typeList)) {
    name <- names(typeList)[kk];
    setClazz <- typeList[[kk]];
    pattern <- sprintf(",%s.asb$", tags[kk]);
    ds <- setClazz$byPath(path=path, pattern=pattern);
    dsList[[name]] <- ds;
  }
  verbose && print(verbose, dsList);
  verbose && exit(verbose);
  
  verbose && exit(verbose);

  invisible(dsList);
})


############################################################################
# HISTORY:
# 2010-01-06
# o Added argument 'samplePatterns' to exportTotalAndFracB() for
#   HudsonAlphaXYTcgaDataFile, which can be used to export a subset of
#   the data for particular TCGA samples.
# o CLEAN UP: No need for assign NAs when allocating new files; this is now
#   always the default way (in aroma.core v1.4.1).
# 2009-11-06
# o Created.
############################################################################
Any scripts or data that you put into this service are public.
aroma.tcga documentation built on May 2, 2019, 4:09 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
aroma.tcga
Methods for accessing online The Cancer Genome Atlas (TCGA) data

R/HudsonAlphaXYTcgaDataFile.R
In aroma.tcga: Methods for accessing online The Cancer Genome Atlas (TCGA) data

Try the aroma.tcga package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

aroma.tcga Methods for accessing online The Cancer Genome Atlas (TCGA) data

R/HudsonAlphaXYTcgaDataFile.R In aroma.tcga: Methods for accessing online The Cancer Genome Atlas (TCGA) data

Try the aroma.tcga package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

aroma.tcga
Methods for accessing online The Cancer Genome Atlas (TCGA) data

R/HudsonAlphaXYTcgaDataFile.R
In aroma.tcga: Methods for accessing online The Cancer Genome Atlas (TCGA) data